Permalink
Browse files

Update scraper.py

  • Loading branch information...
danozgriff committed May 12, 2016
1 parent 31c7e6b commit 4bde9b09b252fb0fcd01950063284687da418b55
Showing with 6 additions and 6 deletions.
  1. +6 −6 scraper.py
View
@@ -5,19 +5,19 @@
# Download the current shipping-schedule PDF, convert it to XML with
# scraperwiki.pdftoxml, and print pieces of that XML picked out by a regex.
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers
# and leading indentation were lost -- `print x` / `#print x` and the
# commented / uncommented tuple loop below appear to be the before/after
# sides of one change. Confirm against the repository which lines are live.
u=urllib2.urlopen("http://pilbaraports.com.au/Shipping_Schedule/Current_Shipping_Schedule.pdf")
# Read the whole response body and hand it to pdftoxml; the result is later
# searched with re.findall, so it is presumably a text string -- TODO confirm.
x=scraperwiki.pdftoxml(u.read())
# Dump the complete converted document (Python 2 print statement).
print x
#print x
# Earlier experiments kept as commented-out code: lxml/XPath queries against
# the converted XML and a regex search over it.
#r=lxml.etree.fromstring(x)
#r.xpath('//page[@number="1"]')
#r.xpath('//text[@left="64"]/b')[0:10]
#r.xpath('//text[@left="64"]/b')[8].text
#html = response.read()
#test1 = re.search(r'(.*?)<br \/><\/div>', x).group()
#tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', x)
#for tuple in tuples:
# print tuple[1]
# print tuple[2]
# print tuple[3]
# The pattern has four groups, so findall yields 4-tuples:
#   [0] the whole match, [1] the opener (left=", width=" or <b>),
#   [2] the lazily matched content, [3] the closer (</b> or ").
# The opener and closer alternations are independent, so an opener is not
# forced to pair with its own closer (e.g. <b> may be closed by a quote).
tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', x)
# NOTE(review): the loop variable `tuple` shadows the builtin of that name.
for tuple in tuples:
# NOTE(review): the three prints below are at column 0 here; they are
# presumably the loop body with its indentation lost -- confirm and re-indent
# before running.
print tuple[1]
print tuple[2]
print tuple[3]
# Further commented-out experiments that depend on `test1` from the disabled
# re.search above.
#str(test1.replace(" ", "")).replace("><", ""))
#tuples = re.findall(r'(\">|\'>|img\/)(.*?)(<\/|\.gif)', str(test1.replace(" ", "")).replace("><", ""))

0 comments on commit 4bde9b0

Please sign in to comment.