Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
danozgriff committed May 12, 2016
1 parent 31c7e6b commit 4bde9b0
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions scraper.py
Expand Up @@ -5,19 +5,19 @@
# NOTE(review): this is a diff hunk rendered without +/- markers, so removed
# and added lines appear together, and the loop body's indentation has been
# lost. The imports used here (urllib2, scraperwiki, re) sit above this hunk.
# The code is Python 2 (urllib2 module, print statement).

# Open the Pilbara Ports "Current Shipping Schedule" PDF over HTTP.
u=urllib2.urlopen("http://pilbaraports.com.au/Shipping_Schedule/Current_Shipping_Schedule.pdf")

# Read the whole response body and hand it to scraperwiki.pdftoxml.
# Presumably the result is XML text: it is searched with re.findall below.
x=scraperwiki.pdftoxml(u.read())
# The bare print is the pre-commit line; the commented print is its replacement.
print x
#print x
#r=lxml.etree.fromstring(x)
#r.xpath('//page[@number="1"]')
#r.xpath('//text[@left="64"]/b')[0:10]
#r.xpath('//text[@left="64"]/b')[8].text

#html = response.read()
#test1 = re.search(r'(.*?)<br \/><\/div>', x).group()
# Pre-commit (disabled) form of the extraction loop that follows.
#tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', x)
#for tuple in tuples:
# print tuple[1]
# print tuple[2]
# print tuple[3]
# Post-commit (enabled) form. Each findall result is a 4-tuple:
#   [0] the whole match, [1] the opening marker (left=", width=" or <b>),
#   [2] the lazily captured text, [3] the closing marker (</b> or ").
# The opening and closing alternations are independent, so an opening <b>
# can be closed by a quote and an attribute opener by </b>.
# NOTE(review): the loop variable `tuple` shadows the builtin of that name.
tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', x)
for tuple in tuples:
# The three prints below are the loop body; their indentation was lost here.
print tuple[1]
print tuple[2]
print tuple[3]
#str(test1.replace(" ", "")).replace("><", ""))
#tuples = re.findall(r'(\">|\'>|img\/)(.*?)(<\/|\.gif)', str(test1.replace(" ", "")).replace("><", ""))

Expand Down

0 comments on commit 4bde9b0

Please sign in to comment.