Update scraper.py

danozgriff · May 12, 2016 · 4bde9b0
1 parent 31c7e6b
commit 4bde9b0
Showing 1 changed file with 6 additions and 6 deletions.
diff --git a/scraper.py b/scraper.py
@@ -5,19 +5,19 @@
 u=urllib2.urlopen("http://pilbaraports.com.au/Shipping_Schedule/Current_Shipping_Schedule.pdf")
 
 x=scraperwiki.pdftoxml(u.read())
-print x
+#print x
 #r=lxml.etree.fromstring(x)
 #r.xpath('//page[@number="1"]')
 #r.xpath('//text[@left="64"]/b')[0:10]
 #r.xpath('//text[@left="64"]/b')[8].text
 
 #html = response.read()
 #test1 = re.search(r'(.*?)<br \/><\/div>', x).group()
-#tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', x)
-#for tuple in tuples:
-# print tuple[1]
-# print tuple[2]
-# print tuple[3]
+tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', x)
+for tuple in tuples:
+ print tuple[1]
+ print tuple[2]
+ print tuple[3]
 #str(test1.replace(" ", "")).replace("><", ""))
 #tuples = re.findall(r'(\">|\'>|img\/)(.*?)(<\/|\.gif)', str(test1.replace(" ", "")).replace("><", ""))