Permalink
Browse files

Update scraper.py

  • Loading branch information...
danozgriff committed May 12, 2016
1 parent 18c530a commit b911f609b3142b4f62e1df08f1ee432d3d7777d3
Showing with 4 additions and 2 deletions.
  1. +4 −2 scraper.py
View
@@ -6,13 +6,15 @@
# Convert the PDF bytes read from ``u`` (opened earlier in the script) into
# the XML text produced by scraperwiki's pdftoxml helper.
x = scraperwiki.pdftoxml(u.read())
#print x

# Parse the XML so the document can also be queried as a tree.
r = lxml.etree.fromstring(x)
# Exploratory XPath queries kept for reference:
#r.xpath('//page[@number="1"]')
#r.xpath('//text[@left="64"]/b')[0:10]
#r.xpath('//text[@left="64"]/b')[8].text
#html = response.read()
print(r)

# re.search() only accepts text, so the search runs against the raw XML
# string ``x``; passing the parsed element ``r`` raises TypeError.
# NOTE(review): '.' does not match newlines, so "jpg" and "TIDES" must sit on
# the same line of ``x`` unless re.DOTALL is added -- confirm against real data.
section = re.search('jpg(.*?)TIDES', x)
if section is None:
    # Fail with a clear message instead of AttributeError on None.group().
    raise ValueError("no 'jpg ... TIDES' section found in the pdftoxml output")
test1 = section.group()
#test1 = re.search(r'Day\'s Volume(.*?)<br \/><\/div>', html).group()

# Each findall() result is a 4-tuple of the pattern's groups:
# (whole match, opening marker, captured value, closing marker).
tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', test1)
for groups in tuples:
    # Index 1 is the opening marker ('left="', 'width="' or '<b>'),
    # exactly what the original loop printed.
    print(groups[1])

0 comments on commit b911f60

Please sign in to comment.