Permalink
Browse files

Update scraper.py

  • Loading branch information...
danozgriff committed May 12, 2016
1 parent 1496d17 commit 1b956de4fbe7fb0697ddf3081f537b6705a70032
Showing with 6 additions and 5 deletions.
  1. +6 −5 scraper.py
View
@@ -15,12 +15,13 @@
# NOTE(review): this span reads like a diff hunk whose +/- markers and leading
# indentation were lost: the live findall loop below is followed by an
# identical commented-out copy. Presumably the live copy is the removed side
# and the commented copy is the added side -- confirm against the real file.
# `x` (the text being searched) is defined outside this fragment.
#print r
#test1 = re.search('pdf2xml(.*?)</page>', x).group()
# Search x for the text between "jpg" and "TIDES". `(.|\n)+` matches any
# character including newlines and is greedy, so the match runs from the
# first "jpg" to the last "TIDES". test1 is a match object, or None when
# nothing matches -- it is not a string.
test1 = re.search(r'jpg((.|\n)+)TIDES', x)
print test1
#test1 = re.search(r'Day\'s Volume(.*?)<br \/><\/div>', html).group()
# NOTE(review): test1 is the re.search result (match object or None), but
# re.findall expects a string, so this call raises TypeError as written.
# Passing test1.group() (after a None check) looks like the intent -- confirm.
# The pattern has four groups, so each result is a 4-tuple:
# [0] whole hit, [1] opening delimiter (left=" / width=" / <b>),
# [2] the lazily captured text, [3] closing delimiter (</b> or ").
tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', test1)
# NOTE(review): the loop variable shadows the builtin `tuple`. The three
# prints below are presumably the loop body; they are not indented here.
for tuple in tuples:
print tuple[1]
print tuple[2]
print tuple[3]
# Commented-out copy of the findall loop above.
#tuples = re.findall(r'((left="|width="|<b>)(.*?)(</b>|"))', test1)
#for tuple in tuples:
# print tuple[1]
# print tuple[2]
# print tuple[3]
#str(test1.replace(" ", "")).replace("><", ""))
#tuples = re.findall(r'(\">|\'>|img\/)(.*?)(<\/|\.gif)', str(test1.replace(" ", "")).replace("><", ""))

0 comments on commit 1b956de

Please sign in to comment.