Skip to content

Commit

Permalink
Update scraper.py
Browse files: browse the repository at this point in the history
  • Loading branch information
hicham20201441 committed Aug 27, 2020
1 parent 8b1193b commit 04c13bc
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
# # Find something on the page using css selectors
root = lxml.html.fromstring(html)
urs=[e.get("href") for e in root.cssselect("a")]
urls=[]
urls=set()

for k in urs:
if url0 in k and k!=url0:
urls.append(k)
urls.add(k)
scraperwiki.sqlite.save(unique_keys=["link"], data={"link":k})
while(len(urls)>0):
print("scraping: "+urls[0])
Expand All @@ -25,7 +26,7 @@
try:
for u in newrls:
if url0 in u and u!=url0:
urls.append(u)
urls.add(u)
scraperwiki.sqlite.save(unique_keys=["link"], data={"link": u})
except:
pass
Expand Down

0 comments on commit 04c13bc

Please sign in to comment.