Permalink
Browse files

Get images

  • Loading branch information...
Duncan Parkes
Duncan Parkes committed May 18, 2015
1 parent 217249d commit 8298bd6ebe183ce91f9e5c51763306ffb756e7db
Showing with 9 additions and 3 deletions.
  1. +9 −3 scraper.py
View
@@ -47,11 +47,17 @@
# .jsn-table-column-email contains the email address, but only with
# JavaScript turned on.
# details_resp = requests.get(details_url)
# details_root = lxml.html.fromstring(details_resp.text)
# import pdb;pdb.set_trace()
details_resp = requests.get(details_url)
details_root = lxml.html.fromstring(details_resp.text)
key = (member['name'], member['term'])
try:
member['image'] = urljoin(source_url, details_root.cssselect('.jsn-contact-image')[0].cssselect('img')[0].get('src'))
except:
print "No image found for {} in {}".format(*key)
member['image'] = ''
if key in data:
print "Duplicate (name, term) pair ignored: ({}, {})".format(*key)
else:

0 comments on commit 8298bd6

Please sign in to comment.