Permalink
Browse files

Slugify the name to provide an id.

There is a chance that this will end up merging distinct people who share
the same name (they would get the same slug), but I think it's a risk we'll
have to take, as there's no other person id available that I can see.
  • Loading branch information...
Duncan Parkes
Duncan Parkes committed May 18, 2015
1 parent 9a757a7 commit 5d8aaed78a0219c765c0c7d82e120592802bac32
Showing with 10 additions and 1 deletion.
  1. +2 −0 requirements.txt
  2. +8 −1 scraper.py
View
@@ -7,3 +7,5 @@
lxml==3.4.4
cssselect==0.9.1
awesome-slugify==1.6.4
View
@@ -4,6 +4,8 @@
import scraperwiki
from slugify import slugify_unicode
source_url = 'http://www.parliament.gov.na/index.php?option=com_contact&view=category&id=104&Itemid=1479'
resp = requests.get(source_url)
@@ -32,7 +34,8 @@
name_link = tr.cssselect('.jsn-table-column-name')[0].find('a')
member['name'] = name_link.text.strip()
member['details_url'] = urljoin(source_url, name_link.get('href'))
member['id'] = slugify_unicode(member['name'])
details_url = member['details_url'] = urljoin(source_url, name_link.get('href'))
try:
member['party'] = tr.cssselect('.jsn-table-column-country')[0].text.strip()
@@ -44,6 +47,10 @@
# .jsn-table-column-email contains the email address, but only with
# javascript turned on.
details_resp = requests.get(details_url)
details_root = lxml.html.fromstring(details_resp.text)
# import pdb;pdb.set_trace()
data.append(member)
next_links = term_root.cssselect('a[title=Next]')

0 comments on commit 5d8aaed

Please sign in to comment.