Skip to content

Commit

Permalink
urlopen the url and update the schema of the target page
Browse files Browse the repository at this point in the history
  • Loading branch information
cirotix committed Dec 2, 2014
1 parent 7f450ab commit 58848f1
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions scraper.py 100644 → 100755
Expand Up @@ -7,7 +7,7 @@
from pprint import pprint
from datetime import datetime

-import scraperwiki
+#import scraperwiki

URL_SEARCH = 'http://ec.europa.eu/transparencyregister/public/consultation/search.do'
URL = 'http://ec.europa.eu/transparencyregister/public/consultation/search.do?country=%s&d-4959990-p=%s'
Expand Down Expand Up @@ -94,11 +94,11 @@ def get_country_index():
break

def get_entry(url):
-doc = html.parse(url)
+doc = html.parse(urlopen(url))
entry = {}
profile = doc.find('//div[@class="box"]')
entry['name'] = profile.findtext('.//h4/b').strip()
-id, _, registration_date, _, update_date = list(profile.findall('.//span'))
+id, _, registration_date, _, update_date, _, year = list(profile.findall('.//span'))
entry['id'] = id.text.strip()
entry['register_url'] = url
entry['retrieved_at'] = datetime.now().isoformat()
Expand Down

0 comments on commit 58848f1

Please sign in to comment.