Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
eirikeikaas committed Oct 25, 2018
1 parent 8616117 commit ccfdef8
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion scraper.py
Expand Up @@ -5,6 +5,7 @@

import scraperwiki
import lxml.html
import urlparse
from pprint import pprint

result_schema = {
Expand Down Expand Up @@ -56,7 +57,21 @@ def get_cell_value(element, css):
# Walk the rows of the first "table.footable" on the page and build one
# result dict per row from its <td> cells.
result_table = root.cssselect("table.footable")[0]
result_cells = result_table.cssselect("tr")
for result_cell in result_cells:
    print(result_cell)

    # Select the row's cells once instead of re-running the selector for
    # every field below.
    cells = result_cell.cssselect("td")

    # Cells 1..7 are read below, so a row needs at least 8 <td> elements.
    # Rows without them (for example a header row built from <th>) cannot
    # yield a result and would otherwise raise IndexError.
    if len(cells) < 8:
        continue

    # The competitor id is carried in the query string of the athlete link.
    # Fixed: the method is `cssselect` (was misspelled `csselect`, which
    # raised AttributeError).
    athlete_url = cells[2].cssselect("a")[0].get("href")
    parsed = urlparse.urlparse(athlete_url)
    # parse_qs maps each parameter to a *list* of values; keep the first so
    # that competitor_id is a plain string rather than a one-element list.
    athlete_id = urlparse.parse_qs(parsed.query)['competitorid'][0]

    result = {
        'event': raceinfo['codex'],
        'rank': get_cell_value(cells[1], ""),
        'athlete': get_cell_value(cells[2], "a"),
        'competitor_id': athlete_id,
        'yob': get_cell_value(cells[3], ""),
        'nation': get_cell_value(cells[4], ""),
        'time': get_cell_value(cells[5], ""),
        'behind': get_cell_value(cells[6], ""),
        'points': get_cell_value(cells[7], ""),
    }
    # The per-row result is only printed here; it is not written to the
    # datastore in this fragment.
    print(result)

# NOTE(review): the original indentation was lost in this view; the
# statements below are assumed to run once per race, after the row loop.
# Confirm against the full file.
print(link)
print(raceinfo['date'])
scraperwiki.sqlite.save(unique_keys=['codex'], data=raceinfo, table_name="data")
Expand Down

0 comments on commit ccfdef8

Please sign in to comment.