Skip to content

Commit

Permalink
Update scraper.py
Browse files: browse the repository at this point in the history
  • Loading branch information
eirikeikaas committed Oct 25, 2018
1 parent a859367 commit 452fd0c
Showing 1 changed file with 13 additions and 13 deletions.
26 changes: 13 additions & 13 deletions scraper.py
Expand Up @@ -37,13 +37,13 @@ def race_link_results(url):
for event_row in event_rows:
last_cell = event_row.cssselect("td:last-child")[0]
for race_links in last_cell.iterlinks():
print get_cell_value(event_row.cssselect("td")[2], "span a").decode("utf-8").encode('ascii', 'ignore')
print get_cell_value(event_row.cssselect("td")[2], "span a").decode("utf-8")
extra = {
'date': get_cell_value(event_row.cssselect("td")[1], "span a").decode("utf-8").encode('ascii', 'ignore'),
'place': get_cell_value(event_row.cssselect("td")[2], "span a").decode("utf-8").encode('ascii', 'ignore'),
'country': get_cell_value(event_row.cssselect("td")[3], "a span").decode("utf-8").encode('ascii', 'ignore'),
'codex': get_cell_value(event_row.cssselect("td")[4], "a").decode("utf-8").encode('ascii', 'ignore'),
'discipline': get_cell_value(event_row.cssselect("td")[5], "a").decode("utf-8").encode('ascii', 'ignore'),
'date': get_cell_value(event_row.cssselect("td")[1], "span a").decode('utf8'),
'place': get_cell_value(event_row.cssselect("td")[2], "span a").decode('utf8'),
'country': get_cell_value(event_row.cssselect("td")[3], "a span").decode('utf8'),
'codex': get_cell_value(event_row.cssselect("td")[4], "a").decode('utf8'),
'discipline': get_cell_value(event_row.cssselect("td")[5], "a").decode('utf8'),
}
yield (race_links[2], extra)

Expand All @@ -65,14 +65,14 @@ def get_cell_value(element, css):
athlete_id = urlparse.parse_qs(parsed.query)['competitorid'][0]
result = {
'event': raceinfo['codex'],
'rank': result_cell.cssselect("td")[0].text_content().decode("utf-8").encode('ascii', 'ignore'),
'athlete': result_cell.cssselect("td")[1].text_content().decode("utf-8").encode('ascii', 'ignore'),
'rank': result_cell.cssselect("td")[0].text_content().decode('utf8'),
'athlete': result_cell.cssselect("td")[1].text_content().decode('utf8'),
'competitor_id': athlete_id,
'yob': result_cell.cssselect("td")[2].text_content().decode("utf-8").encode('ascii', 'ignore'),
'nation': result_cell.cssselect("td")[3].text_content().decode("utf-8").encode('ascii', 'ignore'),
'time': result_cell.cssselect("td")[4].text_content().decode("utf-8").encode('ascii', 'ignore'),
'behind': result_cell.cssselect("td")[5].text_content().decode("utf-8").encode('ascii', 'ignore'),
'points': result_cell.cssselect("td")[6].text_content().decode("utf-8").encode('ascii', 'ignore')
'yob': result_cell.cssselect("td")[2].text_content().decode('utf8'),
'nation': result_cell.cssselect("td")[3].text_content().decode('utf8'),
'time': result_cell.cssselect("td")[4].text_content().decode('utf8'),
'behind': result_cell.cssselect("td")[5].text_content().decode('utf8'),
'points': result_cell.cssselect("td")[6].text_content().decode('utf8')
}
print result
scraperwiki.sqlite.save(unique_keys=['athlete'], data=result, table_name="result")
Expand Down

0 comments on commit 452fd0c

Please sign in to comment.