From 452fd0c3cccba2b95352ef05151dc25ab7b41c69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eirik=20Eik=C3=A5s?= Date: Thu, 25 Oct 2018 13:39:56 +0200 Subject: [PATCH] Update scraper.py --- scraper.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/scraper.py b/scraper.py index f8564ac..6f43778 100644 --- a/scraper.py +++ b/scraper.py @@ -37,13 +37,13 @@ def race_link_results(url): for event_row in event_rows: last_cell = event_row.cssselect("td:last-child")[0] for race_links in last_cell.iterlinks(): - print get_cell_value(event_row.cssselect("td")[2], "span a").decode("utf-8").encode('ascii', 'ignore') + print get_cell_value(event_row.cssselect("td")[2], "span a").decode("utf-8") extra = { - 'date': get_cell_value(event_row.cssselect("td")[1], "span a").decode("utf-8").encode('ascii', 'ignore'), - 'place': get_cell_value(event_row.cssselect("td")[2], "span a").decode("utf-8").encode('ascii', 'ignore'), - 'country': get_cell_value(event_row.cssselect("td")[3], "a span").decode("utf-8").encode('ascii', 'ignore'), - 'codex': get_cell_value(event_row.cssselect("td")[4], "a").decode("utf-8").encode('ascii', 'ignore'), - 'discipline': get_cell_value(event_row.cssselect("td")[5], "a").decode("utf-8").encode('ascii', 'ignore'), + 'date': get_cell_value(event_row.cssselect("td")[1], "span a").decode('utf8'), + 'place': get_cell_value(event_row.cssselect("td")[2], "span a").decode('utf8'), + 'country': get_cell_value(event_row.cssselect("td")[3], "a span").decode('utf8'), + 'codex': get_cell_value(event_row.cssselect("td")[4], "a").decode('utf8'), + 'discipline': get_cell_value(event_row.cssselect("td")[5], "a").decode('utf8'), } yield (race_links[2], extra) @@ -65,14 +65,14 @@ def get_cell_value(element, css): athlete_id = urlparse.parse_qs(parsed.query)['competitorid'][0] result = { 'event': raceinfo['codex'], - 'rank': result_cell.cssselect("td")[0].text_content().decode("utf-8").encode('ascii', 'ignore'), - 'athlete': result_cell.cssselect("td")[1].text_content().decode("utf-8").encode('ascii', 'ignore'), + 'rank': result_cell.cssselect("td")[0].text_content().decode('utf8'), + 'athlete': result_cell.cssselect("td")[1].text_content().decode('utf8'), 'competitor_id': athlete_id, - 'yob': result_cell.cssselect("td")[2].text_content().decode("utf-8").encode('ascii', 'ignore'), - 'nation': result_cell.cssselect("td")[3].text_content().decode("utf-8").encode('ascii', 'ignore'), - 'time': result_cell.cssselect("td")[4].text_content().decode("utf-8").encode('ascii', 'ignore'), - 'behind': result_cell.cssselect("td")[5].text_content().decode("utf-8").encode('ascii', 'ignore'), - 'points': result_cell.cssselect("td")[6].text_content().decode("utf-8").encode('ascii', 'ignore') + 'yob': result_cell.cssselect("td")[2].text_content().decode('utf8'), + 'nation': result_cell.cssselect("td")[3].text_content().decode('utf8'), + 'time': result_cell.cssselect("td")[4].text_content().decode('utf8'), + 'behind': result_cell.cssselect("td")[5].text_content().decode('utf8'), + 'points': result_cell.cssselect("td")[6].text_content().decode('utf8') } print result scraperwiki.sqlite.save(unique_keys=['athlete'], data=result, table_name="result")