-
Notifications
You must be signed in to change notification settings - Fork 2
/
get_olympics_sports.py
78 lines (52 loc) · 2.03 KB
/
get_olympics_sports.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import urllib.request
from bs4 import BeautifulSoup
import csv
import codecs
def remove_non_ascii_1(text, *, replacement=' '):
    """Return *text* with each non-ASCII character swapped for *replacement*.

    A character is treated as non-ASCII when its code point is 128 or above.

    Args:
        text: The string to sanitise.
        replacement: String substituted for every non-ASCII character.
            Defaults to a single space, so one-argument calls keep their
            existing behaviour.

    Returns:
        A new string in which ASCII characters are kept unchanged and in
        their original order.
    """
    return ''.join(ch if ord(ch) < 128 else replacement for ch in text)
# File that receives one "<Season>,<Year>,<Sport>" line per scraped sport.
_OUTPUT_PATH = 'sports_list_output.csv'

# Filled with the lower-cased season name and the year of the games.
_URL_TEMPLATE = 'http://www.sports-reference.com/olympics/%s/%s/'

# Attributes used to locate the table listing the sports on a games page.
_SPORTS_TABLE_ATTRS = {'class': ' stats_table suppress_all'}


def _read_games(csv_path, max_games):
    """Return at most *max_games* rows of *csv_path* as dicts keyed by header.

    The file is read once and closed before returning.
    """
    with open(csv_path, newline='') as handle:
        return list(csv.DictReader(handle))[:max_games]


def _fetch_sports(site):
    """Return the first text node of the first cell of each table row at *site*.

    Rows without any ``<td>`` cell and cells without text are skipped.

    Raises:
        RuntimeError: If the page has no table matching _SPORTS_TABLE_ATTRS.
    """
    # The response is closed as soon as the document has been parsed.
    with urllib.request.urlopen(site) as page:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(page, 'html.parser')

    table = soup.find('table', _SPORTS_TABLE_ATTRS)
    if table is None:
        raise RuntimeError('no sports table found at %s' % site)

    sports = []
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # A row with no <td> has no first cell to read.
            continue
        sport = cells[0].find(text=True)
        if sport is None:
            # The first cell holds no text.
            continue
        sports.append(sport)
    return sports


def _write_season(out, season, csv_path, max_games):
    """Scrape every games listed in *csv_path* and write its sports to *out*.

    Args:
        out: Writable text file object.
        season: Label written in the first column ("Winter" or "Summer");
            its lower-cased form selects the URL path.
        csv_path: CSV file with a 'Year' column, one row per games.
        max_games: Upper bound on the number of rows processed.
    """
    for game in _read_games(csv_path, max_games):
        year = str(game['Year'])
        site = _URL_TEMPLATE % (season.lower(), year)
        for sport in _fetch_sports(site):
            line = season + "," + year + "," + sport + '\n'
            # Strip non-ASCII characters from the whole line before writing.
            out.write(remove_non_ascii_1(line))


def main():
    """Write the sports of the winter and then the summer games to the output file."""
    # The context manager closes the output file even if a request fails.
    with open(_OUTPUT_PATH, 'w') as out:
        # get winter olympic data (first 21 rows of the list)
        _write_season(out, "Winter", "winterolympics.csv", 21)
        # get summer olympic data (first 28 rows of the list)
        _write_season(out, "Summer", "summerolympics.csv", 28)


# Runs on execution/import, like the original flat script.
main()