# Fetching Data

In [1]:
import requests
import pandas as pd
import time

In [2]:
pd.set_option('display.max_columns', None)

In [6]:
def make_request(endpoint, params=None, record_path=None, verbose=False):
    root = "https://www.balldontlie.io/api/v1/"
    response = requests.get(root + endpoint, params=params)
    if response.status_code != 200:
        print(response.status_code)
        return response
    if verbose: print("Success!")
    res = response.json()
    res = pd.json_normalize(res, record_path=record_path)
    return res

### player data

In [4]:
player_data = make_request("players", params={"page":1, "per_page":100}, record_path="data")

for i in range(2,39):
    print(i)
    time.sleep(0.5)
    new_data = make_request("players", params={"page":i, "per_page":100}, record_path="data")
    player_data = player_data.append(new_data)

player_data.set_index("id", inplace=True)

player_data.to_csv("data/players.csv")

### game data

In [40]:
game_data = make_request("games", params={"page":1, "per_page":100, "seasons":[2017,2018,2019,2020]}, record_path="data", verbose=True)

Success!


In [41]:
%%time
for i in range(2,501):
    if i%10 == 0: print(i)
    time.sleep(1.2)
    new_data = make_request("games", params={"page":i, "per_page":100, "seasons":[2017,2018,2019,2020]}, record_path="data")
    game_data = game_data.append(new_data)

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
CPU times: user 41.9 s, sys: 2 s, total: 43.9 s
Wall time: 17min 51s


In [45]:
game_data.set_index("id", inplace=True)

In [51]:
game_data.to_csv("data/games.csv")

### stats data

In [113]:
# there are over 11000 pages!
stats_data_meta = make_request("stats", params={"page":1, "per_page":100, "seasons[]":2015}, record_path=None)

In [114]:
stats_data_meta

Unnamed: 0,data,meta.total_pages,meta.current_page,meta.next_page,meta.per_page,meta.total_count
0,"[{'id': 748452, 'ast': None, 'blk': None, 'dre...",337,1,2,100,33659


In [115]:
stats_data = pd.read_csv("data/stats_raw.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [116]:
stats_data

Unnamed: 0,id,ast,blk,dreb,fg3_pct,fg3a,fg3m,fg_pct,fga,fgm,ft_pct,fta,ftm,min,oreb,pf,pts,reb,stl,turnover,game.id,game.date,game.home_team_id,game.home_team_score,game.period,game.postseason,game.season,game.status,game.time,game.visitor_team_id,game.visitor_team_score,player.id,player.first_name,player.height_feet,player.height_inches,player.last_name,player.position,player.team_id,player.weight_pounds,team.id,team.abbreviation,team.city,team.conference,team.division,team.full_name,team.name,player
0,2848769,3.0,1.0,4.0,50.0,2.0,1.0,43.8,16.0,7.0,100.0,7.0,7.0,24:56,1.0,3.0,22.0,5.0,3.0,1.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,140.0,Kevin,6.0,9.0,Durant,F,3.0,240.0,3,BKN,Brooklyn,East,Atlantic,Brooklyn Nets,Nets,
1,2848775,1.0,0.0,0.0,50.0,2.0,1.0,75.0,4.0,3.0,0.0,0.0,0.0,17:17,0.0,3.0,7.0,0.0,1.0,1.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,188.0,Jeff,6.0,9.0,Green,F,3.0,235.0,3,BKN,Brooklyn,East,Atlantic,Brooklyn Nets,Nets,
2,2848770,1.0,1.0,9.0,0.0,0.0,0.0,66.7,3.0,2.0,0.0,2.0,0.0,17:03,2.0,2.0,4.0,11.0,1.0,3.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,250.0,DeAndre,6.0,11.0,Jordan,C,3.0,265.0,3,BKN,Brooklyn,East,Atlantic,Brooklyn Nets,Nets,
3,2848759,10.0,0.0,1.0,20.0,10.0,2.0,33.3,21.0,7.0,100.0,4.0,4.0,30:19,3.0,1.0,20.0,4.0,2.0,3.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,115.0,Stephen,6.0,3.0,Curry,G,10.0,190.0,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,
4,2848772,4.0,0.0,3.0,57.1,7.0,4.0,62.5,16.0,10.0,100.0,2.0,2.0,25:18,1.0,3.0,26.0,4.0,0.0,1.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,228.0,Kyrie,6.0,3.0,Irving,G,3.0,193.0,3,BKN,Brooklyn,East,Atlantic,Brooklyn Nets,Nets,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159980,808528,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1:04,0.0,0.0,0.0,0.0,0.0,0.0,34614,2017-06-12T00:00:00.000Z,10,129,4,True,2016,Final,,6,120,1522.0,Matt,,,Barnes,,13.0,,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,
159981,808529,0.0,1.0,3.0,0.0,0.0,0.0,0.5,4.0,2.0,0.0,0.0,0.0,10:31,0.0,1.0,4.0,3.0,0.0,1.0,34614,2017-06-12T00:00:00.000Z,10,129,4,True,2016,Final,,6,120,1481.0,David,,,West,,19.0,,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,
159982,808530,,,,,,,,,,,,,,,,,,,,34614,2017-06-12T00:00:00.000Z,10,129,4,True,2016,Final,,6,120,99.0,Ian,6.0,3.0,Clark,G,19.0,175.0,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,
159983,808531,,,,,,,,,,,,,,,,,,,,34614,2017-06-12T00:00:00.000Z,10,129,4,True,2016,Final,,6,120,2116.0,James,,,Michael McAdoo,,10.0,,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,


In [117]:
for i in range(1, 338):
    print(i)
    time.sleep(1.1)
    new_data = make_request("stats", params={"page":i, "per_page":100, "seasons[]":2015}, record_path="data")
    stats_data = stats_data.append(new_data)
print("Done!")

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [121]:
stats_data

Unnamed: 0_level_0,ast,blk,dreb,fg3_pct,fg3a,fg3m,fg_pct,fga,fgm,ft_pct,fta,ftm,min,oreb,pf,pts,reb,stl,turnover,game.id,game.date,game.home_team_id,game.home_team_score,game.period,game.postseason,game.season,game.status,game.time,game.visitor_team_id,game.visitor_team_score,player.id,player.first_name,player.height_feet,player.height_inches,player.last_name,player.position,player.team_id,player.weight_pounds,team.id,team.abbreviation,team.city,team.conference,team.division,team.full_name,team.name,player
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
2848769,3.0,1.0,4.0,50.0,2.0,1.0,43.8,16.0,7.0,100.0,7.0,7.0,24:56,1.0,3.0,22.0,5.0,3.0,1.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,140.0,Kevin,6.0,9.0,Durant,F,3.0,240.0,3,BKN,Brooklyn,East,Atlantic,Brooklyn Nets,Nets,
2848775,1.0,0.0,0.0,50.0,2.0,1.0,75.0,4.0,3.0,0.0,0.0,0.0,17:17,0.0,3.0,7.0,0.0,1.0,1.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,188.0,Jeff,6.0,9.0,Green,F,3.0,235.0,3,BKN,Brooklyn,East,Atlantic,Brooklyn Nets,Nets,
2848770,1.0,1.0,9.0,0.0,0.0,0.0,66.7,3.0,2.0,0.0,2.0,0.0,17:03,2.0,2.0,4.0,11.0,1.0,3.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,250.0,DeAndre,6.0,11.0,Jordan,C,3.0,265.0,3,BKN,Brooklyn,East,Atlantic,Brooklyn Nets,Nets,
2848759,10.0,0.0,1.0,20.0,10.0,2.0,33.3,21.0,7.0,100.0,4.0,4.0,30:19,3.0,1.0,20.0,4.0,2.0,3.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,115.0,Stephen,6.0,3.0,Curry,G,10.0,190.0,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,
2848772,4.0,0.0,3.0,57.1,7.0,4.0,62.5,16.0,10.0,100.0,2.0,2.0,25:18,1.0,3.0,26.0,4.0,0.0,1.0,127502,2020-12-22T00:00:00.000Z,3,125,4,False,2020,Final,,10,99,228.0,Kyrie,6.0,3.0,Irving,G,3.0,193.0,3,BKN,Brooklyn,East,Atlantic,Brooklyn Nets,Nets,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
774836,0.0,0.0,0.0,1.0,1.0,1.0,0.5,2.0,1.0,0.0,0.0,0.0,4:24,0.0,1.0,3.0,0.0,0.0,0.0,33921,2016-06-19T00:00:00.000Z,10,89,4,True,2015,Final,,6,93,1476.0,Leandro,,,Barbosa,,24.0,,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,
774837,0.0,2.0,2.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,4:34,2.0,0.0,0.0,4.0,0.0,0.0,33921,2016-06-19T00:00:00.000Z,10,89,4,True,2015,Final,,6,93,1818.0,Marreese,,,Speights,,23.0,,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,
774838,,,,,,,,,,,,,,,,,,,,33921,2016-06-19T00:00:00.000Z,10,89,4,True,2015,Final,,6,93,99.0,Ian,6.0,3.0,Clark,G,19.0,175.0,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,
774839,,,,,,,,,,,,,,,,,,,,33921,2016-06-19T00:00:00.000Z,10,89,4,True,2015,Final,,6,93,2116.0,James,,,Michael McAdoo,,10.0,,10,GSW,Golden State,West,Pacific,Golden State Warriors,Warriors,


In [119]:
stats_data.set_index("id", inplace=True)

In [120]:
stats_data.to_csv("data/stats_raw.csv")

In [122]:
# there are over 11000 pages!
all_stats_data_meta = make_request("stats", params={"page":1, "per_page":100}, record_path=None)

In [123]:
all_stats_data_meta

Unnamed: 0,data,meta.total_pages,meta.current_page,meta.next_page,meta.per_page,meta.total_count
0,"[{'id': 1069008, 'ast': 0, 'blk': 1, 'dreb': 2...",11482,1,2,100,1148155


In [124]:
# there are over 11000 pages!
all_stats_data = make_request("stats", params={"page":1, "per_page":100}, record_path="data")

In [125]:
all_stats_data

Unnamed: 0,id,ast,blk,dreb,fg3_pct,fg3a,fg3m,fg_pct,fga,fgm,ft_pct,fta,ftm,min,oreb,pf,pts,reb,stl,turnover,game.id,game.date,game.home_team_id,game.home_team_score,game.period,game.postseason,game.season,game.status,game.time,game.visitor_team_id,game.visitor_team_score,player.id,player.first_name,player.height_feet,player.height_inches,player.last_name,player.position,player.team_id,player.weight_pounds,team.id,team.abbreviation,team.city,team.conference,team.division,team.full_name,team.name
0,1069008,0.0,1.0,2.0,0.200,5.0,1.0,0.333,9.0,3.0,0.000,0.0,0.0,20:08,0.0,0.0,7.0,2.0,0.0,2.0,45237,2019-01-17T00:00:00.000Z,4,114,4,False,2018,Final,,26,95,415,Iman,6.0,5.0,Shumpert,G,3,215.0,26,SAC,Sacramento,West,Pacific,Sacramento Kings,Kings
1,1069009,4.0,0.0,5.0,0.000,2.0,0.0,0.200,5.0,1.0,0.000,0.0,0.0,19:22,1.0,1.0,2.0,6.0,0.0,0.0,45237,2019-01-17T00:00:00.000Z,4,114,4,False,2018,Final,,26,95,49,Nemanja,6.0,10.0,Bjelica,F,16,234.0,26,SAC,Sacramento,West,Pacific,Sacramento Kings,Kings
2,1069010,4.0,1.0,5.0,0.000,0.0,0.0,0.667,6.0,4.0,0.000,0.0,0.0,27:24,6.0,2.0,8.0,11.0,3.0,2.0,45237,2019-01-17T00:00:00.000Z,4,114,4,False,2018,Final,,26,95,91,Willie,7.0,0.0,Cauley-Stein,C,7,240.0,26,SAC,Sacramento,West,Pacific,Sacramento Kings,Kings
3,1069011,1.0,0.0,1.0,0.545,11.0,6.0,0.500,18.0,9.0,0.000,0.0,0.0,32:06,0.0,2.0,24.0,1.0,2.0,0.0,45237,2019-01-17T00:00:00.000Z,4,114,4,False,2018,Final,,26,95,210,Buddy,6.0,4.0,Hield,G,26,214.0,26,SAC,Sacramento,West,Pacific,Sacramento Kings,Kings
4,1069012,8.0,1.0,5.0,0.000,2.0,0.0,0.400,10.0,4.0,0.667,3.0,2.0,30:30,0.0,4.0,10.0,5.0,1.0,4.0,45237,2019-01-17T00:00:00.000Z,4,114,4,False,2018,Final,,26,95,161,De'Aaron,6.0,3.0,Fox,G,26,175.0,26,SAC,Sacramento,West,Pacific,Sacramento Kings,Kings
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1069101,2.0,0.0,6.0,0.000,0.0,0.0,0.429,7.0,3.0,0.000,2.0,0.0,21:38,2.0,1.0,6.0,8.0,1.0,0.0,45167,2019-01-17T00:00:00.000Z,12,96,4,False,2018,Final,,23,120,406,Domantas,6.0,11.0,Sabonis,F,12,240.0,12,IND,Indiana,East,Central,Indiana Pacers,Pacers
96,1069102,0.0,0.0,1.0,0.250,4.0,1.0,0.429,14.0,6.0,0.000,0.0,0.0,14:51,1.0,2.0,13.0,2.0,1.0,0.0,45167,2019-01-17T00:00:00.000Z,12,96,4,False,2018,Final,,23,120,150,Tyreke,6.0,6.0,Evans,G,12,220.0,12,IND,Indiana,East,Central,Indiana Pacers,Pacers
97,1069103,0.0,0.0,0.0,0.000,0.0,0.0,1.000,1.0,1.0,0.000,0.0,0.0,5:57,0.0,0.0,2.0,0.0,1.0,0.0,45167,2019-01-17T00:00:00.000Z,12,96,4,False,2018,Final,,23,120,270,TJ,6.0,10.0,Leaf,F,25,225.0,12,IND,Indiana,East,Central,Indiana Pacers,Pacers
98,1069104,0.0,0.0,0.0,0.000,3.0,0.0,0.250,4.0,1.0,0.000,0.0,0.0,11:36,1.0,1.0,2.0,1.0,0.0,1.0,45167,2019-01-17T00:00:00.000Z,12,96,4,False,2018,Final,,23,120,305,Doug,6.0,8.0,McDermott,F,12,225.0,12,IND,Indiana,East,Central,Indiana Pacers,Pacers


In [126]:
for i in range(2, 11483):
    if i%100 == 0: print(i)
    time.sleep(1.5)
    new_data = make_request("stats", params={"page":i, "per_page":100}, record_path="data")
    all_stats_data = all_stats_data.append(new_data)
print("Done!")

In [126]:
#all_stats_data.set_index("id", inplace=True)
#all_stats_data.to_csv("data/all_stats.csv")

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
10000
10100
10200
10300
10400
10500
10600
10700
10800
10900
11000
11100
11200
11300
11400
Done!
