In [25]:
import requests
from tqdm import trange
import json
import time
import pandas as pd
import numpy as np

In [57]:
# Request page 1 of the 2018-season stats and keep the decoded JSON body for inspection.
first_page=requests.get('https://www.balldontlie.io/api/v1/stats?seasons[]=2018&page=1')
test=first_page.json()

In [58]:
# Show the pagination metadata that came back with the sample request.
test['meta']

{'total_pages': 1317,
 'current_page': 1,
 'next_page': 2,
 'per_page': 25,
 'total_count': 32901}

## Data structure: each record in the stats response is per player, per game
1. stats[dict]
    1. data[list]
        1. player[dict]
            1. id[int]
            2. first_name[str]
            3. last_name[str]
        2. game[dict]
            1. id[int]
            2. ...
        3. pts[int]
    2. meta[dict]

In [41]:
# Page count reported by the sample request; the value is tied to the page
# size that request used (per_page is part of the same metadata block).
pagination=test['meta']
max_page=pagination['total_pages']

## get all stats for season 2018-2019

In [44]:
# Download every page of 2018-19 stats at 100 rows per page.
# `total_pages` depends on `per_page`, so the page count is read from the first
# per_page=100 response instead of from a request made with a different page
# size; a mismatched count makes the loop request pages that do not exist.
stats_url='https://www.balldontlie.io/api/v1/stats?seasons[]=2018&per_page=100&page={}'
all_stats=[]

response=requests.get(stats_url.format(1))
response.raise_for_status()  # stop on an HTTP error instead of parsing an error body as data
stats=response.json()
all_stats.extend(stats['data'])
total_pages=stats['meta']['total_pages']

# Page 1 is already stored; fetch the remaining pages.
for page in trange(2, total_pages+1):
    time.sleep(1/2)  # pause between requests (presumably to respect the API's rate limit)
    response=requests.get(stats_url.format(page))
    response.raise_for_status()
    stats=response.json()
    all_stats.extend(stats['data'])
        

100%|██████████| 330/330 [06:04<00:00,  1.11s/it]


## Verify that the number of collected rows matches the reported total count

In [46]:
# Number of rows collected; compare with `total_count` in the metadata shown earlier.
len(all_stats)

32901

## Save the stats as a JSON file

In [54]:
# Serialise the collected rows and write them to all_stats.json in the working directory.
with open('all_stats.json', mode='w') as stats_file:
    stats_file.write(json.dumps(all_stats))

## Get the top 10 scorers

In [94]:
# Reduce each stats row to a (player id, game id, points) tuple.
# Rows whose 'player' entry is empty are skipped.
pts_all_game=[]
for idx in trange(len(all_stats)):
    row=all_stats[idx]
    if not row['player']:
        continue
    pts_all_game.append((row['player']['id'], row['game']['id'], row['pts']))


100%|██████████| 32901/32901 [00:00<00:00, 412062.35it/s]


In [96]:
# One row per (player, game) with the points scored in that game.
stat_columns=['player_id','game_id','pts']
df=pd.DataFrame.from_records(data=pts_all_game, columns=stat_columns)

In [204]:
# Average points per game for each player, highest first, top ten only.
# Rows with no recorded points are dropped before grouping: 'size' counts every
# row while 'mean' skips nulls, so without this step num_game could be larger
# than the number of games the average is actually taken over.
top_10_df = (
    df.dropna(subset=['pts'])
      .groupby('player_id')
      .agg({'game_id':'size', 'pts':'mean'})
      .rename(columns={'game_id':'num_game','pts':'avg_pts_per_game'})
      .sort_values(by=['avg_pts_per_game'], ascending=False)
      .reset_index()
      .head(10)
)

In [205]:
# Show the ten highest per-game scoring averages.
top_10_df

Unnamed: 0,player_id,num_game,avg_pts_per_game
0,192,92,35.597826
1,115,93,27.365591
2,237,59,27.338983
3,172,85,27.190476
4,274,88,27.139535
5,140,92,26.956522
6,145,78,26.077922
7,15,92,25.956522
8,37,84,25.869048
9,465,82,25.865854


## Get player information for the top 10 scorers

In [162]:
# Player ids of the top ten scorers. The column is named 'player_id' in
# top_10_df (it comes from the groupby key); there is no 'id' column.
IDs=top_10_df['player_id'].to_list()

In [169]:
# Look up each top-10 player by id, one request per player.
all_infos=[]
for i in IDs:
    response=requests.get(f'https://www.balldontlie.io/api/v1/players/{i}')
    # Without this check an error body would be stored as if it were a player record.
    response.raise_for_status()
    player_info=response.json()
    all_infos.append(player_info)
    time.sleep(1/2)  # pause between requests (presumably to respect the API's rate limit)

In [177]:
# Field names available on a player record, taken from the first one fetched.
[field for field in all_infos[0].keys()]

['id',
 'first_name',
 'height_feet',
 'height_inches',
 'last_name',
 'position',
 'team',
 'weight_pounds']

In [235]:
# Column order for the player table: identity first, then physical attributes.
info_column=[
    'id', 'first_name', 'last_name',
    'height_feet', 'height_inches',
    'position', 'team', 'weight_pounds',
]

In [239]:
# Tabulate the player records, drop the team column, and rename 'id' to
# 'player_id' so it matches the column name used in top_10_df.
players_table=pd.DataFrame.from_records(all_infos, columns=info_column)
info_df=(
    players_table
    .drop('team', axis=1)
    .rename(columns={'id':'player_id'})
)

In [240]:
# Show the player details table.
info_df

Unnamed: 0,player_id,first_name,last_name,height_feet,height_inches,position,weight_pounds
0,192,James,Harden,6,5,G,220
1,115,Stephen,Curry,6,3,G,190
2,237,LeBron,James,6,8,F,250
3,172,Paul,George,6,9,F,220
4,274,Kawhi,Leonard,6,7,F,230
5,140,Kevin,Durant,6,9,F,240
6,145,Joel,Embiid,7,0,F-C,250
7,15,Giannis,Antetokounmpo,6,11,F,242
8,37,Bradley,Beal,6,5,G,207
9,465,Kemba,Walker,6,1,G,184


In [260]:
# Attach player details to the scoring table. The row position after the merge
# becomes the 'rank' column, so rank is zero-based (0 is the first row).
scorers_with_info=pd.merge(top_10_df, info_df, on='player_id')
result=scorers_with_info.reset_index().rename(columns={'index':'rank'})

In [267]:
# Write the final table to result.csv without the DataFrame index.
result.to_csv(path_or_buf='result.csv', index=False)