In [81]:
import pbpstats
import pandas as pd
from pbpstats.client import Client
from nba_api.stats.endpoints import playercareerstats, commonplayerinfo, playerprofilev2
from nba_api.stats.static import players
from nba_api.stats.static import teams
import time
from alive_progress import alive_bar

In [2]:
#!pip install pbpstats --upgrade
#!pip install nba_api
#!pip install tqdm
#!pip install jupyter --upgrade
#!pip install ipywidgets --user
#!pip install alive-progress

In [3]:
# get_players returns a list of dictionaries, each representing a player.
nba_players = players.get_players()
print('Number of players fetched: {}'.format(len(nba_players)))
nba_players[:2]

Number of players fetched: 4723


[{'id': 76001,
  'full_name': 'Alaa Abdelnaby',
  'first_name': 'Alaa',
  'last_name': 'Abdelnaby',
  'is_active': False},
 {'id': 76002,
  'full_name': 'Zaid Abdul-Aziz',
  'first_name': 'Zaid',
  'last_name': 'Abdul-Aziz',
  'is_active': False}]

In [11]:
# Anthony Davis
ad_id = '203076'
career = playercareerstats.PlayerCareerStats(player_id=ad_id)
career.get_data_frames()[0]

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,203076,2012-13,0,1610612740,NOH,20.0,64,60,1846.0,349,...,0.751,165,357,522,63,75,112,89,158,867
1,203076,2013-14,0,1610612740,NOP,21.0,67,66,2358.0,522,...,0.791,207,466,673,105,89,189,109,200,1394
2,203076,2014-15,0,1610612740,NOP,22.0,68,68,2455.0,642,...,0.805,173,523,696,149,100,200,95,141,1656
3,203076,2015-16,0,1610612740,NOP,23.0,61,61,2164.0,560,...,0.758,130,497,627,116,78,125,121,148,1481
4,203076,2016-17,0,1610612740,NOP,24.0,75,75,2708.0,770,...,0.802,172,712,884,157,94,167,181,168,2099
5,203076,2017-18,0,1610612740,NOP,25.0,75,75,2727.0,780,...,0.828,187,644,831,174,115,193,162,159,2110
6,203076,2018-19,0,1610612740,NOP,26.0,56,56,1850.0,530,...,0.794,174,498,672,218,88,135,112,132,1452
7,203076,2019-20,0,1610612747,LAL,27.0,62,62,2131.0,551,...,0.846,142,435,577,200,91,143,154,156,1618
8,203076,2020-21,0,1610612747,LAL,28.0,36,36,1162.0,301,...,0.738,62,224,286,110,45,59,74,60,786
9,203076,2021-22,0,1610612747,LAL,29.0,40,40,1404.0,370,...,0.713,106,288,394,122,49,90,82,97,927


In [5]:
ts = teams.get_teams()

In [6]:
# get players that played in NBA season 2021-22
# calculate career averages, group by team
# take care of players that were traded. Remove entry for team for which they played the least.
# export JSON file, see template

In [9]:
nba_players = [i for i in players.get_players() if i['is_active']==True]
len(nba_players)

587

In [56]:
# get stats
def get_last_season_players(player_ids):
    
    custom_headers = {
    'Host': 'stats.nba.com',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    }
    
    l = []
    eligible_seasons = ['2017-18','2018-19','2019-20','2020-21','2021-22']
    with alive_bar(total=len(player_ids), force_tty=True) as bar:
        for id2 in player_ids:
            bar()
            #career = playercareerstats.PlayerCareerStats(player_id=id2)
            career = playerprofilev2.PlayerProfileV2(player_id=str(id2))
            df = career.get_data_frames()[0]
            seasons_played = list(df['SEASON_ID'].unique())
            if len(set(seasons_played).intersection(eligible_seasons)) > 0:
                l.append(df)
    return l

In [95]:
#player_ids = [i['id'] for i in nba_players]
active_players = players.get_active_players()
active_player_ids = [i['id'] for i in active_players]
prev_5seasons_players = get_last_season_players(active_player_ids)

|████████████████████████████████████████| 587/587 [100%] in 9:55.3 (0.99/s)                                             ▂▄▆ 427/587 [73%] in 6:39 (1.1/s, eta: 2:30) 


In [97]:
len(prev_5seasons_players)

569

In [59]:
df = pd.concat([i for i in last_season_players])

In [61]:
df.to_csv('all_players.csv', index=False)

## Async version

In [70]:
import trio

In [86]:
def execute(list_of_ids):
    
    eligible_seasons = ['2017-18','2018-19','2019-20','2020-21','2021-22']
    
    async def get_player_data(id1, l):
        career = playerprofilev2.PlayerProfileV2(player_id=str(id1))
        df = career.get_data_frames()[0]
        #seasons_played = list(df['SEASON_ID'].unique())
        #if len(set(seasons_played).intersection(eligible_seasons)) > 0:
        #    l.append(df)
        l.append(df)

    async def parent(list_of_ids):
        start = time.time()
        print("parent: started! Num_ids {}".format(len(list_of_ids)))
        l = []
        async with trio.open_nursery() as nursery:
            for player_id in list_of_ids:
                nursery.start_soon(get_player_data, player_id, l)

            print("parent: waiting for children to finish...")
            # -- we exit the nursery block here --
        print("parent: all done!")
        print ('time elapsed: {:.3f} s'.format(time.time() - start))
        return l
    
    l2 = trio.run(parent, list_of_ids)
    print ('l2 {}'.format(len(l2)))
    return l2

In [87]:
list_of_ids = [76001, 76002]
l2 = execute(list_of_ids)

parent: started! Num_ids 2
parent: waiting for children to finish...
parent: all done!
time elapsed: 0.123 s
l2 2


In [91]:
#player_ids = [i['id'] for i in nba_players]
active_players = players.get_active_players()
active_player_ids = [i['id'] for i in active_players]
prev_5seasons_players = execute(active_player_ids)

parent: started! Num_ids 587
parent: waiting for children to finish...
parent: all done!
time elapsed: 1017.712 s
l2 587


In [93]:
prev_5seasons_players[-1]

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1630173,2020-21,0,1610612748,MIA,21.0,61,4,737,124,...,0.509,73,135,208,29,20,28,43,91,304
1,1630173,2021-22,0,1610612761,TOR,22.0,73,28,1725,265,...,0.595,146,327,473,82,37,41,84,151,664


In [94]:
df = pd.concat([i for i in prev_5seasons_players])
df.to_csv('all_players_v2.csv', index=False)

In [106]:
#ad_df = prev_5seasons_players = execute([ad_id])

In [105]:
203076 in active_player_ids

True

In [103]:
[i for i in active_players if i['last_name']=='Davis']

[{'id': 203076,
  'full_name': 'Anthony Davis',
  'first_name': 'Anthony',
  'last_name': 'Davis',
  'is_active': True},
 {'id': 1629056,
  'full_name': 'Terence Davis',
  'first_name': 'Terence',
  'last_name': 'Davis',
  'is_active': True}]

In [111]:
df[df.PLAYER_ID==int(1627826)]

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1627826,2016-17,0,1610612747,LAL,20.0,38,11,609,126,...,0.653,41,118,159,30,14,33,30,66,284
1,1627826,2017-18,0,1610612747,LAL,21.0,43,0,410,61,...,0.765,45,78,123,25,8,15,26,47,161
2,1627826,2018-19,0,1610612747,LAL,22.0,33,12,516,112,...,0.864,54,108,162,25,4,27,33,73,281
3,1627826,2018-19,0,1610612746,LAC,22.0,26,25,524,100,...,0.733,61,139,200,38,10,24,37,64,244
4,1627826,2018-19,0,0,TOT,22.0,59,37,1039,212,...,0.802,115,247,362,63,14,51,70,137,525
5,1627826,2019-20,0,1610612746,LAC,23.0,72,70,1326,236,...,0.747,197,346,543,82,16,66,61,168,596
6,1627826,2020-21,0,1610612746,LAC,24.0,72,33,1609,257,...,0.789,189,330,519,90,24,62,81,187,650
7,1627826,2021-22,0,1610612746,LAC,25.0,76,76,1852,310,...,0.727,217,427,644,120,36,77,114,203,785


In [110]:
df[df.TEAM_ABBREVIATION=='TOT']

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
4,1627826,2018-19,00,0,TOT,22.0,59,37,1039,212,...,0.802,115,247,362,63,14,51,70,137,525
9,201152,2014-15,00,0,TOT,27.0,76,68,2434,451,...,0.655,127,284,411,173,124,25,117,171,1071
18,201152,2021-22,00,0,TOT,33.0,52,1,845,141,...,0.469,80,128,208,104,54,18,52,81,322
2,1630589,2021-22,00,0,TOT,23.0,4,0,15,1,...,1.0,1,2,3,2,0,1,0,0,5
5,1626153,2018-19,00,0,TOT,27.0,75,13,1698,242,...,0.793,68,198,266,248,88,30,77,103,653
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,2546,2010-11,00,0,TOT,27.0,77,77,2751,684,...,0.838,118,445,563,221,68,46,206,224,1970
12,202329,2020-21,00,0,TOT,30.0,23,14,434,38,...,0.818,22,88,110,31,19,9,28,29,102
5,1628386,2020-21,00,0,TOT,23.0,63,45,1864,298,...,0.703,196,435,631,106,32,90,100,96,806
4,1629638,2021-22,00,0,TOT,23.0,65,21,1466,253,...,0.743,37,150,187,156,46,23,93,103,692
