In [187]:
from pymongo import MongoClient
import pprint

import pandas as pd

import numpy as np

import json
import time

In [119]:
# Import proper modules from nba_api

from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import FranchiseHistory, PlayerDashboardByYearOverYear, CommonPlayerInfo, CommonAllPlayers

In [3]:
# Fetch the franchise history (relocated and defunct teams included) and keep
# only the plain-dict form of the response for the inspection cells below.

all_teams = FranchiseHistory().get_dict()

In [4]:
# Investigate structure of the teams dictionary
all_teams.keys()

dict_keys(['resource', 'parameters', 'resultSets'])

In [5]:
# Report the Python type stored under each top-level key of the response.
for top_key in ('resource', 'parameters', 'resultSets'):
    print(f"'{top_key}' type: {type(all_teams[top_key])}")

'resource' type: <class 'str'>
'parameters' type: <class 'dict'>
'resultSets' type: <class 'list'>


In [6]:
all_teams['parameters'].keys()

dict_keys(['LeagueID'])

In [7]:
# How many result sets does the franchise-history response carry?
len(all_teams['resultSets'])

2

In [8]:
# Confirm what kind of object each entry of 'resultSets' is.
for result_set in all_teams['resultSets']:
    print(type(result_set))

<class 'dict'>
<class 'dict'>


In [9]:
all_teams['resultSets'][0].keys()

dict_keys(['name', 'headers', 'rowSet'])

In [10]:
# Check that every result set exposes the same keys.
for result_set in all_teams['resultSets']:
    print(result_set.keys())

dict_keys(['name', 'headers', 'rowSet'])
dict_keys(['name', 'headers', 'rowSet'])


In [11]:
# Summarise each result set: its name, its column headers, and the container
# type of its rows. The label names 'rowSet' because that is the object whose
# type is printed (it was previously mislabelled as 'resultSets').
for result_set in all_teams['resultSets']:
    print(f"{result_set['name']}, {result_set['headers']}, rowSet type: {type(result_set['rowSet'])}")


FranchiseHistory, ['LEAGUE_ID', 'TEAM_ID', 'TEAM_CITY', 'TEAM_NAME', 'START_YEAR', 'END_YEAR', 'YEARS', 'GAMES', 'WINS', 'LOSSES', 'WIN_PCT', 'PO_APPEARANCES', 'DIV_TITLES', 'CONF_TITLES', 'LEAGUE_TITLES'], resultSets type: <class 'list'>
DefunctTeams, ['LEAGUE_ID', 'TEAM_ID', 'TEAM_CITY', 'TEAM_NAME', 'START_YEAR', 'END_YEAR', 'YEARS', 'GAMES', 'WINS', 'LOSSES', 'WIN_PCT', 'PO_APPEARANCES', 'DIV_TITLES', 'CONF_TITLES', 'LEAGUE_TITLES'], resultSets type: <class 'list'>


# So we now know that the full team history response is a dictionary whose 'resultSets' key holds a list of two sub-dictionaries: one with the histories of current franchises ('FranchiseHistory') and one with the histories of defunct teams ('DefunctTeams').


In [12]:
# Row counts of the two result sets, in order: index 0, then index 1.
len(all_teams['resultSets'][0]['rowSet']),len(all_teams['resultSets'][1]['rowSet'])

(74, 15)

In [26]:
# Collect every TEAM_ID (column 1 of each row) across both result sets,
# echoing each row as it is visited, then reduce to the distinct ids.

labelled_row_sets = (
    ('Current Team:', all_teams['resultSets'][0]['rowSet']),
    ('Defunct Team: ', all_teams['resultSets'][1]['rowSet']),
)

team_ids = []
for prefix, rows in labelled_row_sets:
    for row in rows:
        print(f'{prefix}{row}')
        team_ids.append(row[1])

# np.unique returns the distinct ids as a sorted array.
unique_id = np.unique(team_ids)

Current Team:['00', 1610612737, 'Atlanta', 'Hawks', '1949', '2019', 71, 5621, 2767, 2854, 0.492, 46, 11, 0, 1]
Current Team:['00', 1610612737, 'Atlanta', 'Hawks', '1968', '2019', 52, 4201, 2068, 2133, 0.492, 33, 5, 0, 0]
Current Team:['00', 1610612737, 'St. Louis', 'Hawks', '1955', '1967', 13, 1008, 555, 453, 0.55, 12, 6, 0, 1]
Current Team:['00', 1610612737, 'Milwaukee', 'Hawks', '1951', '1954', 4, 280, 90, 190, 0.321, 0, 0, 0, 0]
Current Team:['00', 1610612737, 'Tri-Cities', 'Blackhawks', '1949', '1950', 2, 132, 54, 78, 0.409, 1, 0, 0, 0]
Current Team:['00', 1610612738, 'Boston', 'Celtics', '1946', '2019', 74, 5789, 3421, 2367, 0.59, 56, 31, 9, 17]
Current Team:['00', 1610612751, 'Brooklyn', 'Nets', '1976', '2019', 44, 3542, 1480, 2062, 0.417, 20, 4, 2, 0]
Current Team:['00', 1610612751, 'Brooklyn', 'Nets', '2012', '2019', 8, 638, 272, 366, 0.426, 4, 0, 0, 0]
Current Team:['00', 1610612751, 'New Jersey', 'Nets', '1977', '2011', 35, 2822, 1186, 1636, 0.42, 16, 4, 2, 0]
Current Team:['

In [18]:
unique_id

array([1610610023, 1610610024, 1610610025, 1610610026, 1610610027,
       1610610028, 1610610029, 1610610030, 1610610031, 1610610032,
       1610610033, 1610610034, 1610610035, 1610610036, 1610610037,
       1610612737, 1610612738, 1610612739, 1610612740, 1610612741,
       1610612742, 1610612743, 1610612744, 1610612745, 1610612746,
       1610612747, 1610612748, 1610612749, 1610612750, 1610612751,
       1610612752, 1610612753, 1610612754, 1610612755, 1610612756,
       1610612757, 1610612758, 1610612759, 1610612760, 1610612761,
       1610612762, 1610612763, 1610612764, 1610612765, 1610612766])

# Now that we have unique team IDs, we need to bridge from individual teams to the individual player data for each team.

The API documentation tells us that we can get individual player data from the PlayerDashboardByYearOverYear() object.

Let's dive into the dataset to determine how to best compile player data by season.



In [102]:
# Request a single player/season as a sample for studying the response layout.
# NOTE(review): player_id 2544 is a hard-coded sample id; the recorded output
# shows team CLE for 2017-18, presumably LeBron James — confirm.
one_player = PlayerDashboardByYearOverYear(player_id = 2544, season = '2017-18')

In [103]:
# Replace the endpoint object with its dictionary form.
# NOTE: this rebinds the same name, so the cell cannot be re-run on its own —
# after the first run one_player is a plain dict, which has no get_dict().
one_player = one_player.get_dict()

In [104]:
one_player.keys()

dict_keys(['resource', 'parameters', 'resultSets'])

In [105]:
# List each top-level key of the player response with the type of its value.
for key, value in one_player.items():
    print(f' {key}: type {type(value)}')

 resource: type <class 'str'>
 parameters: type <class 'dict'>
 resultSets: type <class 'list'>


In [107]:
# How many result sets does the player dashboard response carry?
len(one_player['resultSets'])

2

In [108]:
# Confirm what kind of object each player result set is.
for result_set in one_player['resultSets']:
    print(type(result_set))

<class 'dict'>
<class 'dict'>


In [109]:
# Show the keys exposed by each player result set.
for result_set in one_player['resultSets']:
    print(result_set.keys())

dict_keys(['name', 'headers', 'rowSet'])
dict_keys(['name', 'headers', 'rowSet'])


In [110]:
# Dump every field of every result set: first the field name with the type of
# its content, then the content itself.
for result_set in one_player['resultSets']:
    for field, content in result_set.items():
        print(f'{field}:{type(content)}')
        print(content)

name:<class 'str'>
OverallPlayerDashboard
headers:<class 'list'>
['GROUP_SET', 'GROUP_VALUE', 'TEAM_ID', 'TEAM_ABBREVIATION', 'MAX_GAME_DATE', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK', 'CFID', 'CFPARAMS']
rowSet:<class 'list'>
[['Overall', '2017-18', 1610612739, 'CLE', '2018-04-11T00:00:00', 82, 50, 32, 0.61, 3025.588333333333, 857, 1580, 0.542, 149, 406, 0.367, 388, 531, 0.731, 97, 612, 709, 747, 347, 116, 71, 68, 136, 439, 22

In summary, the dictionary returned by PlayerDashboardByYearOverYear(...).get_dict() has a 'resultSets' key, which holds two sub-dictionaries for the requested player: one with overall data at ['resultSets'][0], and one with year-by-year statistics at ['resultSets'][1].

Now we can create a list of unique player IDs to use when instantiating PlayerDashboardByYearOverYear() objects, so that we can pull the dataset for each player.

Because we are only concerned with the 20 most recent seasons, we are only concerned with players who either ended their careers after the 1998-99 season, or those who started their careers in that same season or later.

The CommonAllPlayers() class will allow us to identify those individuals.

In [140]:
# Despite its name, `season` holds the dictionary form of the
# CommonAllPlayers response, not a single season.
season = CommonAllPlayers().get_dict()

In [141]:
# List each top-level key of the all-players response with its value type.
for key, value in season.items():
    print(f'{key}, {type(value)}')

resource, <class 'str'>
parameters, <class 'dict'>
resultSets, <class 'list'>


In [142]:
# How many result sets does the all-players response carry?
len(season['resultSets'])

1

In [144]:
type(season['resultSets'][0])

dict

In [147]:
season['resultSets'][0].keys()

dict_keys(['name', 'headers', 'rowSet'])

In [151]:
type(season['resultSets'][0]['rowSet'])

list

In [152]:
# Number of rows in the only result set (presumably one row per listed player — confirm).
len(season['resultSets'][0]['rowSet'])

4509

In [128]:
# Print each column name next to its position, so that rowSet entries can be
# indexed by number further down.
for position, column in enumerate(season['resultSets'][0]['headers']):
    print(f'{column}: {position}')

PERSON_ID: 0
DISPLAY_LAST_COMMA_FIRST: 1
DISPLAY_FIRST_LAST: 2
ROSTERSTATUS: 3
FROM_YEAR: 4
TO_YEAR: 5
PLAYERCODE: 6
TEAM_ID: 7
TEAM_CITY: 8
TEAM_NAME: 9
TEAM_ABBREVIATION: 10
TEAM_CODE: 11
GAMES_PLAYED_FLAG: 12
OTHERLEAGUE_EXPERIENCE_CH: 13


Based on the above, to determine for a given player when their career started and when their last season was (or whether they are still active), we can read index 4 (FROM_YEAR) and index 5 (TO_YEAR) of each ['rowSet'] entry.

Let's determine which players are relevant to us by iterating through the entire ['rowSet']

In [170]:
# Keep the PERSON_ID (column 0) of every player whose FROM_YEAR (column 4) is
# 1998 or later, or whose TO_YEAR (column 5) is 1999 or later. The year
# columns are passed through int() before comparison.
all_player_rows = season['resultSets'][0]['rowSet']

player = np.unique([
    row[0]
    for row in all_player_rows
    if int(row[4]) >= 1998 or int(row[5]) >= 1999
])
player
    

array([      3,      15,      21, ..., 1629750, 1629752, 1629760])

We now have a list of unique player IDs that are pertinent to our inquiry. We can plug these back into the PlayerDashboardByYearOverYear() object to get our relevant data.

We'll upload this data to MongoDB, then load it back into a dataframe and begin our analysis.

In [186]:
# Collect one flat record (column name -> value) per player-season, taken from
# the by-year result set (index 1) of every relevant player.
player_season_records = []

for p in player:
    # NOTE(review): each iteration presumably issues one request to the stats
    # API, with no pause or retry — confirm that is acceptable for the full list.
    one_player = PlayerDashboardByYearOverYear(player_id = p)
    one_player = one_player.get_dict()
    by_year = one_player['resultSets'][1]
    for i in by_year['rowSet']:
        # Build the whole row in one go. Creating the dict inside the
        # per-field loop reset it on every column, so only the last
        # header/value pair ever survived and no record was kept.
        d_play = dict(zip(by_year['headers'], i))
        # The by-year columns carry no player identifier, so attach it here.
        # Cast to a built-in int: np.unique yields NumPy integers, which are
        # not plain Python ints.
        d_play['PLAYER_ID'] = int(p)
        player_season_records.append(d_play)


GROUP_SET: By Year
GROUP_VALUE: 2014-15
TEAM_ID: 1610612758
TEAM_ABBREVIATION: SAC
MAX_GAME_DATE: 2015-04-10T00:00:00
GP: 47
W: 16
L: 31
W_PCT: 0.34
MIN: 763.8416666666667
FGM: 58
FGA: 137
FG_PCT: 0.423
FG3M: 0
FG3A: 2
FG3_PCT: 0.0
FTM: 60
FTA: 97
FT_PCT: 0.619
OREB: 97
DREB: 202
REB: 299
AST: 32
TOV: 45
STL: 22
BLK: 5
BLKA: 33
PF: 91
PFD: 79
PTS: 176
PLUS_MINUS: -45
NBA_FANTASY_PTS: 618.8
DD2: 2
TD3: 0
GP_RANK: 9
W_RANK: 9
L_RANK: 10
W_PCT_RANK: 12
MIN_RANK: 11
FGM_RANK: 9
FGA_RANK: 9
FG_PCT_RANK: 12
FG3M_RANK: 2
FG3A_RANK: 2
FG3_PCT_RANK: 2
FTM_RANK: 9
FTA_RANK: 9
FT_PCT_RANK: 1
OREB_RANK: 11
DREB_RANK: 9
REB_RANK: 9
AST_RANK: 8
TOV_RANK: 8
STL_RANK: 12
BLK_RANK: 10
BLKA_RANK: 11
PF_RANK: 6
PFD_RANK: 9
PTS_RANK: 9
PLUS_MINUS_RANK: 7
NBA_FANTASY_PTS_RANK: 10
DD2_RANK: 6
TD3_RANK: 1
CFID: 264
CFPARAMS: 2014-15
GROUP_SET: By Year
GROUP_VALUE: 2013-14
TEAM_ID: -1
TEAM_ABBREVIATION: TOT
MAX_GAME_DATE: 2014-04-12T00:00:00
GP: 54
W: 18
L: 36
W_PCT: 0.333
MIN: 898.9816666666667
FGM: 71
FGA: 

GROUP_SET: By Year
GROUP_VALUE: 2002-03
TEAM_ID: 1610612742
TEAM_ABBREVIATION: DAL
MAX_GAME_DATE: 2003-02-21T00:00:00
GP: 11
W: 8
L: 3
W_PCT: 0.727
MIN: 89.39333333333333
FGM: 8
FGA: 35
FG_PCT: 0.229
FG3M: 1
FG3A: 5
FG3_PCT: 0.2
FTM: 0
FTA: 0
FT_PCT: 0.0
OREB: 4
DREB: 4
REB: 8
AST: 6
TOV: 6
STL: 3
BLK: 0
BLKA: 1
PF: 11
PFD: 0
PTS: 17
PLUS_MINUS: -60
NBA_FANTASY_PTS: 38.6
DD2: 0
TD3: 0
GP_RANK: 1
W_RANK: 1
L_RANK: 1
W_PCT_RANK: 1
MIN_RANK: 1
FGM_RANK: 1
FGA_RANK: 1
FG_PCT_RANK: 1
FG3M_RANK: 1
FG3A_RANK: 1
FG3_PCT_RANK: 1
FTM_RANK: 1
FTA_RANK: 1
FT_PCT_RANK: 1
OREB_RANK: 1
DREB_RANK: 1
REB_RANK: 1
AST_RANK: 1
TOV_RANK: 1
STL_RANK: 1
BLK_RANK: 1
BLKA_RANK: 1
PF_RANK: 1
PFD_RANK: 1
PTS_RANK: 1
PLUS_MINUS_RANK: 1
NBA_FANTASY_PTS_RANK: 1
DD2_RANK: 1
TD3_RANK: 1
CFID: 264
CFPARAMS: 2002-03
GROUP_SET: By Year
GROUP_VALUE: 2019-20
TEAM_ID: 1610612747
TEAM_ABBREVIATION: LAL
MAX_GAME_DATE: 2020-03-10T00:00:00
GP: 60
W: 47
L: 13
W_PCT: 0.783
MIN: 2094.3366666666666
FGM: 586
FGA: 1176
FG_PCT: 0.498