In [1]:
from pymongo import MongoClient
import pprint

import pandas as pd

import numpy as np

import json
import time

In [2]:
# Import proper modules from nba_api

from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import TeamPlayerDashboard

## From the nba_api documentation on GitHub, we know the TeamPlayerDashboard endpoint lets us compile yearly rosters.

The object exploration below works out how to parse its response.

In [3]:
# Sample request (team 1610612737, season 2018-19) used to explore the response layout.
one_team = TeamPlayerDashboard(team_id = '1610612737', season = '2018-19')

In [4]:
# Convert the endpoint object into a plain dict. The isinstance guard keeps this
# cell re-runnable: once one_team is already a dict it no longer has get_dict().
if not isinstance(one_team, dict):
    one_team = one_team.get_dict()

In [5]:
# Top-level keys of the response dict.
one_team.keys()

dict_keys(['resource', 'parameters', 'resultSets'])

In [6]:
# Check which container type holds the result sets.
type(one_team['resultSets'])

list

In [7]:
# Number of result sets in the response.
len(one_team['resultSets'])

2

In [8]:
# Print the type of each entry in 'resultSets' to see what the list contains.
for result_set in one_team['resultSets']:
    print(type(result_set))

<class 'dict'>
<class 'dict'>


In [9]:
# For every result set, show each field's name, type and value.
for i in one_team['resultSets']:
    # Iterating a dict directly yields its keys.
    for k in i:
        print(f'{k}, {type(i[k])}, {i[k]}')

name, <class 'str'>, TeamOverall
headers, <class 'list'>, ['GROUP_SET', 'TEAM_ID', 'TEAM_NAME', 'GROUP_VALUE', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK']
rowSet, <class 'list'>, [['Overall', 1610612737, 'Atlanta Hawks', '2018-19', 82, 29, 53, 0.354, 3971.0, 3392, 7524, 0.451, 1067, 3034, 0.352, 1443, 1918, 0.752, 955, 2825, 3780, 2118, 1397.0, 675, 419, 448, 1932, 1817, 9294, -494.0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
name, <class 'str'>, PlayersSeasonTotals
header

In [10]:
# Locate the PLAYER_ID column in the headers of the second result set
# (the player-level one), so player IDs can be read out of each row.
rel = ['PLAYER_ID']
player_headers = one_team['resultSets'][1]['headers']
# list.index raises ValueError when the column is missing, rather than silently
# leaving rel_idx at 0 and pointing at whatever column happens to be first.
rel_idx = player_headers.index(rel[0])

rel_idx

1

In [11]:
# Confirm the rows of the second result set come back as a list.
type(one_team['resultSets'][1]['rowSet'])

list

In [12]:
# Take the value in the rel_idx column from every row of the second result set.
player_rows = one_team['resultSets'][1]['rowSet']
roster = [player_row[rel_idx] for player_row in player_rows]

roster

[203458,
 1627816,
 1629168,
 1627772,
 1627761,
 203473,
 1627738,
 1629353,
 1629121,
 202391,
 1628381,
 1626296,
 1626147,
 203145,
 1628989,
 203101,
 1629016,
 1627752,
 1629027,
 1628416,
 203092,
 1713]

Now that we know how to pull our rosters, we can iterate through unique team IDs and seasons to pull relevant data.

In [13]:
# Import proper modules from nba_api

from nba_api.stats.static import players, teams

In [14]:
# Fetch the static metadata for every team (a list of dicts).
# The module is imported under an alias so that binding `teams` to the list
# below does not shadow it; this keeps the cell safe to re-run on its own.
from nba_api.stats.static import teams as nba_teams

teams = nba_teams.get_teams()
type(teams)


list

In [15]:
# Inspect one record to see which fields each team dict carries.
teams[0]

{'id': 1610612737,
 'full_name': 'Atlanta Hawks',
 'abbreviation': 'ATL',
 'nickname': 'Hawks',
 'city': 'Atlanta',
 'state': 'Atlanta',
 'year_founded': 1949}

In [16]:
# Gather the 'id' of every team record; np.unique returns the distinct values
# as a sorted numpy array.
team_id = np.unique([team['id'] for team in teams])

team_id

array([1610612737, 1610612738, 1610612739, 1610612740, 1610612741,
       1610612742, 1610612743, 1610612744, 1610612745, 1610612746,
       1610612747, 1610612748, 1610612749, 1610612750, 1610612751,
       1610612752, 1610612753, 1610612754, 1610612755, 1610612756,
       1610612757, 1610612758, 1610612759, 1610612760, 1610612761,
       1610612762, 1610612763, 1610612764, 1610612765, 1610612766])

In [17]:
# generate all season values for TeamPlayerDashboard() instantiation.

def get_seasons(fall_start,spring_end):
    """Build the season labels covering a span of years.

    PARAMETERS:
    fall_start: int - The year in which the first season starts (fall)
    spring_end: int - The year in which the last season ends (spring)

    RETURNS:
    List of season strings such as '1998-99', one per season in chronological
    order. The part after the dash is the ending year's last two digits,
    zero-padded (e.g. '1999-00'). Empty when fall_start >= spring_end.
    """
    return [
        # Two-digit suffixes are used as-is; single-digit ones get a leading zero.
        f'{i}-{(i+1)%100}' if (i+1)%100 > 9 else f'{i}-0{(i+1)%100}'
        for i in range(fall_start, spring_end)
    ]

In [18]:
# Season labels from '1998-99' through '2018-19'.
seasons = get_seasons(1998,2019)

Connecting to MongoDB and uploading

In [19]:
# Connect to the MongoDB server on localhost:27017.
client = MongoClient(host='localhost', port=27017)

# Rosters go into the 'team_rosters' collection of the 'nba' database.
db_nba = client['nba']
stats = db_nba['team_rosters']

In [24]:
def scrape_roster(seasons,teams):
    """Pull the roster of every team for every season and store it in Mongo.

    PARAMETERS:
    
    seasons - (list) season strings relevant to your inquiry, e.g. '2018-19'
    teams - (iterable) unique team IDs for which you want to pull rosters;
            plain Python ints and numpy integers are both accepted
    
    
    RETURNS:
    None
    - Writes one document per (team, season) pair to the module-level `stats`
      collection, shaped {'team': int, 'season': str, 'roster': [player IDs]}.
      Running the scrape again replaces the existing document for a pair
      instead of inserting a duplicate.

    RAISES:
    ValueError if a response's second result set has no 'PLAYER_ID' header.
    """
    
    for s in seasons:
        
        for t in teams:
            payload = TeamPlayerDashboard(team_id = t, season = s).get_dict()
            
            # The player-level data is the second entry of 'resultSets'.
            player_set = payload['resultSets'][1]
            # Look the column up in this response's own headers instead of
            # depending on a rel_idx global left behind by an earlier cell.
            id_col = player_set['headers'].index('PLAYER_ID')
            
            d_team = {
                # int() converts numpy int64 to a native int for upload to Mongo
                # and, unlike .item(), also works when t is already a plain int.
                'team': int(t),
                'season': s,
                'roster': [row[id_col] for row in player_set['rowSet']],
            }
            
            # Upsert keyed on (team, season) so a repeated or resumed run does
            # not leave duplicate roster documents in the collection.
            stats.replace_one(
                {'team': d_team['team'], 'season': s},
                d_team,
                upsert=True,
            )
                
        # Pause 5-7 seconds after each season's batch of requests.
        time.sleep(np.random.randint(5,8))
    
    return None

In [25]:
# Run the scrape for every (season, team) pair. This makes one request and one
# Mongo write per pair, with a pause after each season, so it takes a while.
scrape_roster(seasons, team_id)