In [1]:
#possible data sources:
#NBA's API: https://gom-uat.ngss.nba.com/ui/developer (looks better suited to real-time data)
#Basketball-Reference.com  (or Sports-Reference API)
#stats.nba.com

In [63]:
import pandas as pd
import nba_api

In [2]:
#references:
# https://thedatajocks.com/time-series-sports-101/

In [3]:
# notebook for scraping https://github.com/swar/nba_api/blob/master/docs/examples/Basics.ipynb

In [4]:
# Career stats lookup for a single player via the PlayerCareerStats endpoint.
from nba_api.stats.endpoints import playercareerstats

# "203076" is Anthony Davis's player id.
career = playercareerstats.PlayerCareerStats(player_id="203076")
# Display the first DataFrame the endpoint returns (one row per season in the output).
career_frames = career.get_data_frames()
career_frames[0]



Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,203076,2012-13,0,1610612740,NOH,20.0,64,60,1846.0,349,...,0.751,165,357,522,63,75,112,89,158,867
1,203076,2013-14,0,1610612740,NOP,21.0,67,66,2358.0,522,...,0.791,207,466,673,105,89,189,109,200,1394
2,203076,2014-15,0,1610612740,NOP,22.0,68,68,2455.0,642,...,0.805,173,523,696,149,100,200,95,141,1656
3,203076,2015-16,0,1610612740,NOP,23.0,61,61,2164.0,560,...,0.758,130,497,627,116,78,125,121,148,1481
4,203076,2016-17,0,1610612740,NOP,24.0,75,75,2708.0,770,...,0.802,172,712,884,157,94,167,181,168,2099
5,203076,2017-18,0,1610612740,NOP,25.0,75,75,2727.0,780,...,0.828,187,644,831,174,115,193,162,159,2110
6,203076,2018-19,0,1610612740,NOP,26.0,56,56,1850.0,530,...,0.794,174,498,672,218,88,135,112,132,1452
7,203076,2019-20,0,1610612747,LAL,27.0,62,62,2131.0,551,...,0.846,142,435,577,200,91,143,154,156,1618
8,203076,2020-21,0,1610612747,LAL,28.0,36,36,1162.0,301,...,0.738,62,224,286,110,45,59,74,60,786
9,203076,2021-22,0,1610612747,LAL,29.0,40,40,1404.0,370,...,0.713,106,288,394,122,49,90,82,97,927


In [5]:
# Team lookup data: the static `teams` module hands back plain dictionaries.
from nba_api.stats.static import teams

# One dictionary per NBA team (30 of them, per the count printed below).
nba_teams = teams.get_teams()
print(f"Number of teams fetched: {len(nba_teams)}")
# Peek at the first entry to see which keys each team dictionary carries.
nba_teams[0:1]

Number of teams fetched: 30


[{'id': 1610612737,
  'full_name': 'Atlanta Hawks',
  'abbreviation': 'ATL',
  'nickname': 'Hawks',
  'city': 'Atlanta',
  'state': 'Georgia',
  'year_founded': 1949}]

In [6]:
# Player lookup data: the static `players` module hands back plain dictionaries.
from nba_api.stats.static import players

# One dictionary per player.
nba_players = players.get_players()
print(f"Number of players fetched: {len(nba_players)}")
# Peek at the first entry to see which keys each player dictionary carries.
nba_players[0:1]

Number of players fetched: 4900


[{'id': 76001,
  'full_name': 'Alaa Abdelnaby',
  'first_name': 'Alaa',
  'last_name': 'Abdelnaby',
  'is_active': False}]

In [79]:
# Look up individual games with the LeagueGameFinder endpoint.
from nba_api.stats.endpoints import leaguegamefinder

nba_teams = teams.get_teams()

# Example of resolving a team id from its abbreviation (here: the Celtics).
# Note that the player query below does not use celtics_id.
celtics = list(filter(lambda team: team['abbreviation'] == 'BOS', nba_teams))[0]
celtics_id = celtics['id']

# Query the games Anthony Davis (player id 203076) played in.
gamefinder = leaguegamefinder.LeagueGameFinder(player_id_nullable='203076')
# Only the first of the returned DataFrames is needed.
game_frames = gamefinder.get_data_frames()
games = game_frames[0]
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,32023,1610616834,WST,West NBA All Stars West,32300001,2024-02-18,WST @ EST,L,22,6,...,,1,7,8,8,0,1,1,0,-1.4
1,22023,1610612747,LAL,Los Angeles Lakers,22300788,2024-02-14,LAL @ UTA,W,38,37,...,0.769,3,12,15,1,0,2,2,0,4.8
2,22023,1610612747,LAL,Los Angeles Lakers,22300776,2024-02-13,LAL vs. DET,W,28,20,...,1.0,3,11,14,4,0,6,1,1,2.4
3,22023,1610612747,LAL,Los Angeles Lakers,22300747,2024-02-09,LAL vs. NOP,W,29,20,...,0.857,0,6,6,6,1,1,3,4,0.0
4,22023,1610612747,LAL,Los Angeles Lakers,22300740,2024-02-08,LAL vs. DEN,L,38,32,...,0.667,1,8,9,3,3,4,1,0,-2.2


In [8]:
# Sanity check: (rows, columns) of the `games` DataFrame.
games.shape

(851, 28)

In [65]:
# Historical team defensive stats: one LeagueDashTeamStats request per season,
# stacked into a single DataFrame with a `Season` column identifying each row.
#
# The endpoint module is imported in this cell because no earlier cell imports
# it; without the import the loop fails with a NameError on a fresh kernel.
from nba_api.stats.endpoints import leaguedashteamstats

# Seasons are identified by the calendar year they start in (2001 -> "2001-02").
start_year = 2001
end_year = 2024  # Change this to the current year as needed

# Per-season DataFrames that actually contain rows
dfs = []

for year in range(start_year, end_year + 1):
    # Season label in "YYYY-YY" form, e.g. 2001 -> "2001-02", 1999 -> "1999-00"
    season_str = f"{year}-{(year + 1) % 100:02d}"

    # Fetch the defensive measures for this season
    ldts = leaguedashteamstats.LeagueDashTeamStats(
        measure_type_detailed_defense='Defense',
        season=season_str,
    )

    # First returned frame, tagged with the season it belongs to
    df = ldts.get_data_frames()[0].assign(Season=season_str)

    # Skip seasons that return no rows. NOTE(review): presumably this is what
    # happens for a season that has not started yet (the loop includes the
    # season beginning in end_year); feeding such an empty frame to pd.concat
    # triggers a pandas deprecation warning about empty entries.
    if df.empty:
        continue

    dfs.append(df)

# Stack all seasons vertically; fall back to an empty frame if nothing was
# fetched, because pd.concat raises on an empty list.
all_seasons_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
all_seasons_df.head()


  all_seasons_df = pd.concat(dfs, ignore_index=True)


Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,DEF_RATING,DREB,DREB_PCT,...,DEF_RATING_RANK,DREB_RANK,DREB_PCT_RANK,STL_RANK,BLK_RANK,OPP_PTS_OFF_TOV_RANK,OPP_PTS_2ND_CHANCE_RANK,OPP_PTS_FB_RANK,OPP_PTS_PAINT_RANK,Season
0,1610612737,Atlanta Hawks,82,33,49,0.402,3956.0,104.8,2445,0.689,...,22,17,9,10,27,27,13,29,21,2001-02
1,1610612738,Boston Celtics,82,49,33,0.598,3966.0,99.9,2570,0.695,...,6,3,5,1,28,7,12,15,1,2001-02
2,1610612766,Charlotte Hornets,82,44,38,0.537,3951.0,101.8,2505,0.689,...,10,12,8,13,12,6,18,12,9,2001-02
3,1610612741,Chicago Bulls,82,21,61,0.256,3961.0,106.2,2359,0.685,...,25,25,12,17,25,21,8,7,14,2001-02
4,1610612739,Cleveland Cavaliers,82,29,53,0.354,3971.0,107.0,2483,0.69,...,28,14,7,23,9,23,9,19,20,2001-02


In [66]:
# Sanity check: (rows, columns) of the stacked per-season team stats.
all_seasons_df.shape

(687, 31)

In [82]:
# Keep only rows whose TEAM_NAME is one of the listed NBA franchises (this drops
# e.g. the All-Star row with TEAM_NAME 'West NBA All Stars West') and extract
# the opponent's three-letter abbreviation from MATCHUP
# ('LAL @ UTA' -> 'UTA', 'LAL vs. DET' -> 'DET').
#
# NOTE(review): the filter is on TEAM_NAME only, so SEASON_ID values starting
# with 1, 4, 5 and 6 remain alongside the 2xxxx rows (see the value_counts
# output further down). Presumably those are preseason/playoff-type games --
# confirm, and filter on SEASON_ID too if only regular-season games are wanted.
PLAYER_NBA_TEAMS = ['New Orleans Pelicans', 'Los Angeles Lakers', 'New Orleans Hornets']

# Built as one chain ending in .assign so the new column is added to a fresh
# frame rather than written onto a .loc slice (SettingWithCopyWarning hazard).
# expand=False makes str.extract return a Series for the single capture group.
games = (
    games
    .loc[games.TEAM_NAME.isin(PLAYER_NBA_TEAMS)]
    .assign(
        opp_abr=lambda g: g['MATCHUP'].str.extract(r'(?:@ |vs\. )([A-Z]{3})', expand=False)
    )
)
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,opp_abr
1,22023,1610612747,LAL,Los Angeles Lakers,22300788,2024-02-14,LAL @ UTA,W,38,37,...,3,12,15,1,0,2,2,0,4.8,UTA
2,22023,1610612747,LAL,Los Angeles Lakers,22300776,2024-02-13,LAL vs. DET,W,28,20,...,3,11,14,4,0,6,1,1,2.4,DET
3,22023,1610612747,LAL,Los Angeles Lakers,22300747,2024-02-09,LAL vs. NOP,W,29,20,...,0,6,6,6,1,1,3,4,0.0,NOP
4,22023,1610612747,LAL,Los Angeles Lakers,22300740,2024-02-08,LAL vs. DEN,L,38,32,...,1,8,9,3,3,4,1,0,-2.2,DEN
5,22023,1610612747,LAL,Los Angeles Lakers,22300713,2024-02-05,LAL @ CHA,W,37,26,...,3,12,15,11,0,3,2,1,3.2,CHA


In [85]:
# Attach each opponent's team id as `opp_id` by matching `opp_abr` against the
# `abbreviation` field of the static team list.

# Convert the list of team dictionaries to a DataFrame
teams_df = pd.DataFrame(nba_teams)

games = (
    games
    # Drop any opp_id left over from a previous run of this cell; otherwise a
    # re-run would rename the freshly merged `id` into a second `opp_id` column.
    .drop(columns=['opp_id'], errors='ignore')
    .merge(
        teams_df[['abbreviation', 'id']],
        left_on='opp_abr',
        right_on='abbreviation',
        how='left',               # keep games with no matching team; their opp_id is NaN
        validate='many_to_one',   # fail loudly if an abbreviation is duplicated in teams_df
    )
    # Rename 'id' to 'opp_id' and drop the redundant 'abbreviation' column
    .rename(columns={'id': 'opp_id'})
    .drop(columns=['abbreviation'])
)
games.head()


Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,opp_abr,opp_id
0,22023,1610612747,LAL,Los Angeles Lakers,22300788,2024-02-14,LAL @ UTA,W,38,37,...,12,15,1,0,2,2,0,4.8,UTA,1610612762
1,22023,1610612747,LAL,Los Angeles Lakers,22300776,2024-02-13,LAL vs. DET,W,28,20,...,11,14,4,0,6,1,1,2.4,DET,1610612765
2,22023,1610612747,LAL,Los Angeles Lakers,22300747,2024-02-09,LAL vs. NOP,W,29,20,...,6,6,6,1,1,3,4,0.0,NOP,1610612740
3,22023,1610612747,LAL,Los Angeles Lakers,22300740,2024-02-08,LAL vs. DEN,L,38,32,...,8,9,3,3,4,1,0,-2.2,DEN,1610612743
4,22023,1610612747,LAL,Los Angeles Lakers,22300713,2024-02-05,LAL @ CHA,W,37,26,...,12,15,11,0,3,2,1,3.2,CHA,1610612766


In [87]:
# Count how many rows of `games` fall under each SEASON_ID value.
games.SEASON_ID.value_counts()

SEASON_ID
22016    75
22017    75
22014    68
22013    67
22012    64
22019    62
22015    61
22022    56
22018    56
22023    52
22021    40
22020    36
42019    21
42022    16
42017     9
12013     8
12012     7
12014     7
12019     6
12015     6
12016     5
12023     5
42020     5
12021     5
12017     4
12018     4
42014     4
12022     3
12020     2
52020     1
62023     1
52022     1
Name: count, dtype: int64

In [53]:
# Team-level estimated metrics from the TeamEstimatedMetrics endpoint,
# requested with the endpoint's default arguments.
from nba_api.stats.endpoints import teamestimatedmetrics

estmet = teamestimatedmetrics.TeamEstimatedMetrics()
# Only the first of the returned DataFrames is used.
metric_frames = estmet.get_data_frames()
metrics = metric_frames[0]
metrics.head()

Unnamed: 0,TEAM_NAME,TEAM_ID,GP,W,L,W_PCT,MIN,E_OFF_RATING,E_DEF_RATING,E_NET_RATING,...,MIN_RANK,E_OFF_RATING_RANK,E_DEF_RATING_RANK,E_NET_RATING_RANK,E_AST_RATIO_RANK,E_OREB_PCT_RANK,E_DREB_PCT_RANK,E_REB_PCT_RANK,E_TM_TOV_PCT_RANK,E_PACE_RANK
0,Washington Wizards,1610612764,54,9,45,0.167,2592.0,109.2,117.2,-8.0,...,27,25,27,26,8,29,30,30,17,1
1,Indiana Pacers,1610612754,56,31,25,0.554,2688.0,117.9,116.8,1.1,...,5,3,26,14,1,19,27,23,11,2
2,Atlanta Hawks,1610612737,55,24,31,0.436,2660.0,115.6,118.4,-2.8,...,8,8,30,22,24,3,24,15,10,3
3,San Antonio Spurs,1610612759,55,11,44,0.2,2650.0,106.9,115.8,-8.9,...,13,28,24,28,4,23,21,24,23,4
4,Milwaukee Bucks,1610612749,56,35,21,0.625,2703.0,117.3,113.8,3.5,...,3,5,17,8,17,26,11,17,6,5


In [54]:
# Sanity check: (rows, columns) of the estimated-metrics DataFrame.
metrics.shape

(30, 30)

In [None]:
#goal: predict a player's point total in a given game based on:
#  - how that player is doing and how their team is doing
#  - how the opponent is on offense, on defense, and overall
#  - possibly: matchup information