In [None]:
#imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.tree import DecisionTreeRegressor
from nba_api.stats.endpoints import *
import time


In [74]:
def player_id(player_name):
    players = leaguedashplayerbiostats.LeagueDashPlayerBioStats()
    players_data = players.get_data_frames()[0]
    player_id = players_data[players_data['PLAYER_NAME'] == player_name].get('PLAYER_ID')
    return player_id
player_id('Luka Dončić')

333    1629029
Name: PLAYER_ID, dtype: int64

In [73]:
def team_id(team_name):
    teams = leaguedashteamstats.LeagueDashTeamStats()
    teams_data = teams.get_data_frames()[0]
    team_id = teams_data[teams_data['TEAM_NAME'] == team_name].get('TEAM_ID')
    return team_id
team_id('Los Angeles Lakers')

13    1610612747
Name: TEAM_ID, dtype: int64

In [3]:
#function to get player gamelog based on Player Name rather than ID

def player_gamelog(player_name):
    id = player_id(player_name)
    gamelog = playergamelog.PlayerGameLog(player_id= id)
    return gamelog.get_data_frames()[0]

In [5]:
def playerVplayer(player_name, vs_player_name):
    id = player_id(player_name)
    vid = player_id(vs_player_name)
    gamelog = playervsplayer.PlayerVsPlayer(vs_player_id=vid, player_id=id)
    return gamelog.get_data_frames()

# playerVplayer('Nikola Jokic', 'Rudy Gobert')

In [6]:
# Will add a Home_Away column to the gamelog so that the ML model can use that as a variable
def homeOrAway(player_gamelog):
    player_gamelog['Home_Away'] = player_gamelog['MATCHUP'].apply(lambda x: 0 if '@' in x else 1)
    return player_gamelog

In [None]:
def get_nba_teams():
    nba_teams = teams.get_teams()  # Fetch data
    # Filter to get only active NBA teams and return as DataFrame
    return pd.DataFrame(nba_teams)

nba_teams = get_nba_teams()
print(nba_teams[['id', 'abbreviation', 'full_name']])

In [47]:
def againstThisTeam(player_gamelog, home_team_abbrev, opp_team_abbrev):
    awaySearchString = home_team_abbrev + ' vs. ' + opp_team_abbrev
    homeSearchString = home_team_abbrev + ' @ ' + opp_team_abbrev
    newGameLog = player_gamelog.loc[player_gamelog.MATCHUP.isin([homeSearchString, awaySearchString])]
    return newGameLog

#Test
kingStats = player_gamelog('LeBron James')
print(againstThisTeam(kingStats, 'LAL', 'ATL'))

  SEASON_ID  Player_ID     Game_ID     GAME_DATE      MATCHUP WL  MIN  FGM  \
0     22024       2544  0022400477  JAN 03, 2025  LAL vs. ATL  W   30   13   
8     22024       2544  0022400334  DEC 06, 2024    LAL @ ATL  L   43   14   

   FGA  FG_PCT  ...  DREB  REB  AST  STL  BLK  TOV  PF  PTS  PLUS_MINUS  \
0   20    0.65  ...     3    3    8    0    0    3   3   30           4   
8   25    0.56  ...    10   10   11    2    3    4   3   39           3   

   VIDEO_AVAILABLE  
0                1  
8                1  

[2 rows x 27 columns]


In [48]:
def boxStatsAgainstThisTeam(player_gamelog, home_team_abbrev, opp_team_abbrev):
    new_log = againstThisTeam(player_gamelog, home_team_abbrev, opp_team_abbrev)
    new_log = new_log.loc[:, ['GAME_DATE', 'MATCHUP', 'REB', 'AST', 'STL', 'BLK', 'PTS', 'MIN']]
    print(new_log)
#test
kingStats = player_gamelog('LeBron James')
boxStatsAgainstThisTeam(kingStats, 'LAL', 'ATL')

katStats = player_gamelog("De'Aaron Fox")
boxStatsAgainstThisTeam(katStats, 'SAC', 'MEM')

      GAME_DATE      MATCHUP  REB  AST  STL  BLK  PTS  MIN
0  JAN 03, 2025  LAL vs. ATL    3    8    0    0   30   30
8  DEC 06, 2024    LAL @ ATL   10   11    2    3   39   43
       GAME_DATE      MATCHUP  REB  AST  STL  BLK  PTS  MIN
0   JAN 03, 2025  SAC vs. MEM    3    5    3    0   23   40
12  DEC 05, 2024    SAC @ MEM    6    3    6    0   18   38


In [43]:
def variancePTS(player_gamelog):
    pts = player_gamelog['MIN']
    print(pts.describe())

#test
dearonStats = player_gamelog("De'Aaron Fox")
variancePTS(dearonStats)

count    35.000000
mean     37.342857
std       3.086430
min      31.000000
25%      36.000000
50%      37.000000
75%      39.000000
max      44.000000
Name: MIN, dtype: float64


In [94]:
def getTotalAllowedPTS(team_city):
    team_stats = leaguestandingsv3.LeagueStandingsV3()
    df_teamstats = team_stats.get_data_frames()[0]
    team_row = df_teamstats.query("TeamCity == '{}'".format(team_city))
    if not team_row.empty:
        totalPTSA = team_row.iloc[0]['OppTotalPoints']
        return totalPTSA
    else:
        print("No data found for the specified city")
        return None
def getPPG_allowed(team_city, team_name):
    totalPTSA = getTotalAllowedPTS(team_city)
    team_ID = team_id(team_name)
    team_gamelog = teamgamelog.TeamGameLog(team_id=team_ID)
    df_teamgamelog = team_gamelog.get_data_frames()[0]
    games_played = df_teamgamelog.shape[0]
    ppg_allowed = totalPTSA / games_played
    return ppg_allowed
getPPG_allowed('Los Angeles', 'Los Angeles Lakers')

113.70588235294117

In [91]:
# Testing accuracy for different ML models on point predictions
kingStats = player_gamelog('LeBron James')
print(kingStats)
avgPTS = kingStats['PTS'].mean()
#Add Home_Away column
homeOrAway(kingStats)
# target
y = kingStats.PTS
# Features should be considered carefully, I just threw in ones I think make sense
# Obviously some of these like REB, AST, STL, etc probably don't make sense since we can't know these things before the game, although we might know what his average stats are for those categories before the game
kingStats_features = ['PTS', 'Home_Away', ]
X = kingStats[kingStats_features]
# Splitting data to test for accuracy
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=0)

# DecisionTreeRegressor test
kingStats_model = DecisionTreeRegressor()
kingStats_model.fit(train_X, train_y)
val_predictions = kingStats_model.predict(val_X)
# ~2.833 MAE, <- haven't really put anything in so would not trust that
print(mean_absolute_error(val_y, val_predictions))


   SEASON_ID  Player_ID     Game_ID     GAME_DATE      MATCHUP WL  MIN  FGM  \
0      22024       2544  0022400477  JAN 03, 2025  LAL vs. ATL  W   30   13   
1      22024       2544  0022400468  JAN 02, 2025  LAL vs. POR  W   36   15   
2      22024       2544  0022400454  DEC 31, 2024  LAL vs. CLE  L   34    9   
3      22024       2544  0022400408  DEC 25, 2024    LAL @ GSW  W   37   12   
4      22024       2544  0022400404  DEC 23, 2024  LAL vs. DET  L   35   10   
5      22024       2544  0022400376  DEC 21, 2024    LAL @ SAC  W   34   13   
6      22024       2544  0022400372  DEC 19, 2024    LAL @ SAC  W   34    8   
7      22024       2544  0022401220  DEC 15, 2024  LAL vs. MEM  W   34    7   
8      22024       2544  0022400334  DEC 06, 2024    LAL @ ATL  L   43   14   
9      22024       2544  0022400321  DEC 04, 2024    LAL @ MIA  L   29   12   
10     22024       2544  0022400318  DEC 02, 2024    LAL @ MIN  L   31    4   
11     22024       2544  0022400311  DEC 01, 2024   