# NBA Game Predictions

In [254]:
# Dependencies
import pandas as pd
from datetime import datetime, timezone
from dateutil import parser

from nba_api.live.nba.endpoints import scoreboard
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder, boxscorefourfactorsv3, teamestimatedmetrics, leaguedashteamstats

from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [16]:
# Static team metadata from nba_api; the prediction cell further down joins on its `id` and `nickname` fields.
nba_teams = teams.get_teams()

def get_all_games(seasons, season_type='Regular Season'):
    """Fetch the league game log for several seasons and stack it into one frame.

    Parameters
    ----------
    seasons : iterable of str
        Season identifiers such as '2016-17', passed to ``LeagueGameFinder``
        as ``season_nullable``.
    season_type : str, default 'Regular Season'
        Passed to ``LeagueGameFinder`` as ``season_type_nullable``.

    Returns
    -------
    pandas.DataFrame
        The first data frame of every season's response, concatenated in the
        order given with a fresh 0..n-1 index. Empty when ``seasons`` is empty.
    """
    # Collect the per-season frames first and concatenate once; growing a
    # DataFrame inside the loop re-copies everything fetched so far each time.
    season_frames = [
        leaguegamefinder.LeagueGameFinder(
            season_nullable=season,
            season_type_nullable=season_type,
        ).get_data_frames()[0]
        for season in seasons
    ]
    if not season_frames:
        # pd.concat rejects an empty list, so return an empty frame directly.
        return pd.DataFrame()
    return pd.concat(season_frames, ignore_index=True)

# Regular seasons to fetch: the seven seasons from 2016-17 through 2022-23.
seasons = [f"{start}-{str(start + 1)[-2:]}" for start in range(2016, 2023)]

all_games_data = get_all_games(seasons)

In [400]:
# Rows of the game log recorded under the Knicks' abbreviation, plus their game ids
# re-indexed from zero for the box-score lookups below.
knicks_games = all_games_data.loc[all_games_data['TEAM_ABBREVIATION'].eq('NYK')]
knicks_game_ids = knicks_games['GAME_ID'].reset_index(drop=True)

This is a good starting place, but I want to find more advanced stats than this.

In [38]:
# Every fetched game row, ordered by GAME_DATE.
# NOTE(review): despite the `_24` suffix, `all_games_data` is built from the `seasons`
# list (2016-17 through 2022-23) — confirm the intended naming.
games_24 = all_games_data.sort_values(by='GAME_DATE', ignore_index=True)
# NOTE(review): 40 and 54 are unexplained and do not resemble the ten-digit team id
# used elsewhere in this notebook (1610612752) — confirm which rows this should drop.
games_24 = games_24[~games_24['TEAM_ID'].isin([40, 54])]
knicks_24 = games_24[games_24['TEAM_ABBREVIATION'] == 'NYK']
# Show the GAME_ID of the first Knicks row after the date sort.
knicks_24.iloc[0]['GAME_ID']

In [71]:
def single_game_advanced_stats(game_ids):
    """Fetch the four-factors box score for each game and stack the results.

    Parameters
    ----------
    game_ids : iterable of str
        Game identifiers passed one at a time to ``BoxScoreFourFactorsV3``.

    Returns
    -------
    pandas.DataFrame
        The second data frame (index 1) of every response, concatenated in the
        order given with a fresh 0..n-1 index. Empty when ``game_ids`` is empty.
    """
    # One request per game id; gather the frames and concatenate a single time
    # instead of re-copying the accumulated frame on every iteration.
    per_game_frames = [
        boxscorefourfactorsv3.BoxScoreFourFactorsV3(game_id=game_id).get_data_frames()[1]
        for game_id in game_ids
    ]
    if not per_game_frames:
        # pd.concat rejects an empty list, so return an empty frame directly.
        return pd.DataFrame()
    return pd.concat(per_game_frames, ignore_index=True)

# Look up the four-factors box score for every Knicks game id collected earlier.
game_ids = knicks_game_ids

# single_game_advanced_stats issues one BoxScoreFourFactorsV3 request per game id.
advanced_stats_data = single_game_advanced_stats(game_ids=game_ids)
advanced_stats_data

Unnamed: 0,gameId,teamId,teamCity,teamName,teamTricode,teamSlug,minutes,effectiveFieldGoalPercentage,freeThrowAttemptRate,teamTurnoverPercentage,offensiveReboundPercentage,oppEffectiveFieldGoalPercentage,oppFreeThrowAttemptRate,oppTeamTurnoverPercentage,oppOffensiveReboundPercentage
0,0021601220,1610612752,New York,Knicks,NYK,knicks,240:00,0.548,0.386,0.192,0.417,0.532,0.215,0.132,0.420
1,0021601220,1610612755,Philadelphia,76ers,PHI,sixers,240:00,0.532,0.215,0.132,0.420,0.548,0.386,0.192,0.417
2,0021601196,1610612752,New York,Knicks,NYK,knicks,240:00,0.506,0.183,0.118,0.178,0.584,0.299,0.118,0.179
3,0021601196,1610612761,Toronto,Raptors,TOR,raptors,240:00,0.584,0.299,0.118,0.179,0.506,0.183,0.118,0.178
4,0021601181,1610612763,Memphis,Grizzlies,MEM,grizzlies,240:00,0.607,0.160,0.160,0.243,0.466,0.148,0.123,0.352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1091,0022200046,1610612753,Orlando,Magic,ORL,magic,240:00,0.459,0.314,0.133,0.294,0.522,0.269,0.140,0.426
1092,0022200023,1610612752,New York,Knicks,NYK,knicks,240:00,0.611,0.232,0.114,0.280,0.483,0.356,0.141,0.310
1093,0022200023,1610612765,Detroit,Pistons,DET,pistons,240:00,0.483,0.356,0.141,0.310,0.611,0.232,0.114,0.280
1094,0022200009,1610612763,Memphis,Grizzlies,MEM,grizzlies,265:00,0.468,0.213,0.125,0.400,0.445,0.165,0.108,0.324


Now I am going to combine the basic game data and the four-factor advanced data into a single data frame.

In [374]:
# The box-score frame names its game-id column `gameId` (see the table printed above),
# while the game log uses `GAME_ID`. Align them before merging; the older `GAMEID`
# spelling is kept in the mapping too, and rename() ignores keys that are absent.
advanced_stats_data = advanced_stats_data.rename(columns={'gameId': 'GAME_ID', 'GAMEID': 'GAME_ID'})
# Attach the Knicks' game-log row (including WL) to both teams' box-score rows per game.
df = pd.merge(advanced_stats_data, knicks_games, on='GAME_ID')
# Keep only the box-score row for team id 1610612752 (shown as NYK in the table above).
knicks_df = df[df['teamId'] == 1610612752].reset_index(drop=True)

In [376]:
# Model inputs: shooting, turnover and offensive-rebound factors for the Knicks and
# their opponent, together with the win/loss label.
filtered_cols = [
    'effectiveFieldGoalPercentage',
    'teamTurnoverPercentage',
    'offensiveReboundPercentage',
    'oppEffectiveFieldGoalPercentage',
    'oppTeamTurnoverPercentage',
    'oppOffensiveReboundPercentage',
    'WL',
]

# Capitalise the first letter of the three team-side columns; the `opp*` columns
# and `WL` keep their names.
team_side_renames = {name: name[0].upper() + name[1:] for name in filtered_cols[:3]}
filtered_knicks = knicks_df.loc[:, filtered_cols].rename(columns=team_side_renames)
knicks_df = filtered_knicks
knicks_df

Unnamed: 0,EffectiveFieldGoalPercentage,TeamTurnoverPercentage,OffensiveReboundPercentage,oppEffectiveFieldGoalPercentage,oppTeamTurnoverPercentage,oppOffensiveReboundPercentage,WL
0,0.548,0.192,0.417,0.532,0.132,0.420,W
1,0.506,0.118,0.178,0.584,0.118,0.179,L
2,0.466,0.123,0.352,0.607,0.160,0.243,L
3,0.458,0.134,0.456,0.581,0.153,0.302,L
4,0.506,0.158,0.367,0.412,0.143,0.291,W
...,...,...,...,...,...,...,...
543,0.462,0.121,0.302,0.445,0.064,0.348,L
544,0.593,0.145,0.298,0.561,0.127,0.315,W
545,0.522,0.140,0.426,0.459,0.133,0.294,W
546,0.611,0.114,0.280,0.483,0.141,0.310,W


In [377]:
# Features are every column except the label; the label is encoded as win=1, loss=0.
X = knicks_df.drop('WL', axis=1)
y = knicks_df['WL'].map({'W':1, 'L':0})

# Hold out 20% of the games; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# k='all' keeps every column, so this step scores the features without dropping any
# (the printed indices below list all six columns).
k_best = SelectKBest(score_func=chi2, k='all')
# NOTE(review): X_new is not referenced again in this notebook.
X_new = k_best.fit_transform(X_train, y_train)


# Positions of the retained columns, reused to slice the train and test frames.
selected_feature_indices = k_best.get_support(indices=True)
print(selected_feature_indices)

selected_features = X_train.iloc[:, selected_feature_indices]
selected_features

[0 1 2 3 4 5]


Unnamed: 0,EffectiveFieldGoalPercentage,TeamTurnoverPercentage,OffensiveReboundPercentage,oppEffectiveFieldGoalPercentage,oppTeamTurnoverPercentage,oppOffensiveReboundPercentage
46,0.572,0.160,0.000,0.577,0.151,0.238
93,0.505,0.161,0.345,0.375,0.124,0.281
464,0.636,0.109,0.200,0.500,0.191,0.240
321,0.527,0.042,0.235,0.518,0.145,0.213
272,0.561,0.061,0.277,0.628,0.154,0.250
...,...,...,...,...,...,...
71,0.500,0.099,0.389,0.541,0.118,0.267
106,0.430,0.184,0.327,0.613,0.195,0.237
270,0.456,0.166,0.246,0.576,0.181,0.340
435,0.506,0.132,0.283,0.404,0.132,0.323


In [378]:
# Training inputs restricted to the columns chosen by the selector.
selected_features = X_train.iloc[:, selected_feature_indices]

# Seed the forest so the fitted model and the metrics below are reproducible across
# reruns; 42 matches the seed already used for the train/test split.
model = RandomForestClassifier(random_state=42)
model.fit(selected_features, y_train)

# Score the held-out games using the same column selection as training.
X_test_selected = X_test.iloc[:, selected_feature_indices]
y_pred = model.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.5f}")
print(f"Precision: {precision:.5f}")
print(f"Recall: {recall:.5f}")
print(f"F1 Score: {f1:.5f}")

Accuracy: 0.83636
Precision: 0.85366
Recall: 0.74468
F1 Score: 0.79545


Ok, so now I have a model that is predicting at roughly an 80% clip for the moment (about 84% accuracy and an F1 score of about 0.80 on the held-out games). Let me turn my attention to getting the current 2023-24 season-average four-factor data for each team and letting the model run on tonight's games.


Make this into one function.

In [406]:
def get_today_games():
    """Return today's games from the live scoreboard as a DataFrame.

    Returns
    -------
    pandas.DataFrame
        One row per game with columns ``game_id``, ``home`` and ``away``,
        taken from each game's ``gameId``, ``homeTeam.teamName`` and
        ``awayTeam.teamName``. When the scoreboard lists no games the frame
        is empty but still carries those three columns.
    """
    board = scoreboard.ScoreBoard()

    todays_games = [
        {
            'game_id': game['gameId'],
            'home': game['homeTeam']['teamName'],
            'away': game['awayTeam']['teamName'],
        }
        for game in board.games.get_dict()
    ]

    # Pin the column set so downstream merges on `home`/`away` still find their
    # keys when there are no games today.
    return pd.DataFrame(todays_games, columns=['game_id', 'home', 'away'])

def interpret_predictions(predictions, merged_df):
    """Print one human-readable line per game from the model's predictions.

    Parameters
    ----------
    predictions : sequence
        One prediction per row of ``merged_df``, in row order. A value equal
        to 1 is reported as a home win; anything else as a home loss.
    merged_df : pandas.DataFrame
        Must contain ``home`` and ``away`` columns.

    Returns
    -------
    None
        The result is printed, not returned.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of games.
    """
    if len(predictions) != len(merged_df):
        raise ValueError(
            f"got {len(predictions)} predictions for {len(merged_df)} games"
        )

    print('Model Predictions for Games Today:')
    # Pair rows and predictions by position: the frame's index labels are not
    # guaranteed to be 0..n-1, so they must not be used to index `predictions`.
    for home_team, away_team, prediction in zip(merged_df['home'], merged_df['away'], predictions):
        outcome = "win over" if prediction == 1 else "lose to"
        print(f"{home_team} {outcome} {away_team}")

In [407]:
def make_updated_predictions(season='2023-24'):
    """Predict today's games from season-to-date team metrics and print the picks.

    For every game on today's scoreboard the home team's metrics fill the
    team-side feature columns and the away team's metrics fill the ``opp*``
    columns, in the same column order the module-level ``model`` was trained on.

    Parameters
    ----------
    season : str, default '2023-24'
        Season passed to ``TeamEstimatedMetrics`` and ``LeagueDashTeamStats``.

    Returns
    -------
    None
        The return value of ``interpret_predictions``, which prints the picks.
        Also ``None`` (after a short message) when no games are scheduled.
    """
    estimated = teamestimatedmetrics.TeamEstimatedMetrics(season=season).get_data_frames()[0]
    totals = leaguedashteamstats.LeagueDashTeamStats(season=season).get_data_frames()[0]

    # Effective FG% = (FGM + 0.5 * FG3M) / FGA, built as a new frame so no column is
    # written into a slice of the API response.
    efg = totals[['TEAM_ID', 'TEAM_NAME']].assign(
        EFG_PCT=(totals['FGM'] + 0.5 * totals['FG3M']) / totals['FGA']
    )

    # NOTE(review): the model was fit on single-game box-score factors; confirm the
    # estimated season metrics used here are on the same scale.
    team_metrics = (
        estimated[['TEAM_NAME', 'TEAM_ID', 'E_OREB_PCT', 'E_TM_TOV_PCT']]
        .merge(efg, on=['TEAM_ID', 'TEAM_NAME'])
        .rename(columns={
            'EFG_PCT': 'EffectiveFieldGoalPercentage',
            'E_TM_TOV_PCT': 'TeamTurnoverPercentage',
            'E_OREB_PCT': 'OffensiveReboundPercentage',
        })
    )

    todays_games = get_today_games()
    if todays_games.empty:
        print('No games scheduled today.')
        return None

    # DataFrame.merge needs a DataFrame on the right-hand side, so wrap the team
    # records first; only the id and nickname are needed for the lookups.
    team_ids = pd.DataFrame(nba_teams)[['id', 'nickname']]

    # Resolve the scoreboard nicknames to team ids for the home and away sides.
    today_games_df = (
        todays_games
        .merge(team_ids.rename(columns={'id': 'home_id'}),
               left_on='home', right_on='nickname', how='left')
        .drop(columns=['nickname'])
        .merge(team_ids.rename(columns={'id': 'away_id'}),
               left_on='away', right_on='nickname', how='left')
        .drop(columns=['nickname'])
    )

    # Home-team metrics keep their names; away-team metrics get the `opp` prefix.
    merged_df = today_games_df.merge(team_metrics, left_on='home_id', right_on='TEAM_ID', how='left')
    away_team_metrics = team_metrics.add_prefix('opp')
    df = merged_df.merge(away_team_metrics, left_on='away_id', right_on='oppTEAM_ID', how='left')

    # Same columns, in the same order, as the training frame.
    selected_cols = ['EffectiveFieldGoalPercentage', 'TeamTurnoverPercentage', 'OffensiveReboundPercentage',
        'oppEffectiveFieldGoalPercentage', 'oppTeamTurnoverPercentage', 'oppOffensiveReboundPercentage']
    features = df[selected_cols]

    predictions = model.predict(features)

    interpreted_predictions = interpret_predictions(predictions, merged_df)

    return interpreted_predictions
    

In [409]:
make_updated_predictions()

Model Predictions for Games Today:
Pacers win over Cavaliers
Bucks win over Knicks
Heat lose to Wizards
Bulls lose to Nets
Thunder lose to Warriors
Nuggets lose to Mavericks
Trail Blazers lose to Grizzlies
