In [1]:
from nba_api.stats.endpoints import leaguedashteamstats, scoreboard, leaguegamefinder
from nba_api.stats.static import teams
import requests
import json
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import precision_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [2]:
# NBA TEAM IDS:
# https://github.com/bttmly/nba/blob/master/data/teams.json

In [3]:
game_id = '0022300984'
# Get **all** the games so we can filter to an individual GAME_ID
result = leaguegamefinder.LeagueGameFinder()
all_games = result.get_data_frames()[0]
# Find the game_id we want
full_game = all_games[all_games.GAME_ID == game_id]
full_game
def combine_team_games(df, keep_method='home'):
    '''Combine a TEAM_ID-GAME_ID unique table into rows by game. Slow.
        Parameters
        ----------
        df : Input DataFrame.
        keep_method : {'home', 'away', 'winner', 'loser', ``None``}, default 'home'
            - 'home' : Keep rows where TEAM_A is the home team.
            - 'away' : Keep rows where TEAM_A is the away team.
            - 'winner' : Keep rows where TEAM_A is the losing team.
            - 'loser' : Keep rows where TEAM_A is the winning team.
            - ``None`` : Keep all rows. Will result in an output DataFrame the same
                length as the input DataFrame.
        Returns
        -------
        result : DataFrame
    '''
    # Join every row to all others with the same game ID.
    joined = pd.merge(df, df, suffixes=['_AWAY', '_HOME'],
                      on=['SEASON_ID', 'GAME_ID', 'GAME_DATE'])
    # Filter out any row that is joined to itself.
    result = joined[joined.TEAM_ID_AWAY != joined.TEAM_ID_HOME]
    # Take action based on the keep_method flag.
    if keep_method is None:
        # Return all the rows.
        pass
    elif keep_method.lower() == 'home':
        # Keep rows where TEAM_A is the home team.
        result = result[result.MATCHUP_AWAY.str.contains(' vs. ')]
    elif keep_method.lower() == 'away':
        # Keep rows where TEAM_A is the away team.
        result = result[result.MATCHUP_AWAY.str.contains(' @ ')]
    elif keep_method.lower() == 'winner':
        result = result[result.WL_AWAY == 'W']
    elif keep_method.lower() == 'loser':
        result = result[result.WL_AWAY == 'L']
    else:
        raise ValueError(f'Invalid keep_method: {keep_method}')
    return result
# Combine the game rows into one. By default, the home team will be TEAM_A.
game_df = combine_team_games(full_game)

In [4]:
celtics_id = 1610612738
knicks_id = 1610612752

def get_dfs(team_id):
    seasons = ['2022-23', '2021-22', '2020-21']
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, season_nullable='2023-24')
    all_team_games = gamefinder.get_data_frames()[0]

    for i in seasons:
        new_gf = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, season_nullable=i)
        new_games = new_gf.get_data_frames()[0]
        all_team_games = all_team_games.append(new_games)

    all_team_games.index = range(0, all_team_games.shape[0])
    game_ids = []
    for i in range(all_team_games.shape[0]):
        game_ids.append(all_team_games.loc[i]['GAME_ID'])
        
    rows = []
    for id in game_ids:
        get_games = all_games[all_games.GAME_ID == id]
        new_row = combine_team_games(get_games).iloc[0]
        rows.append(new_row)
    df = pd.DataFrame(rows)
    df_away = df[df['TEAM_ID_AWAY'].isin([team_id])]
    df_home = df[df['TEAM_ID_HOME'].isin([team_id])]
    df_home = df_home.sort_values(by='GAME_DATE', ascending=True)
    df_away = df_away.sort_values(by='GAME_DATE', ascending=True)
    df_away.index = range(0,df_away.shape[0])
    df_home.index = range(0,df_home.shape[0])
    return df_home, df_away

In [5]:
def rolling_averages(group, cols, new_cols):
    rolling_stats = group[cols].rolling(5, closed='left').mean()
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols)
    return group

In [6]:
def add_new_cols(team_id):
    df_home, df_away = get_dfs(team_id)
    cols = ['PTS_AWAY', 'FGM_AWAY',
           'FGA_AWAY', 'FG_PCT_AWAY', 'FG3M_AWAY', 'FG3A_AWAY', 'FG3_PCT_AWAY', 'FTM_AWAY', 'FTA_AWAY',
           'FT_PCT_AWAY', 'OREB_AWAY', 'DREB_AWAY', 'REB_AWAY', 'AST_AWAY', 'BLK_AWAY',
           'TOV_AWAY', 'PLUS_MINUS_AWAY', 'PTS_HOME', 'FGM_HOME', 'FGA_HOME',
           'FG_PCT_HOME', 'FG3M_HOME', 'FG3A_HOME', 'FG3_PCT_HOME', 'FTM_HOME', 'FTA_HOME',
           'FT_PCT_HOME', 'OREB_HOME', 'DREB_HOME', 'REB_HOME', 'AST_HOME', 'BLK_HOME',
           'TOV_HOME', 'PLUS_MINUS_HOME']
    new_cols = [f"{c}_rolling" for c in cols]
    
    df_home = rolling_averages(df_home, cols, new_cols)
    df_away = rolling_averages(df_away, cols, new_cols)
    df_home.index = range(0,df_home.shape[0])
    df_away.index = range(0,df_away.shape[0])
    return df_home, df_away, new_cols

In [7]:
def df_with_target(team_id):
    df_home, df_away, new_cols = add_new_cols(team_id)
    df_home["W"] = (df_home["WL_HOME"] == "W").astype("int")
    df_away["W"] = (df_away["WL_AWAY"] == "W").astype("int")
    df_home.index = range(0,df_home.shape[0])
    df_away.index = range(0,df_away.shape[0])
    return df_home, df_away, new_cols

In [8]:
def make_pred(data, predictors):
#     model = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)
    model = GaussianNB()
    train, test = train_test_split(data, test_size=0.2)
    model.fit(train[predictors], train["W"])
    preds = model.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test["W"], predicted=preds), index=test.index)
    precision = precision_score(test["W"], preds)
    return combined, precision, model

In [9]:
def find_accuracy(team_id):
    df_home, df_away, new_cols = df_with_target(team_id)
    combined_home, precision_home, model_home = make_pred(df_home, new_cols)
    combined_away, precision_away, model_away = make_pred(df_away, new_cols)
    return model_home, model_away, precision_home, precision_away

In [14]:
predictions=[]
def predictor(away_id, home_id):
    df_home, df_away1, new_cols = df_with_target(home_id)
    df_home1, df_away, new_cols = df_with_target(away_id)
    
    home_stats = list(df_home.loc[:, 'PTS_HOME_rolling':'PLUS_MINUS_HOME_rolling'].iloc[-1])
    away_stats = list(df_away.loc[:, 'PTS_AWAY_rolling':'PLUS_MINUS_AWAY_rolling'].iloc[-1])
    new_stats = [away_stats+home_stats]
    
    model_home, model_away, precision_home, precision_away = find_accuracy(home_id)
    
    if precision_home < 0.65:
        print("Loading...")
        predictor(away_id, home_id)
    
    precision_home *= 100
    precision_home = round(precision_home, 1)  
    df_home = pd.DataFrame(new_stats, columns=new_cols)
    df_home['W'] = model_home.predict(new_stats)
    
    if df_home['W'].iloc[0] == 1:
        result = "win"
    else: 
        result = "lose"
        
    predictions.append(f"I am {precision_home}% confident the home team will {result} against the away team.")
    return predictions

In [15]:
def lepredictor(away_id, home_id):
    predictions = predictor(away_id, home_id)
    print(predictions[0])
    predictions.clear()

In [16]:
cavs = 1610612739
clippers = 1610612746
knicks = knicks_id
celtics = celtics_id
hawks = 1610612737
nets = 1610612751
hornets = 1610612766
bulls = 1610612741
mavericks = 1610612742
nuggets = 1610612743
pistons = 1610612765
warriors = 1610612744
rockets = 1610612745
pacers = 1610612754
lakers = 1610612747
grizzlies = 1610612763
heat = 1610612748
bucks = 1610612749
timberwolves = 1610612750
pelicans = 1610612740
thunder = 1610612760
magic = 1610612753
sixers = 1610612755
suns = 1610612756
blazers = 1610612757
kings = 1610612758
spurs = 1610612759
raptors = 1610612761
jazz = 1610612762
wizards = 1610612764

In [13]:
# lepredictor(knicks_id, celtics_id) # Knicks @ Celtics

In [14]:
# lepredictor(knicks_id, 1610612766) # Knicks @ Hornets

In [15]:
# lepredictor(1610612766, knicks_id) # Hornets @ Knicks

In [16]:
# lepredictor(1610612737, 1610612763) # Sixers @ Grizzlies

In [17]:
# lepredictor(1610612755, 1610612743) # Hawks @ Nuggets

In [18]:
# lepredictor(1610612739, 1610612747) # Cavs @ Lakers

In [19]:
# lepredictor(1610612765, 1610612751) # Pistons @ Nets

In [17]:
lepredictor(cavs, clippers) # Cavs @ Clips

Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
I am 66.7% confident the home team will win against the away team.


In [18]:
lepredictor(rockets, mavericks) # Rockets @ Mavs

I am 82.4% confident the home team will win against the away team.


In [19]:
lepredictor(heat, pacers) # Heat @ Pacers

Loading...
I am 75.0% confident the home team will lose against the away team.


In [20]:
lepredictor(blazers, celtics) # Blazers @ Celtics

Loading...
Loading...
Loading...
I am 66.7% confident the home team will lose against the away team.


In [21]:
lepredictor(bulls, magic)

Loading...
Loading...
Loading...
Loading...
Loading...
I am 75.0% confident the home team will win against the away team.


In [22]:
lepredictor(wizards, raptors)

Loading...
Loading...
Loading...
Loading...
Loading...
I am 66.7% confident the home team will win against the away team.


In [23]:
lepredictor(thunder, hornets)

Loading...
I am 66.7% confident the home team will lose against the away team.
