In [1]:
# https://github.com/swar/nba_api/tree/master
%pip install nba_api

Collecting nba_api
  Downloading nba_api-1.8.0-py3-none-any.whl.metadata (5.7 kB)
Downloading nba_api-1.8.0-py3-none-any.whl (285 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.2/285.2 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nba_api
Successfully installed nba_api-1.8.0


In [2]:
import json
import pandas as pd
import numpy as np
from nba_api.stats.static import teams
from nba_api.stats.endpoints import teamgamelog
from nba_api.stats.endpoints import boxscoreadvancedv2
import time
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.models import load_model
import joblib


In [17]:
teamsJson = teams.get_teams()
team_dict = {team['full_name'].lower(): team['id'] for team in teamsJson}

def getRecentGames(teamOne, num_games = 10):
    teamStatsOne = teamgamelog.TeamGameLog(team_id = team_dict[teamOne]).get_data_frames()[0].head(num_games)

    statsToBeAvgd = ["FGM", "FGA", "FG_PCT", "FG3M", "FG3A", "FTM", "FTA", "FT_PCT",
                     "OREB", "DREB", "REB" , "AST", "STL", "BLK", "TOV", "PF", "PTS"]

    # average the stats from n-1 most recent games
    avgStatsOne = teamStatsOne.loc[1:num_games-1, statsToBeAvgd].mean().to_frame().T

    # rename the PTS column to AVG_PTS
    avgStatsOne = avgStatsOne.rename(columns={"PTS": "AVG_PTS"})

    # get additional stats
    gameIdOne = teamStatsOne.loc[1:num_games-1, "Game_ID"]

    temp1 = pd.DataFrame()

    for gameId in gameIdOne:
      time.sleep(0.5)
      temp1 = pd.concat([temp1, boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=gameId).get_data_frames()[1]], ignore_index=True)

    moreTeamStatsOne = temp1[temp1["TEAM_NAME"].str.lower() == teamOne.split(" ")[1]]

    moreStatsTobeAvgd = ['E_OFF_RATING', 'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING',
                         'E_NET_RATING', 'NET_RATING', 'AST_PCT', 'AST_TOV', 'AST_RATIO',
                         'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'E_TM_TOV_PCT', 'TM_TOV_PCT',
                         'EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT', 'E_PACE', 'PACE',
                         'PACE_PER40', 'POSS', 'PIE']

    moreStatsAvgOne = moreTeamStatsOne.loc[:, moreStatsTobeAvgd].mean().to_frame().T

    # concatenates the two df's of stats
    avgStatsOne = pd.concat([avgStatsOne, moreStatsAvgOne], axis = 1)

    # adds target to dataframe, points scored in the most recent game
    scoreOne = teamStatsOne.loc[0, "PTS"]
    avgStatsOne["TARGET"] = scoreOne

    return avgStatsOne

data = pd.DataFrame()
for team in team_dict.keys():
    team_data = getRecentGames(team)
    data = pd.concat([data, team_data], axis=0, ignore_index=True)
    #time.sleep(3)
    #print(data)


In [27]:
data.to_csv('/content/drive/My Drive/NBADATA.csv', index=False)

In [32]:
df = pd.read_csv('/content/drive/My Drive/NBADATA.csv')
y = df.pop("TARGET")
X = df

# impute medians
X = X.fillna(X.median())

# scale data
scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

# save scaler
joblib.dump(scaler, "/content/drive/My Drive/NBA_scaler.pkl")


40


['/content/drive/My Drive/NBA_scaler.pkl']

In [33]:
def build_model(input_shape):
    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_shape,)),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

In [34]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

fold = 1
val_mae_scores = []

for train_index, val_index in kf.split(X_scaled):

    # split data
    X_train, X_val = X_scaled.iloc[train_index], X_scaled.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]

    # build and train
    model = build_model(input_shape=X_train.shape[1])
    history = model.fit(X_train, y_train, epochs=150, batch_size=8,
                        validation_data=(X_val, y_val), verbose=1)

    # evaluate
    val_loss, val_mae = model.evaluate(X_val, y_val, verbose=0)
    val_mae_scores.append(val_mae)

    print(f"Fold {fold} - Validation MAE: {val_mae:.4f}")
    fold += 1

# avg validation MAE
print(f"\nAverage Validation MAE: {sum(val_mae_scores) / len(val_mae_scores):.4f}")

Epoch 1/150


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 244ms/step - loss: 13780.7900 - mae: 116.6383 - val_loss: 13412.6221 - val_mae: 115.1202
Epoch 2/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 13494.6465 - mae: 115.5456 - val_loss: 13364.4854 - val_mae: 114.9031
Epoch 3/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 13740.8096 - mae: 116.6086 - val_loss: 13312.4961 - val_mae: 114.6682
Epoch 4/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 13734.8213 - mae: 116.4566 - val_loss: 13254.4092 - val_mae: 114.4060
Epoch 5/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 13349.6836 - mae: 114.8726 - val_loss: 13188.6943 - val_mae: 114.1092
Epoch 6/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 13052.7676 - mae: 113.6043 - val_loss: 13114.4346 - val_mae: 113.7728
Epoch 7/150
[1m3/3[0m [32m━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 163ms/step - loss: 14209.3965 - mae: 118.5175 - val_loss: 13634.4092 - val_mae: 115.8009
Epoch 2/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 13400.2373 - mae: 115.1447 - val_loss: 13605.3623 - val_mae: 115.6746
Epoch 3/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 14076.4326 - mae: 117.9291 - val_loss: 13578.6914 - val_mae: 115.5567
Epoch 4/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 13488.4072 - mae: 115.5114 - val_loss: 13549.8135 - val_mae: 115.4284
Epoch 5/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 13759.6396 - mae: 116.5474 - val_loss: 13519.3916 - val_mae: 115.2924
Epoch 6/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 14114.5352 - mae: 118.2201 - val_loss: 13486.9678 - val_mae: 115.1481
Epoch 7/150
[1m3/3[0m [32m━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 162ms/step - loss: 14038.3613 - mae: 117.9930 - val_loss: 14912.4766 - val_mae: 121.3156
Epoch 2/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 14152.1592 - mae: 118.2311 - val_loss: 14853.2197 - val_mae: 121.0762
Epoch 3/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 13756.6875 - mae: 116.6579 - val_loss: 14800.2295 - val_mae: 120.8617
Epoch 4/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 13820.9922 - mae: 116.9500 - val_loss: 14751.4365 - val_mae: 120.6620
Epoch 5/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 13682.3389 - mae: 116.3671 - val_loss: 14703.7734 - val_mae: 120.4653
Epoch 6/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 13313.5879 - mae: 114.5508 - val_loss: 14654.3486 - val_mae: 120.2610
Epoch 7/150
[1m3/3[0m [32m━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 156ms/step - loss: 13955.4453 - mae: 117.2382 - val_loss: 13186.4170 - val_mae: 114.4645
Epoch 2/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 13504.3975 - mae: 115.2889 - val_loss: 13167.2021 - val_mae: 114.3774
Epoch 3/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 14327.5342 - mae: 118.9627 - val_loss: 13144.8281 - val_mae: 114.2750
Epoch 4/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 13924.0039 - mae: 117.3951 - val_loss: 13122.5049 - val_mae: 114.1735
Epoch 5/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 13896.1211 - mae: 117.0870 - val_loss: 13093.1631 - val_mae: 114.0407
Epoch 6/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 14221.5996 - mae: 118.4024 - val_loss: 13056.8975 - val_mae: 113.8768
Epoch 7/150
[1m3/3[0m [32m━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 159ms/step - loss: 13522.8711 - mae: 115.6312 - val_loss: 14434.7383 - val_mae: 119.5247
Epoch 2/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 14005.9297 - mae: 117.4361 - val_loss: 14384.0508 - val_mae: 119.3091
Epoch 3/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 13228.4219 - mae: 114.1609 - val_loss: 14330.6182 - val_mae: 119.0818
Epoch 4/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 13403.4014 - mae: 115.0472 - val_loss: 14273.3564 - val_mae: 118.8384
Epoch 5/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 13421.5273 - mae: 114.8995 - val_loss: 14211.4971 - val_mae: 118.5766
Epoch 6/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 13414.8281 - mae: 115.2435 - val_loss: 14144.1768 - val_mae: 118.2919
Epoch 7/150
[1m3/3[0m [32m━━━━━━━━━━

In [35]:
model.save('/content/drive/My Drive/NBA_model.keras')

In [49]:
model = load_model('/content/drive/My Drive/NBA_model.keras')
print(team_dict.keys())
team = str(input("Enter a team from the seleciton above (ensure exact spelling): "))
temp = getRecentGames(team)
temp.pop("TARGET")
scaler = joblib.load("/content/drive/My Drive/NBA_scaler.pkl")

if (temp.isna().sum().sum() > 0):
  print("There are some NA's with the input data for " + team +".")
  print("Would you like to impute the median from the training data for these values? (may result in inaccurate prediction)")
  ans = str(input("Y/N: "))
  if (ans == "Y"):
    temp = temp.fillna(X.median())
    scaledNewData = scaler.transform(temp)

    #print(temp)

    prediction = model.predict(scaledNewData)
    print("The model predicts " + str(prediction[0]) + " points")

  else:
    print("Exiting...")

else:
  scaledNewData = scaler.transform(temp)

  prediction = model.predict(scaledNewData)
  print("The model predicts " + str(prediction[0]) + " points")



dict_keys(['atlanta hawks', 'boston celtics', 'cleveland cavaliers', 'new orleans pelicans', 'chicago bulls', 'dallas mavericks', 'denver nuggets', 'golden state warriors', 'houston rockets', 'los angeles clippers', 'los angeles lakers', 'miami heat', 'milwaukee bucks', 'minnesota timberwolves', 'brooklyn nets', 'new york knicks', 'orlando magic', 'indiana pacers', 'philadelphia 76ers', 'phoenix suns', 'portland trail blazers', 'sacramento kings', 'san antonio spurs', 'oklahoma city thunder', 'toronto raptors', 'utah jazz', 'memphis grizzlies', 'washington wizards', 'detroit pistons', 'charlotte hornets'])
Enter a team from the seleciton above (ensure exact spelling): dallas mavericks
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
The model predicts [133.08704] points
