In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np

In [2]:
# Define a dictionary to store all the datasets 
season_statistics = {} 

for year in range(1994, 2024): 
    # Import the CSV file for the corresponding year as a pandas df 
    df = pd.read_csv(f"./data/{year}.csv") 
    
    # Add the dataframe to our dictionary of dataframes 
    season_statistics[year] = df

In [3]:
# Concatenate all season data
df = pd.concat(season_statistics.values(), ignore_index=True)

# Define Categorical and Numerical Columns for Pre-Processing
selected_features = df.columns.drop(['Champion', 'Arena', 'Team', 'Year', 'G'])
# selected_features = ['W%', 'FG%', 'FGA', '3P%', 'FT%', 'ORB', 'DRB', 'AST', 'STL', 'BLK', 
#                      'PPG', 'OPPG', 'Age', 'ORtg', 'DRtg', 'Pace', 'FTr', '3PAr', 'TS%', 'eFG%', 'Attend./G']

# Create a Data PreProcessor
preprocessor = ColumnTransformer(transformers = [('num', StandardScaler(), selected_features)])

# Separate the Predictors and Response
X = df[selected_features]
y = df['Champion'].values

# Pre-Process the Input Data
X = preprocessor.fit_transform(X)

In [4]:
# Define the K-Fold Cross Validation Sets
kf = KFold(n_splits = 5, shuffle = True, random_state = 669)

# Create a List for each Model and its Accuracy
model_history, mse_history = [], []

# Perform K-Fold Cross Validation
for train_index, test_index in kf.split(X):
    
    # Split into Training and Testing Datasets
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Build the Neural Network Model
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        Dense(128, activation='relu'),
        Dense(128, activation='relu'),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
    model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)
    
    # Predict and evaluate
    y_pred = model.predict(X_test).flatten()
    y_pred, y_test = (y_pred * 100), (y_test * 100)
    mse = mean_squared_error(y_test, y_pred)
    
    # Append the Model and MSE to the respective lists
    model_history.append(model)
    mse_history.append(mse)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50


Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




In [7]:
# Print Performance Metrics
print("Average MSE:", np.mean(mse_history), '\n')
for i in range(len(model_history)):
    print(f"Model #{i}, MSE: {mse_history[i]}")
    
# Get the Best Model
idx = mse_history.index(min(mse_history))
best_model = model_history[idx]

# Perform Accuracy Test
accuracy = 0
for year, df in season_statistics.items():
    
    # Get the Actual Champion
    real_champion = df[df['Champion'] == 1]['Team'].values[0]
    
    # Get the Model Prediction
    X = preprocessor.fit_transform(df[selected_features])
    y = best_model.predict(X)
    pred_champion = df.iloc[np.argmax(y)]['Team']
    
    # Evaluate Results
    accuracy += int(real_champion == pred_champion)
    print(f"{year} NBA Season")
    print(f"\tActual Champion: {real_champion}")
    print(f"\tPredicted Champion: {pred_champion}")
    print()
    
print(f"Model Accuracy: {(accuracy / len(season_statistics)):.2f}% ({accuracy}/{len(season_statistics)})")

Average MSE: 298.30548507749586 

Model #0, MSE: 335.9032311765132
Model #1, MSE: 317.9267020494929
Model #2, MSE: 354.15593068639487
Model #3, MSE: 228.08177717721634
Model #4, MSE: 255.45978429786194
1994 NBA Season
	Actual Champion: Houston Rockets
	Predicted Champion: Houston Rockets

1995 NBA Season
	Actual Champion: Houston Rockets
	Predicted Champion: Orlando Magic

1996 NBA Season
	Actual Champion: Chicago Bulls
	Predicted Champion: Chicago Bulls

1997 NBA Season
	Actual Champion: Chicago Bulls
	Predicted Champion: Chicago Bulls

1998 NBA Season
	Actual Champion: Chicago Bulls
	Predicted Champion: Chicago Bulls

1999 NBA Season
	Actual Champion: San Antonio Spurs
	Predicted Champion: Utah Jazz

2000 NBA Season
	Actual Champion: Los Angeles Lakers
	Predicted Champion: Los Angeles Lakers

2001 NBA Season
	Actual Champion: Los Angeles Lakers
	Predicted Champion: Portland Trail Blazers

2002 NBA Season
	Actual Champion: Los Angeles Lakers
	Predicted Champion: Los Angeles Lakers

20

In [8]:
# Get the 2024 NBA Season Data
df = pd.read_csv(f"./data/2024.csv")

# Get the Model Prediction
X = preprocessor.fit_transform(df[selected_features])
y = best_model.predict(X)
pred_champion = df.iloc[np.argmax(y)]['Team']

# Evaluate Results
print(f"2024 Predicted Champion: {pred_champion}")

2024 Predicted Champion: Boston Celtics
