In [86]:
import pandas as pd
import numpy as np
import math
import random

In [87]:
year = 2025
games_df = pd.read_csv(f'game-data/{year}games.csv', index_col='Unnamed: 0')

In [88]:
advanced_metrics = [
    "AdjO", "AdjD", "EffO", "eFG%", "TO%", "Reb%", "FTR", "EffD", 
    "Opp eFG%", "Opp TO%", "Opp Reb%", "Opp FTR"
]


# compute a 4-game rolling average for each metric, excluding the current game (shifted values)
for metric in advanced_metrics:
    rolling_col_name = f"4G Avg {metric}"
    games_df[rolling_col_name] = games_df.groupby("Team")[metric].shift(1).rolling(window=4, min_periods=1).mean()

In [89]:
games_df.tail(5)

Unnamed: 0,Date,Team,Conference,Opponent,Venue,Result,AdjO,AdjD,EffO,eFG%,...,4G Avg EffO,4G Avg eFG%,4G Avg TO%,4G Avg Reb%,4G Avg FTR,4G Avg EffD,4G Avg Opp eFG%,4G Avg Opp TO%,4G Avg Opp Reb%,4G Avg Opp FTR
1727,2025-03-18,Middle Tennessee,CUSA,Chattanooga,H,L,106.8,108.5,110.7,54.5,...,96.825,46.15,13.475,26.3,19.875,95.975,46.125,21.15,32.6,29.15
9944,2025-03-18,Santa Clara,WCC,UC Riverside,H,W,134.8,97.0,148.4,73.8,...,96.425,45.575,14.575,28.275,21.425,95.225,46.0,19.35,29.5,24.45
9945,2025-03-18,UC Riverside,BW,Santa Clara,A,L,106.6,127.3,91.1,46.8,...,106.55,49.7,14.425,28.7,26.3,110.85,53.325,17.575,32.55,30.7
11041,2025-03-18,Wichita St.,Amer,Oklahoma St.,A,L,114.8,114.4,105.4,45.1,...,108.325,52.225,17.45,27.175,36.175,115.15,54.825,16.825,34.45,35.475
4500,2025-03-18,Kent St.,MAC,St. Bonaventure,A,W,109.9,86.4,107.7,58.8,...,103.425,48.65,17.0,26.7,36.35,112.325,54.575,15.925,29.3,34.525


In [90]:
games_df.columns

Index(['Date', 'Team', 'Conference', 'Opponent', 'Venue', 'Result', 'AdjO',
       'AdjD', 'EffO', 'eFG%', 'TO%', 'Reb%', 'FTR', 'EffD', 'Opp eFG%',
       'Opp TO%', 'Opp Reb%', 'Opp FTR', 'G-SC', 'Opponent Conference',
       'Game Tempo', 'Game Unique ID', 'Coach', 'Opponent Coach', 'Unknown',
       'Game Importance', 'Team Points', 'Opponent Points',
       'Point Differential', 'Pre-Game Team Elo', 'Pre-Game Opponent Elo',
       'Post-Game Team Elo', 'Post-Game Opponent Elo', '4G Avg AdjO',
       '4G Avg AdjD', '4G Avg EffO', '4G Avg eFG%', '4G Avg TO%',
       '4G Avg Reb%', '4G Avg FTR', '4G Avg EffD', '4G Avg Opp eFG%',
       '4G Avg Opp TO%', '4G Avg Opp Reb%', '4G Avg Opp FTR'],
      dtype='object')

In [92]:
all_data = games_df
years = [year for year in range(2008, 2024 + 1)]
for year in years:
    curr_games_df = pd.read_csv(f'game-data/{year}games.csv', index_col='Unnamed: 0')
    all_data = pd.concat([all_data, curr_games_df], axis=0, ignore_index=True)

In [93]:
all_data

Unnamed: 0,Date,Team,Conference,Opponent,Venue,Result,AdjO,AdjD,EffO,eFG%,...,4G Avg EffO,4G Avg eFG%,4G Avg TO%,4G Avg Reb%,4G Avg FTR,4G Avg EffD,4G Avg Opp eFG%,4G Avg Opp TO%,4G Avg Opp Reb%,4G Avg Opp FTR
0,2024-11-04,NJIT,AE,Penn,H,L,83.4,90.8,88.5,43.5,...,,,,,,,,,,
1,2024-11-04,USC,B10,Chattanooga,H,W,105.1,77.8,116.4,56.2,...,,,,,,,,,,
2,2024-11-04,Chattanooga,SC,USC,A,L,86.4,99.9,77.1,33.3,...,,,,,,,,,,
3,2024-11-04,Binghamton,AE,Penn St.,A,L,100.4,133.1,90.2,49.1,...,,,,,,,,,,
4,2024-11-04,Penn St.,B10,Binghamton,H,W,134.9,102.6,147.6,60.1,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194459,2024-04-06,Purdue,B10,N.C. State,N,W,104.0,71.9,97.9,49.1,...,,,,,,,,,,
194460,2024-04-06,Alabama,SEC,Connecticut,N,L,136.3,109.8,114.5,54.3,...,,,,,,,,,,
194461,2024-04-06,N.C. State,ACC,Purdue,N,L,87.0,81.8,77.7,41.2,...,,,,,,,,,,
194462,2024-04-08,Connecticut,BE,Purdue,N,W,135.0,83.5,122.6,53.2,...,,,,,,,,,,


In [94]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


feature_cols = ['Pre-Game Team Elo', 'Pre-Game Opponent Elo',
    '4G Avg AdjO','4G Avg AdjD', '4G Avg EffO', '4G Avg eFG%', '4G Avg TO%',
    '4G Avg Reb%', '4G Avg FTR', '4G Avg EffD', '4G Avg Opp eFG%',
    '4G Avg Opp TO%', '4G Avg Opp Reb%', '4G Avg Opp FTR'
]


all_data.dropna(inplace=True)
all_data['Result Binary'] = all_data['Result'].apply(lambda x: 1 if x == 'W' else 0)

X = all_data[feature_cols]
y = all_data["Result Binary"]  # 1 = team A won, 0 = lost

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [107]:
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1]))
model.add(BatchNormalization())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))

model.add(BatchNormalization())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))

model.add(BatchNormalization())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))

model.add(BatchNormalization())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))

model.add(Dense(64))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.3))

model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=1000, batch_size=128, verbose=1)

Epoch 1/1000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6139 - loss: 0.6905
Epoch 2/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6768 - loss: 0.6234
Epoch 3/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6792 - loss: 0.6068
Epoch 4/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6633 - loss: 0.6149
Epoch 5/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6786 - loss: 0.6107
Epoch 6/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6766 - loss: 0.6081
Epoch 7/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6730 - loss: 0.6051
Epoch 8/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6696 - loss: 0.6126
Epoch 9/1000
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x37df437c0>

In [108]:
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")
print("Neural Network Accuracy:", accuracy_score(y_test, y_pred))

[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 417us/step
Neural Network Accuracy: 0.6598731884057971
