ANN Final Project
Authors: Caleb Johnson, Gabe Schwartz, Evan Kates
Network Module

In [1]:
# import data from csv file
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, losses, regularizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# load csv data
df = pd.read_csv('training.csv')

# define input features
feature_cols = ['GP_r', 'MIN_r', 'FG_PCT_r', 'REB_r', 'AST_r', 'PTS_r', 'TOV_r']

X = df[feature_cols]

# define predicted features
target_cols = ['PTS_s', 'REB_s', 'AST_s']
y = df[target_cols]

# create train-test-split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# create model
model = keras.Sequential([
    keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],),
                       kernel_regularizer=regularizers.l1(0.001)),  # L1 regularization
    keras.layers.Dense(64, activation='relu',
                       kernel_regularizer=regularizers.l1(0.001)),
    keras.layers.Dense(3)  # output layer predicts 3 stats
])

# stop early to avoid overfitting
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True # After stopping, roll back to the best model
)

model.compile(optimizer='adam', loss=losses.Huber(), metrics=['mae'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [2]:
# train model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=300,
    batch_size=32,
    callbacks=[early_stop]
)

# evaluate model
loss, mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {mae}")

# make predictions
predictions = model.predict(X_test)

# examples
for i in range(5):
    print(f"Predicted: {predictions[i]} | Actual: {y_test.iloc[i].values}")

Epoch 1/300
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 4.8747 - mae: 4.5414 - val_loss: 4.4820 - val_mae: 4.1580
Epoch 2/300
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.0568 - mae: 3.7258 - val_loss: 3.7151 - val_mae: 3.3914
Epoch 3/300
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3.2618 - mae: 2.9386 - val_loss: 2.9778 - val_mae: 2.6861
Epoch 4/300
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.7582 - mae: 2.4679 - val_loss: 2.3816 - val_mae: 2.1071
Epoch 5/300
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.1223 - mae: 1.8393 - val_loss: 2.0982 - val_mae: 1.8386
Epoch 6/300
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.9473 - mae: 1.6881 - val_loss: 1.9913 - val_mae: 1.7698
Epoch 7/300
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1

In [3]:
# Analyze results
from scipy import stats

# predict
y_pred = model.predict(X_test)

# calculate error in predictions
errors = y_test.values - y_pred

# evaluate std dev of error for confidence
std_dev = np.std(errors, axis=0)

# compute confidence interval
conf_interval = 1.645 * std_dev # 90% confidence

print("We are 90% confident that the stats will be within these ranges:")
print(f"Points: ±{conf_interval[0]:.2f}")
print(f"Rebounds: ±{conf_interval[1]:.2f}")
print(f"Assists: ±{conf_interval[2]:.2f}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
We are 90% confident that the stats will be within these ranges:
Points: ±5.67
Rebounds: ±2.36
Assists: ±1.59


In [4]:
# Save model to use for predictions
model.save("stat_prediction_model.h5")

