ANN Final Project
Authors: Caleb Johnson, Gabe Schwartz, Evan Kates
Network Module

In [None]:
# import data from csv file
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, losses
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Step 2: Load the data
df = pd.read_csv('data.csv')

# Step 3: Define features (X) and targets (y)
# Rookie year features
feature_cols = ['GP_r', 'MIN_r', 'FG_PCT_r', 'FG3M_r', 'FG3A_r', 'REB_r', 'AST_r', 'PTS_r', 'TOV_r']
X = df[feature_cols]

# Sophomore stats to predict
target_cols = ['PTS_s', 'REB_s', 'AST_s']
y = df[target_cols]

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Feature scaling (important for neural nets)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 6: Build the model
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(3)  # Predicting 3 outputs: PTS, REB, AST
])

# Define early stopping
early_stop = EarlyStopping(
    monitor='val_loss',   # Watch the validation loss
    patience=10,          # Number of epochs to wait after no improvement
    restore_best_weights=True # After stopping, roll back to the best model
)

model.compile(optimizer='adam', loss=losses.Huber(), metrics=['mae'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [27]:
# Step 7: Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=16, validation_split=0.2)

# Step 8: Evaluate the model
loss, mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {mae}")

# Step 9: Make predictions
predictions = model.predict(X_test)

# Show some example predictions
for i in range(5):
    print(f"Predicted: {predictions[i]} | Actual: {y_test.iloc[i].values}")

Epoch 1/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3.7578 - mae: 4.2335 - val_loss: 3.8124 - val_mae: 4.2586
Epoch 2/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.1464 - mae: 3.5820 - val_loss: 3.0548 - val_mae: 3.4885
Epoch 3/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.3438 - mae: 2.7637 - val_loss: 2.3713 - val_mae: 2.7987
Epoch 4/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.8140 - mae: 2.2287 - val_loss: 1.6838 - val_mae: 2.1063
Epoch 5/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.2098 - mae: 1.6136 - val_loss: 1.5259 - val_mae: 1.9527
Epoch 6/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.2390 - mae: 1.6372 - val_loss: 1.4664 - val_mae: 1.8946
Epoch 7/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1

In [28]:
from scipy import stats

# 1. Predict on test data
y_pred = model.predict(X_test)

# 2. Calculate residuals
errors = y_test.values - y_pred

# 3. Estimate standard deviation of residuals
std_dev = np.std(errors, axis=0)

# 4. Compute 95% confidence intervals
# 1.96 standard deviations covers ~95% for a normal distribution

conf_interval = 1.96 * std_dev

print("Approximate 95% confidence interval (±) per output stat:")
print(f"Points: ±{conf_interval[0]:.2f}")
print(f"Rebounds: ±{conf_interval[1]:.2f}")
print(f"Assists: ±{conf_interval[2]:.2f}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Approximate 95% confidence interval (±) per output stat:
Points: ±5.67
Rebounds: ±3.33
Assists: ±1.62
