In [13]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt


In [9]:
# Load the data
spurs = pd.read_csv('..\\data\\spurs.csv')
spurs = spurs[(spurs['season_type']==0)  &(spurs['season']<2020) & (spurs['season']>2000)].reset_index(drop=True)

## Using regular season games to predict regular season games

In [10]:
num_train_samples = int(0.6 * len(spurs))
num_val_samples = int(0.20 * len(spurs))
num_test_samples = len(spurs) - num_train_samples - num_val_samples

In [11]:
sequence_length = 82
batch_size = 32
h = 1
delay = sequence_length + h - 1

scores=spurs['score']

train_dataset = keras.utils.timeseries_dataset_from_array(
    data = scores[:-delay],
    targets=scores[delay:],
    sequence_length=sequence_length,
    shuffle=True,
    batch_size=batch_size,
    start_index=0,
    end_index=num_train_samples)

val_dataset = keras.utils.timeseries_dataset_from_array(
    data = scores[:-delay],
    targets=scores[delay:],
    sequence_length=sequence_length,
    shuffle=True,
    batch_size=batch_size,
    start_index=num_train_samples,
    end_index=num_train_samples + num_val_samples)

test_dataset = keras.utils.timeseries_dataset_from_array(
    data = scores[:-delay],
    targets=scores[delay:],
    sequence_length=sequence_length,
    shuffle=True,
    batch_size=batch_size,
    start_index=num_train_samples + num_val_samples)

In [12]:
num_features=1
inputs = keras.Input(shape=(sequence_length, num_features))
x = layers.SimpleRNN(64)(inputs)
outputs = layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

callbacks = [keras.callbacks.ModelCheckpoint("RNN.keras",save_best_only=True, monitor='val_loss')]
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

history = model.fit(train_dataset,
                    epochs=20,
                    validation_data=val_dataset,
                    callbacks=callbacks)

model = keras.models.load_model("RNN.keras")
rnn_mae = model.evaluate(test_dataset)[1]
print(f"Test MAE: {rnn_mae:.3f}")



Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test MAE: 64.022


In [15]:
inputs = keras.Input(shape=(sequence_length, num_features))
x = layers.LSTM(64)(inputs)
outputs = layers.Dense(1)(x)
lstm = keras.Model(inputs, outputs)

lstm.summary()

callbacks = [keras.callbacks.ModelCheckpoint("LSTM.keras",save_best_only=True, monitor='val_loss')]
lstm.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

history = lstm.fit(train_dataset,
                    epochs=20,
                    validation_data=val_dataset,
                    callbacks=callbacks)

lstm = keras.models.load_model("LSTM.keras")
lstm_mae=lstm.evaluate(test_dataset)[1]
print(f"Test MAE: {lstm_mae:.3f}")

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 82, 1)]           0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                16896     
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 16961 (66.25 KB)
Trainable params: 16961 (66.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test MAE: 74.386


In [16]:
inputs = keras.Input(shape=(sequence_length, num_features))
x = layers.LSTM(128, recurrent_dropout=0.1, return_sequences=True)(inputs)
x = layers.SimpleRNN(128, recurrent_dropout=0.1)(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

model.summary()

callbacks=[keras.callbacks.ModelCheckpoint("RNN_LSTM_Stacked.keras",save_best_only=True)]
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

history = model.fit(train_dataset, epochs=20, validation_data=val_dataset, callbacks=callbacks)

model = keras.models.load_model("RNN_LSTM_Stacked.keras")
rls_mae = model.evaluate(test_dataset)[1]
print(f"Test MAE: {rls_mae:.3f}")

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 82, 1)]           0         
                                                                 
 lstm_2 (LSTM)               (None, 82, 128)           66560     
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 128)               32896     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_3 (Dense)             (None, 1)                 129       
                                                                 
Total params: 99585 (389.00 KB)
Trainable params: 99585 (389.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/20
Epoch 2/20

In [17]:
## Borrowed your function to evaluate the naive method
def evaluate_naive_method(dataset):
    total_abs_err = 0. # Initializes a variable to accumulate the total absolute error of the predictions. This is a floating-point variable, as indicated by the .0.
    samples_seen = 0   # Initializes a counter for the total number of samples (data points) seen during the evaluation.
    for samples, targets in dataset:
        preds = samples[:, -1] # naive forecast is the last value observed in the sequence
        total_abs_err += np.sum(np.abs(preds - targets))
        samples_seen += samples.shape[0] # number of samples in the batch
    return total_abs_err / samples_seen

print('Validation MAE:', evaluate_naive_method(test_dataset))

Validation MAE: 12.503875968992247


## Using regular season games to predict playoff games

In [None]:
spurs = pd.read_csv('..\\data\\spurs.csv')
spurs = spurs[(spurs['season']!=2012) &(spurs['season']<2020) & (spurs['season']>2000)].reset_index(drop=True)