# Imports

In [1]:
from tqdm import tqdm
import numpy as np
import json
import os

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load JSON Files as Datasets

In [2]:
class JSONDataset(Dataset):
    """Map-style dataset backed by the parsed contents of a single JSON file.

    The file is read once at construction time and kept in memory as
    ``self.data``.

    Args:
        json_file: Path of the JSON file to load (opened as UTF-8).
        transform: Optional callable applied to an entry before it is returned.
    """

    def __init__(self, json_file, transform=None):
        self.transform = transform
        with open(json_file, 'r', encoding='utf-8') as handle:
            self.data = json.load(handle)

    def __len__(self):
        """Return the number of loaded entries."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return entry ``idx``, passed through ``transform`` when one is set.

        Raises:
            IndexError: If nothing was loaded (``self.data`` is empty).
        """
        if not self.data:
            raise IndexError("Dataset is empty or not loaded properly.")

        record = self.data[idx]
        return self.transform(record) if self.transform else record

def custom_transform(sample):
    """Turn one game record into an ``(input, target)`` pair of float32 tensors.

    The input holds the record's PTS, REB, AST, STL and BLK values in that
    order. The target is an independent copy of the same tensor, so a model
    trained on these pairs is asked to reproduce its own input.

    Args:
        sample: Mapping that provides the five stat keys listed above.

    Returns:
        Tuple ``(input_data, target)`` of two equal 1-D tensors of length 5.
    """
    stat_keys = ('PTS', 'REB', 'AST', 'STL', 'BLK')
    features = torch.tensor([sample[key] for key in stat_keys], dtype=torch.float32)
    return features, features.clone()

In [3]:
# Number of full passes over all training loaders made by the training loop.
NUM_EPOCHS = 40
# Samples per mini-batch for the train, validation and test DataLoaders.
BATCH_SIZE = 2
# Learning rate handed to the Adam optimizer.
# NOTE(review): no random seed is set in the visible cells, so weight
# initialisation and shuffling presumably differ between runs - confirm.
lr = 0.001

In [4]:
def get_json_file_paths(directory):
    """Recursively collect the ``.json`` files found under ``directory``.

    Args:
        directory: Root directory to search.

    Returns:
        Sorted list of paths, each relative to ``directory``. The paths are
        only directly openable when ``directory`` is the current working
        directory, which is how this notebook calls the function.
    """
    json_file_paths = [
        os.path.relpath(os.path.join(root, file), directory)
        for root, _dirs, files in os.walk(directory)
        for file in files
        if file.endswith(".json")
    ]
    # os.walk yields entries in an arbitrary, filesystem-dependent order, while
    # callers pick the train/validation/test files by list position. Sorting
    # makes that selection deterministic across machines and runs.
    return sorted(json_file_paths)

# Discover the JSON files under the current working directory and list them.
json_file_paths = get_json_file_paths(".")
print("Relative JSON file paths:", json_file_paths, sep="\n")

Relative JSON file paths:
['Luka_Doncic_2018-19_regular_season_games.json', 'Luka_Doncic_2019-20_regular_season_games.json', 'Luka_Doncic_2020-21_regular_season_games.json', 'Luka_Doncic_2021-22_regular_season_games.json', 'Luka_Doncic_2022-23_regular_season_games.json', 'Luka_Doncic_2023-24_regular_season_games.json']


In [5]:
# The split is made purely by position in json_file_paths: every file except
# the last two is used for training, with one Dataset/DataLoader per file.
# NOTE(review): this is only a chronological split if json_file_paths is in
# season order - confirm the ordering returned by get_json_file_paths.
train_datasets = [JSONDataset(json_file, transform=custom_transform) for json_file in json_file_paths[0:-2]]
train_loaders = [DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True) for dataset in train_datasets]

# The second-to-last file is held out for validation (iterated without shuffling).
val_dataset = JSONDataset(json_file_paths[-2], transform=custom_transform)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# The last file is held out for testing (iterated without shuffling).
test_dataset = JSONDataset(json_file_paths[-1], transform=custom_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Set Up the NN Model, Loss Function & Validation Function

In [6]:
class LSTMModel(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of values produced by the final linear layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one output vector per sequence.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``; the LSTM is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        # Allocate the zero initial states directly with the input's dtype and
        # device. This avoids a CPU allocation followed by a copy, and keeps
        # the states compatible with the input when the model is not float32.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        out, _ = self.lstm(x, (h0, c0))

        # Only the hidden output of the final time step feeds the linear head.
        return self.fc(out[:, -1, :])

In [7]:
# input_size=5 and output_size=5 (one value per stat built by custom_transform),
# with a hidden state of 128 units; the model is moved to the selected device.
model = LSTMModel(5,128,5).to(device)

# Mean squared error between the predicted and the target stat vectors.
criterion = nn.MSELoss()

# Adam with lr from the hyper-parameter cell, betas=(0.9, 0.95) and no weight decay.
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=0)

In [8]:
def test(loader, model, criterion):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            inputs = inputs.unsqueeze(1)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Training Loop

In [9]:
# Train for NUM_EPOCHS epochs. Each epoch walks the training loaders one after
# another, then prints the validation loss.
for epoch in range(NUM_EPOCHS):
    # Re-enable training mode every epoch because test() below switches to eval mode.
    model.train()
    print(f'==> Epoch {epoch+1}')
    
    for loader in train_loaders:
        # tqdm draws one progress bar per training loader per epoch.
        for inputs, targets in tqdm(loader):
            inputs, targets = inputs.to(device), targets.to(device)
            # Insert a length-1 dimension at position 1 before calling the model.
            inputs = inputs.unsqueeze(1)
            # Clear gradients left over from the previous step before backpropagating.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
    
    # Mean per-batch loss on the validation loader after this epoch's updates.
    val_loss = test(val_loader, model, criterion)
    print(f'Validation Loss: {val_loss:.3f}')

==> Epoch 1


100%|██████████| 39/39 [00:00<00:00, 117.62it/s]
100%|██████████| 38/38 [00:00<00:00, 265.74it/s]
100%|██████████| 39/39 [00:00<00:00, 265.29it/s]
100%|██████████| 42/42 [00:00<00:00, 264.15it/s]


Validation Loss: 69.038
==> Epoch 2


100%|██████████| 39/39 [00:00<00:00, 263.52it/s]
100%|██████████| 38/38 [00:00<00:00, 275.37it/s]
100%|██████████| 39/39 [00:00<00:00, 276.60it/s]
100%|██████████| 42/42 [00:00<00:00, 269.23it/s]


Validation Loss: 24.658
==> Epoch 3


100%|██████████| 39/39 [00:00<00:00, 258.28it/s]
100%|██████████| 38/38 [00:00<00:00, 281.51it/s]
100%|██████████| 39/39 [00:00<00:00, 204.79it/s]
100%|██████████| 42/42 [00:00<00:00, 250.93it/s]


Validation Loss: 14.066
==> Epoch 4


100%|██████████| 39/39 [00:00<00:00, 261.75it/s]
100%|██████████| 38/38 [00:00<00:00, 285.71it/s]
100%|██████████| 39/39 [00:00<00:00, 274.64it/s]
100%|██████████| 42/42 [00:00<00:00, 278.17it/s]


Validation Loss: 9.244
==> Epoch 5


100%|██████████| 39/39 [00:00<00:00, 268.96it/s]
100%|██████████| 38/38 [00:00<00:00, 238.99it/s]
100%|██████████| 39/39 [00:00<00:00, 232.14it/s]
100%|██████████| 42/42 [00:00<00:00, 253.01it/s]


Validation Loss: 6.264
==> Epoch 6


100%|██████████| 39/39 [00:00<00:00, 246.20it/s]
100%|██████████| 38/38 [00:00<00:00, 253.33it/s]
100%|██████████| 39/39 [00:00<00:00, 236.36it/s]
100%|██████████| 42/42 [00:00<00:00, 245.61it/s]


Validation Loss: 4.522
==> Epoch 7


100%|██████████| 39/39 [00:00<00:00, 236.37it/s]
100%|██████████| 38/38 [00:00<00:00, 243.59it/s]
100%|██████████| 39/39 [00:00<00:00, 231.44it/s]
100%|██████████| 42/42 [00:00<00:00, 212.12it/s]


Validation Loss: 3.494
==> Epoch 8


100%|██████████| 39/39 [00:00<00:00, 230.77it/s]
100%|██████████| 38/38 [00:00<00:00, 243.58it/s]
100%|██████████| 39/39 [00:00<00:00, 230.77it/s]
100%|██████████| 42/42 [00:00<00:00, 245.61it/s]


Validation Loss: 2.736
==> Epoch 9


100%|██████████| 39/39 [00:00<00:00, 248.42it/s]
100%|██████████| 38/38 [00:00<00:00, 269.50it/s]
100%|██████████| 39/39 [00:00<00:00, 270.82it/s]
100%|██████████| 42/42 [00:00<00:00, 276.32it/s]


Validation Loss: 2.173
==> Epoch 10


100%|██████████| 39/39 [00:00<00:00, 268.97it/s]
100%|██████████| 38/38 [00:00<00:00, 273.38it/s]
100%|██████████| 39/39 [00:00<00:00, 268.88it/s]
100%|██████████| 42/42 [00:00<00:00, 276.31it/s]


Validation Loss: 1.901
==> Epoch 11


100%|██████████| 39/39 [00:00<00:00, 291.01it/s]
100%|██████████| 38/38 [00:00<00:00, 261.82it/s]
100%|██████████| 39/39 [00:00<00:00, 284.67it/s]
100%|██████████| 42/42 [00:00<00:00, 280.00it/s]


Validation Loss: 1.574
==> Epoch 12


100%|██████████| 39/39 [00:00<00:00, 276.60it/s]
100%|██████████| 38/38 [00:00<00:00, 273.38it/s]
100%|██████████| 39/39 [00:00<00:00, 288.89it/s]
100%|██████████| 42/42 [00:00<00:00, 278.15it/s]


Validation Loss: 1.336
==> Epoch 13


100%|██████████| 39/39 [00:00<00:00, 270.84it/s]
100%|██████████| 38/38 [00:00<00:00, 273.38it/s]
100%|██████████| 39/39 [00:00<00:00, 293.23it/s]
100%|██████████| 42/42 [00:00<00:00, 289.65it/s]


Validation Loss: 1.208
==> Epoch 14


100%|██████████| 39/39 [00:00<00:00, 274.65it/s]
100%|██████████| 38/38 [00:00<00:00, 283.58it/s]
100%|██████████| 39/39 [00:00<00:00, 274.65it/s]
100%|██████████| 42/42 [00:00<00:00, 278.15it/s]


Validation Loss: 1.176
==> Epoch 15


100%|██████████| 39/39 [00:00<00:00, 268.96it/s]
100%|██████████| 38/38 [00:00<00:00, 281.48it/s]
100%|██████████| 39/39 [00:00<00:00, 280.56it/s]
100%|██████████| 42/42 [00:00<00:00, 267.52it/s]


Validation Loss: 1.033
==> Epoch 16


100%|██████████| 39/39 [00:00<00:00, 278.57it/s]
100%|██████████| 38/38 [00:00<00:00, 237.50it/s]
100%|██████████| 39/39 [00:00<00:00, 284.66it/s]
100%|██████████| 42/42 [00:00<00:00, 297.87it/s]


Validation Loss: 0.879
==> Epoch 17


100%|██████████| 39/39 [00:00<00:00, 286.76it/s]
100%|██████████| 38/38 [00:00<00:00, 279.42it/s]
100%|██████████| 39/39 [00:00<00:00, 293.22it/s]
100%|██████████| 42/42 [00:00<00:00, 280.00it/s]


Validation Loss: 0.809
==> Epoch 18


100%|██████████| 39/39 [00:00<00:00, 276.59it/s]
100%|██████████| 38/38 [00:00<00:00, 287.90it/s]
100%|██████████| 39/39 [00:00<00:00, 282.61it/s]
100%|██████████| 42/42 [00:00<00:00, 285.71it/s]


Validation Loss: 0.898
==> Epoch 19


100%|██████████| 39/39 [00:00<00:00, 284.67it/s]
100%|██████████| 38/38 [00:00<00:00, 277.37it/s]
100%|██████████| 39/39 [00:00<00:00, 295.45it/s]
100%|██████████| 42/42 [00:00<00:00, 289.66it/s]


Validation Loss: 0.814
==> Epoch 20


100%|██████████| 39/39 [00:00<00:00, 276.60it/s]
100%|██████████| 38/38 [00:00<00:00, 233.13it/s]
100%|██████████| 39/39 [00:00<00:00, 272.73it/s]
100%|██████████| 42/42 [00:00<00:00, 269.23it/s]


Validation Loss: 0.746
==> Epoch 21


100%|██████████| 39/39 [00:00<00:00, 267.12it/s]
100%|██████████| 38/38 [00:00<00:00, 267.61it/s]
100%|██████████| 39/39 [00:00<00:00, 288.89it/s]
100%|██████████| 42/42 [00:00<00:00, 270.97it/s]


Validation Loss: 0.655
==> Epoch 22


100%|██████████| 39/39 [00:00<00:00, 274.65it/s]
100%|██████████| 38/38 [00:00<00:00, 285.70it/s]
100%|██████████| 39/39 [00:00<00:00, 276.59it/s]
100%|██████████| 42/42 [00:00<00:00, 287.68it/s]


Validation Loss: 0.703
==> Epoch 23


100%|██████████| 39/39 [00:00<00:00, 232.14it/s]
100%|██████████| 38/38 [00:00<00:00, 265.72it/s]
100%|██████████| 39/39 [00:00<00:00, 254.90it/s]
100%|██████████| 42/42 [00:00<00:00, 242.77it/s]


Validation Loss: 0.548
==> Epoch 24


100%|██████████| 39/39 [00:00<00:00, 268.96it/s]
100%|██████████| 38/38 [00:00<00:00, 267.61it/s]
100%|██████████| 39/39 [00:00<00:00, 258.27it/s]
100%|██████████| 42/42 [00:00<00:00, 291.67it/s]


Validation Loss: 0.519
==> Epoch 25


100%|██████████| 39/39 [00:00<00:00, 282.61it/s]
100%|██████████| 38/38 [00:00<00:00, 275.35it/s]
100%|██████████| 39/39 [00:00<00:00, 284.68it/s]
100%|██████████| 42/42 [00:00<00:00, 278.14it/s]


Validation Loss: 0.542
==> Epoch 26


100%|██████████| 39/39 [00:00<00:00, 293.23it/s]
100%|██████████| 38/38 [00:00<00:00, 281.48it/s]
100%|██████████| 39/39 [00:00<00:00, 270.83it/s]
100%|██████████| 42/42 [00:00<00:00, 276.32it/s]


Validation Loss: 0.493
==> Epoch 27


100%|██████████| 39/39 [00:00<00:00, 272.73it/s]
100%|██████████| 38/38 [00:00<00:00, 273.38it/s]
100%|██████████| 39/39 [00:00<00:00, 268.97it/s]
100%|██████████| 42/42 [00:00<00:00, 281.87it/s]


Validation Loss: 0.454
==> Epoch 28


100%|██████████| 39/39 [00:00<00:00, 278.58it/s]
100%|██████████| 38/38 [00:00<00:00, 279.41it/s]
100%|██████████| 39/39 [00:00<00:00, 282.59it/s]
100%|██████████| 42/42 [00:00<00:00, 287.68it/s]


Validation Loss: 0.482
==> Epoch 29


100%|██████████| 39/39 [00:00<00:00, 276.59it/s]
100%|██████████| 38/38 [00:00<00:00, 283.58it/s]
100%|██████████| 39/39 [00:00<00:00, 298.93it/s]
100%|██████████| 42/42 [00:00<00:00, 225.81it/s]


Validation Loss: 0.547
==> Epoch 30


100%|██████████| 39/39 [00:00<00:00, 280.58it/s]
100%|██████████| 38/38 [00:00<00:00, 287.88it/s]
100%|██████████| 39/39 [00:00<00:00, 286.76it/s]
100%|██████████| 42/42 [00:00<00:00, 276.31it/s]


Validation Loss: 0.386
==> Epoch 31


100%|██████████| 39/39 [00:00<00:00, 280.57it/s]
100%|██████████| 38/38 [00:00<00:00, 275.38it/s]
100%|██████████| 39/39 [00:00<00:00, 293.23it/s]
100%|██████████| 42/42 [00:00<00:00, 278.15it/s]


Validation Loss: 0.359
==> Epoch 32


100%|██████████| 39/39 [00:00<00:00, 284.65it/s]
100%|██████████| 38/38 [00:00<00:00, 290.08it/s]
100%|██████████| 39/39 [00:00<00:00, 278.57it/s]
100%|██████████| 42/42 [00:00<00:00, 283.79it/s]


Validation Loss: 0.415
==> Epoch 33


100%|██████████| 39/39 [00:00<00:00, 282.61it/s]
100%|██████████| 38/38 [00:00<00:00, 301.59it/s]
100%|██████████| 39/39 [00:00<00:00, 276.59it/s]
100%|██████████| 42/42 [00:00<00:00, 269.23it/s]


Validation Loss: 0.393
==> Epoch 34


100%|██████████| 39/39 [00:00<00:00, 261.74it/s]
100%|██████████| 38/38 [00:00<00:00, 281.49it/s]
100%|██████████| 39/39 [00:00<00:00, 277.57it/s]
100%|██████████| 42/42 [00:00<00:00, 293.70it/s]


Validation Loss: 0.310
==> Epoch 35


100%|██████████| 39/39 [00:00<00:00, 288.89it/s]
100%|██████████| 38/38 [00:00<00:00, 273.39it/s]
100%|██████████| 39/39 [00:00<00:00, 282.61it/s]
100%|██████████| 42/42 [00:00<00:00, 297.87it/s]


Validation Loss: 0.318
==> Epoch 36


100%|██████████| 39/39 [00:00<00:00, 288.89it/s]
100%|██████████| 38/38 [00:00<00:00, 274.72it/s]
100%|██████████| 39/39 [00:00<00:00, 284.67it/s]
100%|██████████| 42/42 [00:00<00:00, 287.42it/s]


Validation Loss: 0.314
==> Epoch 37


100%|██████████| 39/39 [00:00<00:00, 295.46it/s]
100%|██████████| 38/38 [00:00<00:00, 237.50it/s]
100%|██████████| 39/39 [00:00<00:00, 270.62it/s]
100%|██████████| 42/42 [00:00<00:00, 281.88it/s]


Validation Loss: 0.281
==> Epoch 38


100%|██████████| 39/39 [00:00<00:00, 288.89it/s]
100%|██████████| 38/38 [00:00<00:00, 275.36it/s]
100%|██████████| 39/39 [00:00<00:00, 309.52it/s]
100%|██████████| 42/42 [00:00<00:00, 274.50it/s]


Validation Loss: 0.331
==> Epoch 39


100%|██████████| 39/39 [00:00<00:00, 293.24it/s]
100%|██████████| 38/38 [00:00<00:00, 296.87it/s]
100%|██████████| 39/39 [00:00<00:00, 288.89it/s]
100%|██████████| 42/42 [00:00<00:00, 283.78it/s]


Validation Loss: 0.295
==> Epoch 40


100%|██████████| 39/39 [00:00<00:00, 291.04it/s]
100%|██████████| 38/38 [00:00<00:00, 281.49it/s]
100%|██████████| 39/39 [00:00<00:00, 288.89it/s]
100%|██████████| 42/42 [00:00<00:00, 279.99it/s]

Validation Loss: 0.288





# Testing Metrics

In [10]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Collect the model output and the target for every sample of the test loader,
# then score them with scikit-learn's regression metrics.
model.eval()
predictions = []
true_labels = []

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        # Insert a length-1 dimension at position 1 before calling the model.
        inputs = inputs.unsqueeze(1) 
        outputs = model(inputs)
        
        # Move to the CPU before converting to NumPy.
        targets_np = targets.cpu().numpy()
        outputs_np = outputs.cpu().numpy()
        
        # extend() iterates over the first axis, so each sample's row is appended separately.
        true_labels.extend(targets_np)
        predictions.extend(outputs_np)

# Stack the per-sample rows into arrays with one row per sample.
predictions = np.array(predictions)
true_labels = np.array(true_labels)

# NOTE(review): the targets built by custom_transform are copies of the inputs,
# so these scores measure how well the model reproduces its input - confirm
# that this is the intended evaluation.
mse = mean_squared_error(true_labels, predictions)
mae = mean_absolute_error(true_labels, predictions)
r2 = r2_score(true_labels, predictions)

print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2):", r2)

Mean Squared Error (MSE): 0.49089733
Mean Absolute Error (MAE): 0.1764529
R-squared (R2): 0.9849273562431335


# Plot Predicted vs Actual Season Statistics

In [None]:
import matplotlib.pyplot as plt

# One figure per stat: actual values against predictions that are rounded to
# the nearest integer and clipped at zero.
stat_names = ['PTS', 'REB', 'AST', 'STL', 'BLK']
for column, metric in enumerate(stat_names):
    clipped_predictions = np.maximum(np.round(predictions[:, column]), 0)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(true_labels[:, column], label='Actual', marker='o')
    ax.plot(clipped_predictions, label='Predicted', marker='x')
    ax.set_title(f'{metric}: Predicted vs Actual')
    ax.set_xlabel('Game')
    ax.set_ylabel(metric)
    ax.legend()
    plt.show()



# Find Game-Specific Stats

In [None]:
def find_game_index_by_date(target_game_date, json_file=None):
    """Return the position of the first game played on ``target_game_date``.

    Args:
        target_game_date: Value compared for equality against each game's
            ``'GAME_DATE'`` entry.
        json_file: Path of the JSON file to search. Defaults to the last entry
            of ``json_file_paths``, the same file the test dataset is built from.

    Returns:
        Zero-based index of the first matching game, or ``None`` when no game
        matches.
    """
    if json_file is None:
        # Use the same "last file" convention as the test dataset rather than
        # a hard-coded position in the list.
        json_file = json_file_paths[-1]

    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for index, game in enumerate(data):
        if game.get('GAME_DATE') == target_game_date:
            return index

    # Return None if game date is not found
    return None

In [None]:
def plot_predictions_vs_actuals_for_game(model, loader, device, game_date):
    """Plot the rounded prediction against the actual stats of a single game.

    Args:
        model: Trained model, called with an input of shape ``(1, 1, n_stats)``.
        loader: DataLoader whose ``dataset`` returns ``(inputs, targets)`` tensor
            pairs. It is assumed to wrap the same file that
            ``find_game_index_by_date`` searches, so the index refers to the
            same game.
        device: Device the input tensor is moved to before calling ``model``.
        game_date: Date looked up with ``find_game_index_by_date``.

    Raises:
        ValueError: If no game with ``game_date`` is found.
    """
    model.eval()
    game_index = find_game_index_by_date(game_date)
    if game_index is None:
        raise ValueError(f"No game found for date {game_date!r}.")

    # Index the underlying dataset rather than the first mini-batch: the game
    # index counts samples in the whole dataset, while a batch only holds
    # batch_size of them.
    inputs, targets = loader.dataset[game_index]
    # Add the batch and sequence dimensions expected by the model.
    inputs = inputs.unsqueeze(0).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(inputs)
        predictions = outputs.cpu().numpy()
        true_labels = targets.cpu().numpy()

    # Round to the nearest integer and clip at zero.
    rounded_predictions = np.maximum(np.round(predictions.flatten()), 0)

    plt.figure(figsize=(8, 5))
    plt.plot(true_labels.flatten(), label='Actual', marker='o')
    plt.plot(rounded_predictions, label='Predicted', marker='x')

    print("Predicted Values:", rounded_predictions)
    print("Actual Values:", true_labels.flatten())

    # Label the x axis with the stat names, in the order the tensors store them.
    metric_names = ['PTS', 'REB', 'AST', 'STL', 'BLK']
    plt.xticks(np.arange(len(metric_names)), metric_names)

    plt.title(f'Predicted vs Actual for Game {game_date}')
    plt.xlabel('Metric')
    plt.legend()
    plt.show()

# NOTE(review): the discovered files are named *_regular_season_games.json;
# confirm that a game with GAME_DATE "2024-06-09" exists in the searched file and
# that the dates use this format - find_game_index_by_date returns None otherwise.
plot_predictions_vs_actuals_for_game(model, test_loader, device, "2024-06-09")


# Predict Future Game Stats

In [None]:
def json_to_numpy(json_file):
    """Load a JSON file of game records into a NumPy array of stats.

    Args:
        json_file: Path of a JSON file holding a sequence of game records,
            each providing the keys PTS, REB, AST, STL and BLK.

    Returns:
        Array with one row per game and the five stats as columns, in the
        order listed above.
    """
    stat_keys = ('PTS', 'REB', 'AST', 'STL', 'BLK')

    with open(json_file, 'r') as handle:
        games = json.load(handle)

    return np.array([[game[key] for key in stat_keys] for game in games])

# Stats matrix of the last discovered file, the same file the test dataset is built from.
historical_data = json_to_numpy(json_file_paths[-1])

In [None]:
def predict_next_game(model, historical_data):
    """Predict the next game's stats from the average of past games.

    The per-stat mean of ``historical_data`` is fed to ``model`` as a single
    one-step sequence. The output is rounded to the nearest integer and
    clipped at zero.

    Args:
        model: Trained model, called with an input of shape ``(1, 1, n_stats)``.
        historical_data: Array-like with one row per game and one column per stat.

    Returns:
        1-D NumPy array of rounded, non-negative predicted stats.

    Raises:
        ValueError: If ``historical_data`` is empty.
    """
    model.eval()

    historical_data = np.asarray(historical_data)
    if historical_data.size == 0:
        # The mean of an empty array is NaN, which would otherwise flow through
        # the model and out of this function unnoticed.
        raise ValueError("historical_data is empty; cannot average past games.")

    # Column-wise mean: one average value per stat.
    avg_metrics = np.mean(historical_data, axis=0)

    # Add the batch and sequence dimensions expected by the model.
    input_data = torch.tensor(avg_metrics, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

    # Follow the device the model lives on instead of a notebook-level global;
    # a parameter-free model leaves the input where it is.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        input_data = input_data.to(first_param.device)

    with torch.no_grad():
        outputs = model(input_data)
        predictions = outputs.cpu().numpy()

    return np.maximum(np.round(predictions.flatten()), 0)

In [None]:
# Example usage: feed the averaged stats of historical_data through the trained model.
predicted_metrics = predict_next_game(model, historical_data)
print("Predicted Metrics for Next Game based on Historical Data:", predicted_metrics)