# Imports

In [None]:
from tqdm import tqdm
import numpy as np
import json
import os
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from nba_api.stats.static import players
from nba_api.stats.endpoints import leaguegamefinder, playercareerstats

# Select the compute device once: CUDA when available, otherwise the CPU.
# Later cells move the model and every batch to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fetch NBA Player Data

In [None]:
def fetchPlayerData(playerName):
    """Download a player's game logs and write one JSON file per season.

    The first match returned by ``players.find_players_by_full_name`` is used.
    For every season listed in that player's career stats, the selected game
    columns are written to
    ``data/raw/<playerName> <season> regular season games.json``.

    Args:
        playerName: Full name of the player to look up.

    Raises:
        ValueError: If no player matches ``playerName``.
    """
    matches = players.find_players_by_full_name(playerName)
    if not matches:
        # Raise instead of calling exit(): exit() would terminate the whole
        # interpreter/kernel rather than just reporting the bad name.
        raise ValueError(f"Player {playerName} not found")

    playerID = matches[0]['id']
    careerStats = playercareerstats.PlayerCareerStats(player_id=playerID)
    careerStatsDF = careerStats.get_data_frames()[0]
    seasons = careerStatsDF['SEASON_ID'].unique().tolist()

    # Loop-invariant: the same columns are kept for every season.
    desiredColumns = ['TEAM_NAME', 'GAME_DATE', 'MATCHUP', 'WL', 'PTS', 'REB', 'AST', 'STL', 'BLK']

    # Make sure the output directory exists before the first write.
    os.makedirs("data/raw", exist_ok=True)

    for season in seasons:
        # NOTE(review): no season-type filter is passed here although the file
        # name says "regular season" -- confirm what the endpoint returns.
        gameFinder = leaguegamefinder.LeagueGameFinder(player_id_nullable=playerID, season_nullable=season)
        games = gameFinder.get_data_frames()[0][desiredColumns]

        gamesDict = games.to_dict(orient='records')

        # Store data in JSON file (utf-8, matching how the files are read back)
        fileName = f"data/raw/{playerName} {season} regular season games.json"
        with open(fileName, 'w', encoding='utf-8') as JSONFile:
            json.dump(gamesDict, JSONFile, indent=4)

        print(f"Game data stored in {fileName}")

# Ask for a player name interactively and download that player's game logs.
playerNameInput = input("Enter player name:")
fetchPlayerData(playerName=playerNameInput)

# Preprocess Data

In [None]:
class JSONDataset(Dataset):
    """Map-style dataset over the records of a single JSON file.

    The file is parsed once in the constructor and kept in memory as
    ``self.data``. An optional ``transform`` callable is applied to each
    record when it is fetched.
    """

    def __init__(self, JSONFile, transform=None):
        """Load ``JSONFile`` (utf-8) and remember the optional ``transform``."""
        self.transform = transform
        with open(JSONFile, 'r', encoding='utf-8') as handle:
            self.data = json.load(handle)

    def __len__(self):
        """Return the number of loaded records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return record ``idx``, passed through ``transform`` when one is set.

        Raises:
            IndexError: If no records were loaded.
        """
        records = self.data
        if not records:
            raise IndexError("Dataset is empty or not loaded properly.")

        record = records[idx]
        return self.transform(record) if self.transform else record

In [None]:
def customTransform(sample):
    """Turn one game record into an ``(input, target)`` pair of tensors.

    The input is a float32 tensor holding the record's PTS, REB, AST, STL and
    BLK values in that order; the target is an independent copy of the input.
    """
    statKeys = ('PTS', 'REB', 'AST', 'STL', 'BLK')
    features = torch.tensor([sample[key] for key in statKeys], dtype=torch.float32)
    return features, features.clone()

In [None]:
def getJSONFilePaths(directory):
    """Return the normalized paths of all ``.json`` files under ``directory``.

    The directory tree is searched recursively. The result is sorted because
    ``os.walk`` yields entries in an arbitrary, filesystem-dependent order,
    and callers that index into the list (e.g. taking the last files as the
    validation/test split) need a reproducible ordering.

    Args:
        directory: Root directory to search.

    Returns:
        A sorted list of path strings; empty when nothing matches or the
        directory does not exist.
    """
    JSONFilePaths = []
    for root, _subdirs, files in os.walk(directory):
        JSONFilePaths.extend(
            os.path.normpath(os.path.join(root, fileName))
            for fileName in files
            if fileName.endswith(".json")
        )
    JSONFilePaths.sort()
    return JSONFilePaths

In [None]:
# Collect every JSON file under ./data/raw and show what was found.
# Later cells split this list into train/validation/test by position.
JSONFilePaths = getJSONFilePaths("./data/raw")
print("Relative JSON file paths:")
print(JSONFilePaths)

In [None]:
# Split by file position: all files except the last two are used for
# training (one dataset/loader per file), the second-to-last file is the
# validation set and the last file is the test set.
# NOTE(review): with fewer than three files the training list is empty (two
# files) or JSONFilePaths[-2] raises IndexError (one file) -- confirm that at
# least three season files are always present.
trainDatasets = [JSONDataset(json_file, transform=customTransform) for json_file in JSONFilePaths[0:-2]]
trainLoaders = [DataLoader(dataset, batch_size=2, shuffle=True) for dataset in trainDatasets]

# Validation and test loaders keep the file order (no shuffling).
valDataset = JSONDataset(JSONFilePaths[-2], transform=customTransform)
valLoader = DataLoader(valDataset, batch_size=2, shuffle=False)

testDataset = JSONDataset(JSONFilePaths[-1], transform=customTransform)
testLoader = DataLoader(testDataset, batch_size=2, shuffle=False)

# Define LSTM Model

In [None]:
class LSTMModel(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Args:
        inputSize: Number of features per time step.
        hiddenSize: Size of the LSTM hidden state.
        outputSize: Number of values produced by the final linear layer.
        numLayers: Number of stacked LSTM layers.
    """

    def __init__(self, inputSize=5, hiddenSize=128, outputSize=5, numLayers=1):
        super().__init__()
        self.hiddenSize = hiddenSize
        # Must match the layer count given to nn.LSTM below: forward() uses it
        # to size the initial hidden/cell states.
        self.numLayers = numLayers
        self.lstm = nn.LSTM(inputSize, hiddenSize, numLayers, batch_first=True)
        self.fc = nn.Linear(hiddenSize, outputSize)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the final time step.

        Args:
            x: Batch-first input; indexed as (batch, time, features) below.

        Returns:
            The linear projection of the LSTM output at the last time step.
        """
        # Zero initial states, created directly on the input's device/dtype.
        stateShape = (self.numLayers, x.size(0), self.hiddenSize)
        h0 = torch.zeros(stateShape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(stateShape, device=x.device, dtype=x.dtype)

        out, _ = self.lstm(x, (h0, c0))

        # Only the last time step feeds the output layer.
        return self.fc(out[:, -1, :])

# Build the model with its default sizes and move it to the selected device.
model = LSTMModel().to(device)

# Training Loop

In [None]:
def testModel(loader, model, criterion):
    model.eval()
    with torch.no_grad():
        totalLoss = 0
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            inputs = inputs.unsqueeze(1)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            totalLoss += loss.item()
    return totalLoss / len(loader)

In [None]:
def trainModel(NUM_EPOCHS = 40, lr = 0.001):
    """Train the module-level ``model`` and report validation loss per epoch.

    Uses MSE loss and Adam (betas 0.9/0.95, no weight decay). Every epoch
    iterates over each loader in ``trainLoaders`` in turn, then evaluates on
    ``valLoader`` and prints the result.

    Args:
        NUM_EPOCHS: Number of passes over all training loaders.
        lr: Learning rate for Adam.
    """
    lossFn = nn.MSELoss()
    adam = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=0)

    def optimizeBatch(features, labels):
        # One optimizer step; a length-1 time axis is inserted at dim 1.
        features = features.to(device).unsqueeze(1)
        labels = labels.to(device)
        adam.zero_grad()
        batchLoss = lossFn(model(features), labels)
        batchLoss.backward()
        adam.step()

    for epoch in range(NUM_EPOCHS):
        # testModel() leaves the model in eval mode, so re-enable training.
        model.train()
        print(f'==> Epoch {epoch+1}')

        for seasonLoader in trainLoaders:
            for features, labels in tqdm(seasonLoader):
                optimizeBatch(features, labels)

        lossVal = testModel(valLoader, model, lossFn)
        print(f'Validation Loss: {lossVal:.3f}')

# Train with the default settings (40 epochs, learning rate 0.001).
trainModel()

# Testing Metrics

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Evaluate the trained model on the test loader and collect per-game
# predictions and targets as NumPy rows.
model.eval()
predictions = []
trueLabels = []

with torch.no_grad():
    for inputs, targets in testLoader:
        inputs, targets = inputs.to(device), targets.to(device)
        # Insert a length-1 time axis at dim 1, as done during training.
        inputs = inputs.unsqueeze(1) 
        outputs = model(inputs)
        
        targetsNp = targets.cpu().numpy()
        outputsNp = outputs.cpu().numpy()
        
        # extend() adds one row per game in the batch.
        trueLabels.extend(targetsNp)
        predictions.extend(outputsNp)

# Stack the collected rows into 2-D arrays (one row per game); these arrays
# are reused by the plotting cell.
predictionsNp = np.array(predictions)
trueLabelsNp = np.array(trueLabels)

# NOTE(review): the inputs are 2-D (one column per statistic); presumably
# scikit-learn averages the per-column scores by default -- confirm.
mse = mean_squared_error(trueLabelsNp, predictionsNp)
mae = mean_absolute_error(trueLabelsNp, predictionsNp)
r2 = r2_score(trueLabelsNp, predictionsNp)

print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2):", r2)

# Plot Predicted vs Actual Season Statistics

In [None]:
import matplotlib.pyplot as plt

# One figure per statistic: column i of the test arrays holds that statistic.
for i, metric in enumerate(['PTS', 'REB', 'AST', 'STL', 'BLK']):
    plt.figure(figsize=(10, 6))
    plt.plot(trueLabelsNp[:, i], label='Actual', marker='o')
    # Predictions are rounded and floored at 0 before plotting.
    plt.plot(np.maximum(np.round(predictionsNp[:, i]), 0), label='Predicted', marker='x')
    plt.title(f'{metric}: Predicted vs Actual')
    plt.xlabel('Game')
    plt.ylabel(metric)
    plt.legend()
    plt.show()



# Predict Future Game Statistics

In [None]:
def JSONToNumpy(json_file):
    """Load a JSON list of game records into a 2-D metrics array.

    Each row holds one game's PTS, REB, AST, STL and BLK values, in that
    order.

    Args:
        json_file: Path to a utf-8 JSON file containing a list of game dicts.

    Returns:
        A NumPy array of shape ``(number_of_games, 5)``; an empty file yields
        shape ``(0, 5)``.
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    metricKeys = ('PTS', 'REB', 'AST', 'STL', 'BLK')
    metricsList = [[game[key] for key in metricKeys] for game in data]

    # reshape keeps the column axis even when there are no games, so callers
    # can always reduce along axis 0.
    metricsArray = np.array(metricsList).reshape(-1, len(metricKeys))

    return metricsArray

# Use the last discovered JSON file (the same file that backs the test set)
# as the history for the next-game prediction.
historicalData = JSONToNumpy(JSONFilePaths[-1])

In [None]:
def predictNextGame(model, historicalData):
    """Predict one game's metrics from the average of past games.

    The per-column mean of ``historicalData`` is fed to the model as a single
    sequence of length 1; the output is rounded and floored at 0.

    Args:
        model: Module to run; it is switched to eval mode.
        historicalData: 2-D array-like with one row per game.

    Returns:
        A 1-D NumPy array of rounded, non-negative predictions.

    Raises:
        ValueError: If ``historicalData`` contains no games.
    """
    model.eval()

    history = np.asarray(historicalData)
    if history.size == 0:
        # Averaging nothing would yield NaN and an opaque shape error later.
        raise ValueError("historicalData must contain at least one game.")

    avg_metrics = np.mean(history, axis=0)

    # Two unsqueezes add the batch axis and the length-1 time axis.
    input_data = torch.tensor(avg_metrics, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

    # Run on the device of the model that was passed in rather than relying
    # on a module-level global; parameter-free models stay on the CPU.
    firstParam = next(model.parameters(), None)
    if firstParam is not None:
        input_data = input_data.to(firstParam.device)

    with torch.no_grad():
        predictions = model(input_data).cpu().numpy()

    return np.maximum(np.round(predictions.flatten()), 0)

In [None]:
# Run the next-game prediction on the loaded history and print the result.
predictedMetrics = predictNextGame(model, historicalData)
print(f"Predicted Metrics for Next Game based on Historical Data: { predictedMetrics } ")