In [3]:
!pip install pytorch_lightning

Collecting pytorch_lightning
  Downloading pytorch_lightning-2.5.1.post0-py3-none-any.whl.metadata (20 kB)
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.1.0->pytorch_lightning)
  Dow

In [4]:
import numpy as np
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pytorch_lightning as pl
from torchmetrics.classification import Accuracy

In [5]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("nathanlauga/nba-games")

print("Path to dataset files:", path)
df = pd.read_csv("/kaggle/input/nba-games/games.csv")

Path to dataset files: /kaggle/input/nba-games


In [7]:
df.columns

Index(['GAME_DATE_EST', 'GAME_ID', 'GAME_STATUS_TEXT', 'HOME_TEAM_ID',
       'VISITOR_TEAM_ID', 'SEASON', 'TEAM_ID_home', 'PTS_home', 'FG_PCT_home',
       'FT_PCT_home', 'FG3_PCT_home', 'AST_home', 'REB_home', 'TEAM_ID_away',
       'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 'AST_away',
       'REB_away', 'HOME_TEAM_WINS'],
      dtype='object')

In [6]:
df.dropna(subset=['HOME_TEAM_WINS'], inplace=True)

In [9]:
# Drop non-numeric columns or columns that won't be used as features
# 'GAME_DATE_EST' and 'GAME_ID' are not suitable features for a basic model
# 'TEAM_ID_home', 'TEAM_ID_away' are IDs, consider using one-hot encoding or embedding if needed,
# but for a simple model, let's exclude for now or see if the model can handle them.
# Let's keep columns that are numerical and potentially relevant
feature_columns = [
    'SEASON', 'PTS_home', 'FG_PCT_home', 'FT_PCT_home', 'FG3_PCT_home',
    'AST_home', 'REB_home', 'PTS_away', 'FG_PCT_away', 'FT_PCT_away',
    'FG3_PCT_away', 'AST_away', 'REB_away'
]

# Ensure all feature columns exist in the DataFrame
feature_columns = [col for col in feature_columns if col in df.columns]

X = df[feature_columns].values
y = df['HOME_TEAM_WINS'].values

# Handle potential missing values in features (simple imputation with mean)
for i in range(X.shape[1]):
    col_mean = np.nanmean(X[:, i])
    X[np.isnan(X[:, i]), i] = col_mean

In [13]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1) # unsqueeze for BCELoss
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

In [14]:
# PyTorch Lightning DataModule
class NBAGamesDataModule(pl.LightningDataModule):
    def __init__(self, X_train, y_train, X_test, y_test, batch_size=32):
        super().__init__()
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.batch_size = batch_size

    def setup(self, stage=None):
        self.train_dataset = torch.utils.data.TensorDataset(self.X_train, self.y_train)
        self.test_dataset = torch.utils.data.TensorDataset(self.X_test, self.y_test)

    def train_dataloader(self):
        return torch.utils.data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def test_dataloader(self):
        return torch.utils.data.DataLoader(self.test_dataset, batch_size=self.batch_size)

# PyTorch Lightning Model
class NBAPredictionModel(pl.LightningModule):
    def __init__(self, input_dim):
        super().__init__()
        self.layer_1 = nn.Linear(input_dim, 64)
        self.layer_2 = nn.Linear(64, 32)
        self.layer_3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.criterion = nn.BCEWithLogitsLoss() # Use BCELoss with logits for numerical stability
        self.accuracy = Accuracy(task="binary")

    def forward(self, x):
        x = self.relu(self.layer_1(x))
        x = self.relu(self.layer_2(x))
        x = self.layer_3(x) # Output logits
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.round(self.sigmoid(logits)) # Apply sigmoid and round for predictions
        acc = self.accuracy(preds, y.int()) # Ensure target is int for Accuracy metric
        self.log('train_loss', loss)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.round(self.sigmoid(logits))
        acc = self.accuracy(preds, y.int())
        self.log('test_loss', loss)
        self.log('test_acc', acc)
        return loss

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        return optimizer

In [15]:
# Instantiate DataModule and Model
input_dimension = X_train_tensor.shape[1]
data_module = NBAGamesDataModule(X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor)
model = NBAPredictionModel(input_dim=input_dimension)

# Instantiate Trainer
trainer = pl.Trainer(max_epochs=5, accelerator='auto') # Use 'auto' to let Pytorch Lightning decide accelerator

# Train the model
trainer.fit(model, data_module)

# Test the model
trainer.test(model, data_module)

# Example prediction (using test data first element)
model.eval() # Set model to evaluation mode

INFO:pytorch_lightning.utilities.rank_zero:Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | layer_1   | Linear            | 896    | train
1 | layer_2   | Linear            | 2.1 K  | train
2 | layer_3   | Linear            | 33     | train
3 | relu      | ReLU              | 0      | train
4 | sigmoid   | Sigmoid           | 0      | train
5 | criterion | BCEWithLogitsLoss | 0      | train
6 | accuracy  | BinaryAccuracy    | 0      | train
----------------------------------------------

Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Testing: |          | 0/? [00:00<?, ?it/s]

TypeError: round(): argument 'input' (position 1) must be Tensor, not float

In [17]:
trainer.test(model, data_module)
model.eval()

Testing: |          | 0/? [00:00<?, ?it/s]

NBAPredictionModel(
  (layer_1): Linear(in_features=13, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=32, bias=True)
  (layer_3): Linear(in_features=32, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
  (criterion): BCEWithLogitsLoss()
  (accuracy): BinaryAccuracy()
)

In [None]:
with torch.no_grad():
    sample_input = X_test_tensor[0].unsqueeze(0) # Add batch dimension
    logits = model(sample_input)
    probability_of_home_win = torch.sigmoid(logits).item()
    predicted_class = torch.round(probability_of_home_win).item()

print(f"\nSample input features: {X_test_scaled[0]}")
print(f"Predicted probability of home win: {probability_of_home_win:.4f}")
print(f"Predicted outcome (0: Away Win, 1: Home Win): {int(predicted_class)}")
print(f"Actual outcome: {y_test[0]}")