In [178]:
#Import Libraries
import csv
import os

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, Dataset

In [179]:
# Load the games CSV (path resolved against the current working directory);
# the first CSV column becomes the DataFrame index.
relative_path = "datasets/nba_games.csv"
read_file = os.path.join(os.getcwd(), relative_path)
df = pd.read_csv(filepath_or_buffer=read_file, index_col=0)

In [180]:
# Put the rows in date order and renumber them from zero, then discard
# the three columns this notebook does not use.
df = df.sort_values("date").reset_index(drop=True)
df = df.drop(columns=["mp.1", "mp_opp.1", "index_opp"])

In [181]:
# Add a target column: "Whether team won next game"
def add_target(group):
    """Return a copy of `group` with a `target` column holding the next row's `won`.

    Args:
        group: DataFrame with a `won` column, ordered so that the following
            row is the following game.

    Returns:
        A new DataFrame; `group` itself is not modified. `target` in row i is
        `won` from row i + 1, and the last row's `target` is NaN because
        nothing follows it.
    """
    # assign() replaces an existing `target` column instead of appending a
    # second one, so calling this again on its own output stays well-formed.
    return group.assign(target=group["won"].shift(-1))

# Label every row with the result of the same team's next game. A grouped
# shift keeps all columns and the row order of df intact, can be re-run without
# creating a second `target` column, and avoids groupby(...).apply(...) on a
# frame that still contains the grouping column (deprecated in pandas 2.2).
df["target"] = df.groupby("team")["won"].shift(-1)
# A team's last row has no following game; mark it with the sentinel value 2.
df.loc[df["target"].isna(), "target"] = 2
df["target"] = df["target"].astype(int, errors="ignore")


In [182]:
# Count missing values per column, keep only the columns that have any,
# and rebuild df from the columns that have none.
nulls = df.isna().sum()
nulls = nulls.loc[nulls > 0]
valid_columns = df.columns[~df.columns.isin(nulls.index)]
df = df.loc[:, valid_columns].copy()

In [183]:
# Every column except these six is treated as a stat column.
removed_columns = ["season", "date", "won", "target", "team", "team_opp"]
selected_columns = df.columns[~df.columns.isin(removed_columns)]

# Rescale the stat columns with a MinMaxScaler fitted on the whole frame.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[selected_columns])
df[selected_columns] = scaled_values

In [184]:
#Calculate rolling averages of stat columns
rolling = df[list(selected_columns) + ["won", "team", "season"]]

def find_team_averages(team):
    """Return a copy of `team` whose stat columns hold 10-row rolling means.

    Only the columns named in the module-level `selected_columns` are
    averaged; all other columns pass through unchanged. The first nine rows
    are NaN in the averaged columns because the 10-row window is not yet full.

    Args:
        team: DataFrame holding the rows of one group, in game order.

    Returns:
        A new DataFrame with the same index and columns as `team`.
    """
    # Copy first so the frame handed in by the caller is never modified.
    team = team.copy()
    team[selected_columns] = team[selected_columns].rolling(10).mean()
    return team

# Average within each (team, season) group so a window never spans two teams
# or two seasons. The groups are iterated explicitly rather than through
# groupby(...).apply(...), which is deprecated (pandas 2.2) when the applied
# function operates on the grouping columns; `team` and `season` must survive
# here. sort_index() puts the rows back in their original order.
rolling = pd.concat(
    [find_team_averages(games) for _, games in rolling.groupby(["team", "season"])]
).sort_index()
# Suffix every column with "_10" so the names do not clash with those in df.
rolling_cols = [f"{col}_10" for col in rolling.columns]
rolling.columns = rolling_cols

In [185]:
# Attach the rolling columns side by side, discard every row that contains
# a null, and renumber the remaining rows from zero.
df = pd.concat([df, rolling], axis=1).dropna().reset_index(drop=True)

In [186]:
#Add future game data to columns

def shift_col(team, col_name):
    """Return `team[col_name]` shifted up one row (row i gets the value of row i + 1).

    The last row of the result is NaN. `add_col` no longer needs this helper;
    it is kept so existing callers keep working.
    """
    return team[col_name].shift(-1)

def add_col(df, col_name):
    """Return, for every row, the value `col_name` takes in the same team's next row.

    Args:
        df: DataFrame with a `team` column and a `col_name` column.
        col_name: Name of the column to look ahead in.

    Returns:
        A Series on df's index; the last row of each team is NaN.
    """
    # The built-in grouped shift always returns a Series aligned to df.
    # groupby(...).apply(...) instead stacks the results into a wide DataFrame
    # when there is only one team, which breaks the column assignment.
    return df.groupby("team")[col_name].shift(-1)

# For each of these columns, store the value from the same team's next row
# under "<column>_next".
for source_col in ("home", "team_opp", "date"):
    df[f"{source_col}_next"] = add_col(df, source_col)

In [187]:
# Join each row to the row whose `team_opp_next` is this row's `team` and whose
# `date_next` is the same, bringing in that row's rolling columns.
full = pd.merge(
    df,
    df[rolling_cols + ["team_opp_next", "date_next", "team"]],
    left_on=["team", "date_next"],
    right_on=["team_opp_next", "date_next"],
)

In [177]:
# Print the name of every column in the merged frame, one per line.
for column_name in list(full.columns):
    print(column_name)

mp
fg
fga
fg%
3p
3pa
3p%
ft
fta
ft%
orb
drb
trb
ast
stl
blk
tov
pf
pts
ts%
efg%
3par
ftr
orb%
drb%
trb%
ast%
stl%
blk%
tov%
usg%
ortg
drtg
fg_max
fga_max
fg%_max
3p_max
3pa_max
3p%_max
ft_max
fta_max
ft%_max
orb_max
drb_max
trb_max
ast_max
stl_max
blk_max
tov_max
pf_max
pts_max
+/-_max
ts%_max
efg%_max
3par_max
ftr_max
orb%_max
drb%_max
trb%_max
ast%_max
stl%_max
blk%_max
tov%_max
usg%_max
ortg_max
drtg_max
team_x
total
home
mp_opp
fg_opp
fga_opp
fg%_opp
3p_opp
3pa_opp
3p%_opp
ft_opp
fta_opp
ft%_opp
orb_opp
drb_opp
trb_opp
ast_opp
stl_opp
blk_opp
tov_opp
pf_opp
pts_opp
ts%_opp
efg%_opp
3par_opp
ftr_opp
orb%_opp
drb%_opp
trb%_opp
ast%_opp
stl%_opp
blk%_opp
tov%_opp
usg%_opp
ortg_opp
drtg_opp
fg_max_opp
fga_max_opp
fg%_max_opp
3p_max_opp
3pa_max_opp
3p%_max_opp
ft_max_opp
fta_max_opp
ft%_max_opp
orb_max_opp
drb_max_opp
trb_max_opp
ast_max_opp
stl_max_opp
blk_max_opp
tov_max_opp
pf_max_opp
pts_max_opp
+/-_max_opp
ts%_max_opp
efg%_max_opp
3par_max_opp
ftr_max_opp
orb%_max_opp
drb%_max_opp


In [189]:
# Show the team / next-opponent columns from both sides of the merge next to
# the next-game date, home flag and target.
full.loc[
    :,
    [
        "team_x",
        "team_10_x",
        "team_opp_next_x",
        "team_y",
        "team_opp_next_y",
        "date_next",
        "home_next",
        "target",
    ],
]

Unnamed: 0,team_x,team_10_x,team_opp_next_x,team_y,team_opp_next_y,date_next,home_next,target
0,SAC,SAC,TOR,TOR,SAC,2015-11-15,1.0,1
1,TOR,TOR,SAC,SAC,TOR,2015-11-15,0.0,0
2,CLE,CLE,DET,DET,CLE,2015-11-17,0.0,0
3,GSW,GSW,TOR,TOR,GSW,2015-11-17,1.0,1
4,DEN,DEN,NOP,NOP,DEN,2015-11-17,0.0,1
...,...,...,...,...,...,...,...,...
15769,BOS,BOS,GSW,GSW,BOS,2022-06-10,1.0,0
15770,GSW,GSW,BOS,BOS,GSW,2022-06-13,1.0,1
15771,BOS,BOS,GSW,GSW,BOS,2022-06-13,0.0,0
15772,GSW,GSW,BOS,BOS,GSW,2022-06-16,0.0,1


In [147]:
# Extend the exclusion list with every object-dtype column of the merged frame,
# then keep all remaining columns of `full` as the selected columns.
removed_columns = [*full.columns[full.dtypes == "object"], *removed_columns]
selected_columns = full.columns[~full.columns.isin(removed_columns)]

In [165]:
# Print the name of every column in the merged frame, one per line.
for column_name in list(full.columns):
    print(column_name)

mp
fg
fga
fg%
3p
3pa
3p%
ft
fta
ft%
orb
drb
trb
ast
stl
blk
tov
pf
pts
ts%
efg%
3par
ftr
orb%
drb%
trb%
ast%
stl%
blk%
tov%
usg%
ortg
drtg
fg_max
fga_max
fg%_max
3p_max
3pa_max
3p%_max
ft_max
fta_max
ft%_max
orb_max
drb_max
trb_max
ast_max
stl_max
blk_max
tov_max
pf_max
pts_max
+/-_max
ts%_max
efg%_max
3par_max
ftr_max
orb%_max
drb%_max
trb%_max
ast%_max
stl%_max
blk%_max
tov%_max
usg%_max
ortg_max
drtg_max
team_x
total
home
mp_opp
fg_opp
fga_opp
fg%_opp
3p_opp
3pa_opp
3p%_opp
ft_opp
fta_opp
ft%_opp
orb_opp
drb_opp
trb_opp
ast_opp
stl_opp
blk_opp
tov_opp
pf_opp
pts_opp
ts%_opp
efg%_opp
3par_opp
ftr_opp
orb%_opp
drb%_opp
trb%_opp
ast%_opp
stl%_opp
blk%_opp
tov%_opp
usg%_opp
ortg_opp
drtg_opp
fg_max_opp
fga_max_opp
fg%_max_opp
3p_max_opp
3pa_max_opp
3p%_max_opp
ft_max_opp
fta_max_opp
ft%_max_opp
orb_max_opp
drb_max_opp
trb_max_opp
ast_max_opp
stl_max_opp
blk_max_opp
tov_max_opp
pf_max_opp
pts_max_opp
+/-_max_opp
ts%_max_opp
efg%_max_opp
3par_max_opp
ftr_max_opp
orb%_max_opp
drb%_max_opp


In [None]:
class Net(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear -> Sigmoid.

    Args:
        input_size: Number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_units, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid-activated output per input row."""
        hidden = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))
    
    
# Define a custom PyTorch dataset
class NBADataset(Dataset):
    """In-memory dataset of (feature row, target) pairs stored as float32 tensors.

    Args:
        features: Array-like with one row of numeric features per sample
            (nested list, NumPy array, tensor or pandas DataFrame).
        target: Array-like with one numeric target per sample
            (list, NumPy array, tensor or pandas Series).

    Raises:
        ValueError: If `features` and `target` have different lengths.
    """

    def __init__(self, features, target):
        # Convert once here instead of building a new tensor on every lookup.
        self.features = self._to_float_tensor(features)
        self.target = self._to_float_tensor(target)
        if len(self.features) != len(self.target):
            raise ValueError(
                f"features has {len(self.features)} rows but target has "
                f"{len(self.target)} entries"
            )

    @staticmethod
    def _to_float_tensor(values):
        """Convert an array-like (pandas objects included) to a float32 tensor."""
        # pandas objects are reduced to their underlying array first, so that
        # integer lookups are positional rather than index-label based.
        if hasattr(values, "to_numpy"):
            values = values.to_numpy()
        return torch.as_tensor(values, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.target)

    def __getitem__(self, idx):
        """Return the `(features, target)` float32 tensors at position `idx`."""
        return self.features[idx], self.target[idx]
    
# Train and evaluate one model per season on the merged frame `full`:
# fit on every earlier season, then score on the cutoff season.

# Feature columns come from `selected_columns`, which an earlier cell computes
# as the columns of `full` left after removing object-dtype, identifier and
# label columns. NOTE(review): confirm that cell has been run on `full`.
features_columns = list(selected_columns)
target_column = "target"

# Training hyperparameters
num_epochs = 10        # passes over each training set
batch_size = 32
learning_rate = 0.001

# Set the device for PyTorch (GPU when available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the seasons range (2016 - 2022)
seasons_range = range(2016, 2023)

# BCELoss needs labels in [0, 1]; the sentinel 2 marks rows with no next game,
# so any such row is left out.
model_df = full[full[target_column].isin([0, 1])]

# Loop through each season
for cutoff_season in seasons_range:
    # Divide the data into training (earlier seasons) and testing (this season)
    train_df = model_df[model_df["season"] < cutoff_season]
    test_df = model_df[model_df["season"] == cutoff_season]

    # The earliest season has nothing before it to train on; skip instead of
    # scoring an untrained network.
    if train_df.empty or test_df.empty:
        print(f"Season: {cutoff_season}, skipped (no training or test rows)")
        continue

    # Extract features and target for training and testing sets
    train_features = train_df[features_columns].to_numpy(dtype="float32")
    train_target = train_df[target_column].to_numpy(dtype="float32")

    test_features = test_df[features_columns].to_numpy(dtype="float32")
    test_target = test_df[target_column].to_numpy(dtype="float32")

    # Define datasets and dataloaders for training and testing
    train_dataset = NBADataset(train_features, train_target)
    test_dataset = NBADataset(test_features, test_target)

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize a fresh model for this cutoff so seasons do not share weights
    net = Net(len(features_columns))
    net.to(device)

    criterion = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    net.train()
    for epoch in range(num_epochs):
        for inputs, labels in train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            # Labels arrive as shape (batch,); the network emits (batch, 1).
            loss = criterion(outputs, labels.unsqueeze(1))

            loss.backward()
            optimizer.step()

    # Evaluate the model on the test set, one batch at a time
    net.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for test_inputs, test_labels in test_dataloader:
            test_inputs = test_inputs.to(device)
            test_labels = test_labels.to(device).unsqueeze(1)

            test_outputs = net(test_inputs)
            batch_rows = test_labels.size(0)
            # criterion returns the batch mean; weight it by the batch size so
            # the final figure is the mean over all test rows.
            loss_sum += criterion(test_outputs, test_labels).item() * batch_rows
            correct += ((test_outputs >= 0.5).float() == test_labels).sum().item()
            seen += batch_rows

    test_loss = loss_sum / seen
    test_accuracy = correct / seen

    print(
        f"Season: {cutoff_season}, Test Loss: {test_loss:.4f}, "
        f"Test Accuracy: {test_accuracy:.4f}"
    )


