In [144]:
import csv
import pandas as pd
import os

In [145]:
# Locate the project root by probing the working directory for known markers
def find_project_root():
    """Return the directory to treat as the project root.

    Each known marker name is looked up as a direct child of the current
    working directory, in priority order, and the first path that exists is
    returned. When no marker is present the working directory itself is
    returned.
    """
    cwd = os.getcwd()

    # Built lazily so probing stops at the first marker that exists
    candidates = (
        os.path.join(cwd, name)
        for name in ('hedge_your_bets', 'SCAI_comp')
    )
    return next((path for path in candidates if os.path.exists(path)), cwd)

# Resolve the project root once; both CSV paths hang off its datasets folder
project_root = find_project_root()
datasets_dir = os.path.join(project_root, 'datasets')

# Input file and the rewritten copy that later cells load
read_file = os.path.join(datasets_dir, 'nba_games.csv')
write_file = os.path.join(datasets_dir, 'games.csv')



In [146]:
# Copy `read_file` to `write_file` through the csv module: every data row is
# turned into a dict keyed by column name and written back out.
data_list = []

# newline='' leaves line-ending handling to the csv module, which its
# documentation asks for on files passed to csv.reader / csv.writer.
with open(read_file, 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)

    # The first row holds the column names. Fail with a clear message rather
    # than a bare StopIteration when the file has no rows at all.
    headers = next(csv_reader, None)
    if headers is None:
        raise ValueError(f"{read_file} is empty: no header row found")

    # zip() stops at the shorter sequence, so cells beyond the header width
    # are ignored and short rows simply lack their trailing keys.
    data_list = [dict(zip(headers, row)) for row in csv_reader]

# Take the output columns from the header row, not from the first data row:
# a header-only file still produces a valid (header-only) output, and a short
# first row can no longer hide columns that later rows contain. Duplicate
# names are collapsed, matching what the row dicts can actually hold.
fieldnames = list(dict.fromkeys(headers))

with open(write_file, 'w', newline='') as new_csv_file:
    csv_writer = csv.DictWriter(new_csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()
    # Keys missing from a short row are written as DictWriter's default
    # restval, the empty string.
    csv_writer.writerows(data_list)

In [147]:
# Load the rewritten CSV, sort the rows by the "date" column and drop the
# 'Unnamed: 0' column
df = (
    pd.read_csv(write_file)
    .sort_values("date")
    .drop(columns='Unnamed: 0')
)

In [148]:
# Label each game with the outcome of the same team's following game

def add_target(team):
    """Return a copy of one team's rows with a next-row ``target`` column.

    ``target`` on each row is the ``won`` value of the row after it, so the
    last row has no target and gets NaN. The rows are assumed to be in the
    order the games were played; this function does not sort them.

    The input frame is left untouched: pandas documents that functions
    passed to ``groupby(...).apply`` must not mutate the group they receive.

    Parameters
    ----------
    team : pandas.DataFrame
        Rows belonging to a single team; must contain a ``won`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and index plus the ``target`` column.
    """
    return team.assign(target=team["won"].shift(-1))

# Vectorised form of applying add_target to every team: each row's target is
# the `won` value of that team's next row, and a team's last row gets NaN.
# The grouped shift keeps the original row order and index, and avoids
# DataFrameGroupBy.apply, whose treatment of the grouping column ("team") is
# deprecated in recent pandas releases.
df = df.assign(target=df.groupby("team")["won"].shift(-1))

# `won` describes the current game and is removed once the label is derived
df = df.drop('won', axis=1)


In [157]:
# Rows without a target cannot be used; drop them, then store the label as int
df = df.dropna(subset=["target"])
df = df.astype({"target": int})

# Boolean frame marking every missing cell, and the columns that contain any
nulls = df.isna()
has_null = nulls.any()
columns_with_nulls = df.columns[has_null]

# Everything else is fully populated
valid_columns = df.columns[~df.columns.isin(columns_with_nulls)]

# Independent copy restricted to the fully populated columns
full = df.loc[:, valid_columns].copy()
full


Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,tov%_max_opp,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,target
16086,240.0,35.0,83.0,0.422,6.0,18.0,0.333,19.0,27.0,0.704,...,69.4,43.7,206.0,104.0,GSW,111,1,2016,2015-10-27,0
16904,240.0,38.0,94.0,0.404,9.0,29.0,0.310,10.0,17.0,0.588,...,53.2,34.6,162.0,104.0,CHI,97,1,2016,2015-10-27,1
16905,240.0,37.0,87.0,0.425,7.0,19.0,0.368,16.0,23.0,0.696,...,30.4,29.0,138.0,105.0,CLE,95,0,2016,2015-10-27,1
16087,240.0,41.0,96.0,0.427,9.0,30.0,0.300,20.0,22.0,0.909,...,37.5,38.9,201.0,120.0,NOP,95,0,2016,2015-10-27,1
1225,240.0,37.0,82.0,0.451,8.0,27.0,0.296,12.0,15.0,0.800,...,33.3,23.6,132.0,104.0,DET,106,0,2016,2015-10-27,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16381,240.0,43.0,89.0,0.483,13.0,35.0,0.371,17.0,24.0,0.708,...,100.0,32.7,164.0,135.0,GSW,100,0,2022,2022-06-08,0
972,240.0,40.0,91.0,0.440,15.0,43.0,0.349,12.0,15.0,0.800,...,25.8,32.4,205.0,120.0,BOS,97,1,2022,2022-06-10,1
973,240.0,34.0,85.0,0.400,15.0,38.0,0.395,14.0,19.0,0.737,...,42.9,36.3,133.0,112.0,GSW,107,0,2022,2022-06-10,0
11533,240.0,41.0,88.0,0.466,9.0,40.0,0.225,13.0,15.0,0.867,...,45.0,94.4,300.0,112.0,BOS,94,0,2022,2022-06-13,1


In [158]:
# Columns that are never used as model inputs (and therefore never scaled)
features_not_to_scale = ['season', 'date', 'team_opp', 'home_opp', 'target']

# Start from every non-object column, then remove the excluded names
non_object_columns = full.select_dtypes(exclude=['object']).columns
is_excluded = non_object_columns.isin(features_not_to_scale)
selected_columns = non_object_columns[~is_excluded]

In [159]:
#Import ML models
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim

In [160]:
# Split the data into training and testing sets
# NOTE(review): train_test_split shuffles by default while the frame was
# sorted by date earlier, so test games can predate training games - confirm
# this is intended for a next-game prediction task.
X = full[selected_columns].copy()
y = full['target'].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training set only, then reuse it for the test set
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Decide once, from the training set, which features carry no information.
# The same mask must be applied to both splits: recomputing it from the test
# set's own variance can keep a different set of columns and leave the test
# matrix misaligned with (or a different width than) the training matrix.
non_constant_mask = X_train_scaled.var(axis=0) != 0
constant_features = X_train.columns[~non_constant_mask]

# Drop the constant features from both splits using the training mask
X_train_scaled = X_train_scaled[:, non_constant_mask]
X_test_scaled = scaler.transform(X_test)[:, non_constant_mask]

# Convert to float32 PyTorch tensors (inputs and labels alike, as BCELoss
# expects floating-point targets)
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)





In [155]:
# Single-hidden-layer binary classifier
class SimpleNN(nn.Module):
    """Feed-forward network: Linear(input_size, 64) -> ReLU -> Linear(64, 1) -> Sigmoid.

    The sigmoid output lies in (0, 1), which is the form ``nn.BCELoss``
    expects for its predictions.
    """

    def __init__(self, input_size):
        """Build the layers; ``input_size`` is the number of input features."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, 1)`` probabilities."""
        hidden = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))

# Seed PyTorch's RNG so the weight initialisation, and with it the reported
# accuracy, is reproducible from one run to the next (same value as the
# random_state used for the train/test split).
torch.manual_seed(42)

# Instantiate the model, define loss and optimizer
# input_size must equal the number of feature columns in the training tensor
model = SimpleNN(input_size=X_train_tensor.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# The model emits shape (N, 1), so the labels are reshaped to match; this is
# loop-invariant and done once.
train_targets = y_train_tensor.view(-1, 1)

# Training loop: full-batch gradient descent, one optimizer step per epoch
epochs = 10
model.train()  # explicit, so re-running after the evaluation cell is safe
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, train_targets)
    loss.backward()
    optimizer.step()


In [156]:
# Score the trained model on the held-out split
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    # Probabilities at or above 0.5 count as a predicted 1
    predictions = test_outputs.ge(0.5).float()
    # Labels are reshaped to (N, 1) to line up with the model output
    targets = y_test_tensor.view(-1, 1)
    accuracy = predictions.eq(targets).float().mean()

print(f"Accuracy: {accuracy.item()}")

Accuracy: 0.5539588332176208
