# Logistic Regression Models

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the training seasons (2012-2023 file) and the held-out 2024 file.
train_data = pd.read_csv('../../../data/season-data/2012_to_2023_data.csv')
test_data = pd.read_csv('../../../data/season-data/combined_2024.csv')

# One-hot encode the team identifiers.
# NOTE(review): the two frames are encoded independently, so their dummy
# columns can differ if a team code appears in only one of the files; align
# the columns before feeding the dummies to a model.
train_data = pd.get_dummies(train_data, columns=['team', 'team_opp'])
test_data = pd.get_dummies(test_data, columns=['team', 'team_opp'])

# Parse 'date' and derive calendar features, identically for both frames.
for frame in (train_data, test_data):
    frame['date'] = pd.to_datetime(frame['date'])
    frame['year'] = frame['date'].dt.year
    frame['month'] = frame['date'].dt.month
    frame['day'] = frame['date'].dt.day
    frame['day_of_week'] = frame['date'].dt.dayofweek

# Impute missing values with column means.
# - numeric_only=True: the frames hold a datetime column (and possibly other
#   non-numeric columns); recent pandas raises on those instead of silently
#   skipping them.
# - The means come from the training data only and are reused for the test
#   data, so no test-set statistics leak into preprocessing.
train_means = train_data.mean(numeric_only=True)
train_data.fillna(train_means, inplace=True)
test_data.fillna(train_means, inplace=True)

# Box-score statistics used as model inputs. Each statistic appears twice:
# once for the team itself and once, with an '_opp' suffix, for its opponent
# (own-team columns first, opponent columns second, same order in both).
# NOTE(review): these look like same-game box-score totals for both sides.
# If so, they presumably determine the final score and therefore 'won'
# (target leakage); a perfect score on the test set is a typical symptom.
# Confirm whether pre-game features (e.g. averages over earlier games)
# were intended instead.
team_stats = [
    'fg', 'fga', 'fg%',
    '3p', '3pa', '3p%',
    'ft', 'fta', 'ft%',
    'orb', 'drb', 'trb',
    'ast', 'stl', 'blk', 'tov', 'pf',
]
features = team_stats + [f'{stat}_opp' for stat in team_stats]

# Inputs are the selected feature columns; the target is the 'won' column.
X_train, Y_train = train_data[features], train_data['won']
X_test, Y_test = test_data[features], test_data['won']

# Build the logistic-regression classifier (fixed seed, iteration cap of
# 2000) and fit it on the training split; fit() returns the estimator.
model = LogisticRegression(max_iter=2000, random_state=0).fit(X_train, Y_train)

# Predict the test split and compare the predictions with the true labels.
predictions = model.predict(X_test)
conf_matrix = confusion_matrix(Y_test, predictions)
accuracy = accuracy_score(Y_test, predictions)

# Report the confusion matrix first, then the overall accuracy.
print("Confusion Matrix:\n", conf_matrix)
print("Accuracy:", accuracy)

Confusion Matrix:
 [[1228    0]
 [   0 1228]]
Accuracy: 1.0
