In [27]:
# Importing Libraries
import quandl
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
import sys
import time
from tqdm import tqdm
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV


# Importing Data
JT_JB = pd.read_csv(r"C:\Users\richa\Downloads\JT_JB.csv")

## SVM: Team Metrics

In [9]:
# Dataframe
last_rows_svm = JT_JB.groupby(['Year','Game', 'Q']).last().reset_index()
last_rows_svm = last_rows_svm[last_rows_svm['Q']=='Q3']

svm_df = last_rows_svm[['Result', 'Year', 'OPP_FG', 'BOS_ST', 'OPP_AST', 'BOS_FG']].copy()
svm_df['Result'] = np.where(svm_df['Result'] == 'BOS', 1, 0)


In [10]:
# Split
SVM_train = svm_df[svm_df['Year']!= 2023]
SVM_test = svm_df[svm_df['Year']== 2023]

# Drop
X_train_svm = SVM_train.drop(['Result', 'Year'], axis=1)
y_train_svm = SVM_train['Result']
X_test_svm = SVM_test.drop(['Result', 'Year'], axis=1)
y_test_svm = SVM_test['Result']

In [15]:
# Normalize Predictors
scaler = MinMaxScaler()
X_train_svm_norm = pd.DataFrame(scaler.fit_transform(X_train_svm),
                        columns = X_train_svm.columns
                        )

In [28]:
# Grid
param_grid = {
    'C': [0.85],
    'kernel': ['rbf'],
    'gamma': ['scale']
}

# Model
svm_model = SVC()

#Grid search
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5, n_jobs=-1)

for params in tqdm(param_grid):
    grid_search.fit(X_train_svm_norm, y_train_svm)

print("Best parameters found by during grid search:")
print(grid_search.best_params_)

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 25.38it/s]

Best parameters found by during grid search:
{'C': 0.85, 'gamma': 'scale', 'kernel': 'rbf'}





### Testing: Team Metrics

In [21]:
# best paramaters training
best_model = SVC(**grid_search.best_params_)
best_model.fit(X_train_svm_norm, y_train_svm)

# Normalize testing
X_test_svm_norm = pd.DataFrame(
    scaler.transform(X_test_svm),
    columns = X_test_svm.columns)

# Predict
y_pred_svm = best_model.predict(X_test_svm_norm)

# Evaluation
accuracy = accuracy_score(y_test_svm, y_pred_svm)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test_svm, y_pred_svm)
print("Confusion Matrix:\n", conf_matrix)

auc_score = roc_auc_score(y_test_svm, y_pred_svm)
print("ROC AUC Score:", auc_score)

Accuracy: 0.7848101265822784
Confusion Matrix:
 [[18  6]
 [11 44]]
ROC AUC Score: 0.775


## SVM: Player Metrics

In [22]:
# Dataframe
last_rows_svm_jtjb = JT_JB.groupby(['Year','Game', 'Q']).last().reset_index()
last_rows_svm_jtjb = last_rows_svm_jtjb[last_rows_svm_jtjb['Q']=='Q3']

svm_jtjb = last_rows_svm_jtjb[['Result', 'Year', 'JT_FG', 'JB_FG']].copy()
svm_jtjb['Result'] = np.where(svm_jtjb['Result'] == 'BOS', 1, 0)

In [23]:
# Split
SVM_train_jtjb = svm_jtjb[svm_jtjb['Year']!= 2022]
SVM_test_jtjb = svm_jtjb[svm_jtjb['Year']== 2022]

# Drop
X_train_svm_jtjb = SVM_train_jtjb.drop(['Result', 'Year'], axis=1)
y_train_svm_jtjb = SVM_train_jtjb['Result']
X_test_svm_jtjb = SVM_test_jtjb.drop(['Result', 'Year'], axis=1)
y_test_svm_jtjb = SVM_test_jtjb['Result']

In [24]:
# Normalize Predictors
scaler = MinMaxScaler()
X_train_svm_jtjb_norm = pd.DataFrame(scaler.fit_transform(X_train_svm_jtjb),
                        columns = X_train_svm_jtjb.columns
                        )

In [25]:
# Grid
param_grid = {
    'C': [0.85],
    'kernel': ['rbf'],
    'gamma': ['scale']
}

# Model
svm_model = SVC()

# Grid search
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5, n_jobs=-1)

for params in tqdm(param_grid):
    grid_search.fit(X_train_svm_jtjb_norm, y_train_svm_jtjb)

print("Best parameters found by during grid search:")
print(grid_search.best_params_)




100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 29.57it/s]

Best parameters found by during grid search:
{'C': 0.85, 'gamma': 'scale', 'kernel': 'rbf'}





### Testing: Player Metrics

In [26]:
# Best parameters training
best_model = SVC(**grid_search.best_params_)
best_model.fit(X_train_svm_jtjb, y_train_svm_jtjb)

# Normalize testing
X_test_svm_jtjb_norm = pd.DataFrame(
    scaler.transform(X_test_svm_jtjb),
    columns = X_test_svm_jtjb.columns)

# Predict
y_pred_svm = best_model.predict(X_test_svm_jtjb_norm)

# Evaluation
accuracy = accuracy_score(y_test_svm_jtjb, y_pred_svm)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test_svm_jtjb, y_pred_svm)
print("Confusion Matrix:\n", conf_matrix)

auc_score = roc_auc_score(y_test_svm_jtjb, y_pred_svm)
print("ROC AUC Score:", auc_score)

Accuracy: 0.6190476190476191
Confusion Matrix:
 [[ 0 40]
 [ 0 65]]
ROC AUC Score: 0.5
