In [3]:
# Importing libraries
import quandl
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
import sys
import time
from tqdm import tqdm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Importing Data
JT_JB = pd.read_csv(r"C:\Users\richa\Downloads\JT_JB.csv")

## KNN: Team Metrics

In [4]:
# Dataframe creation
last_rows_knn = JT_JB.groupby(['Year','Game', 'Q']).last().reset_index()
last_rows_knn = last_rows_knn[last_rows_knn['Q']=='Q3']

knn_df = last_rows_knn[['Result', 'Year', 'OPP_FG', 'BOS_FG']].copy()
knn_df['Result'] = np.where(knn_df['Result'] == 'BOS', 1, 0)

In [5]:
# Split
KNN_train = knn_df[knn_df['Year']!= 2023]
KNN_test = knn_df[knn_df['Year']== 2023]

# Drop
X_train_knn = KNN_train.drop(['Result', 'Year'], axis=1)
y_train_knn = KNN_train['Result']
X_test_knn = KNN_test.drop(['Result', 'Year'], axis=1)
y_test_knn = KNN_test['Result']

In [6]:
# Normalize Predictors
scaler = MinMaxScaler()
X_train_knn_norm = pd.DataFrame(scaler.fit_transform(X_train_knn),
                        columns = X_train_knn.columns
                        )

In [7]:
# Grid
param_grid = {
    'n_neighbors': [12, 7, 15],
    'weights': ['uniform', 'distance'],
    'metric': ['manhattan', 'euclidean']
}

knn_model = KNeighborsClassifier()

In [8]:
# Grid search
grid_search = GridSearchCV(estimator=knn_model, param_grid=param_grid, cv=5, n_jobs=-1)


for params in tqdm(param_grid):
    grid_search.fit(X_train_knn_norm, y_train_knn)

print("Best parameters found during grid search:")
print(grid_search.best_params_)

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:06<00:00,  2.06s/it]

Best parameters found during grid search:
{'metric': 'manhattan', 'n_neighbors': 12, 'weights': 'uniform'}





### Testing: Team Metrics

In [9]:
# Normalize the Testing set
X_test_knn_norm = pd.DataFrame(scaler.transform(X_test_knn),
                        columns = X_test_knn.columns
                        )

# Predict
best_model = grid_search.best_estimator_
y_pred_knn = best_model.predict(X_test_knn_norm)

# Evaluation
accuracy = accuracy_score(y_test_knn, y_pred_knn)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test_knn, y_pred_knn)
print("Confusion Matrix:\n", conf_matrix)

auc_score = roc_auc_score(y_test_knn, y_pred_knn)
print("ROC AUC Score:", auc_score)

Accuracy: 0.7088607594936709
Confusion Matrix:
 [[18  6]
 [17 38]]
ROC AUC Score: 0.7204545454545455


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


## KNN: Player Metrics

In [10]:
# Dataframe creation
last_rows_knn = JT_JB.groupby(['Year','Game', 'Q']).last().reset_index()
last_rows_knn = last_rows_knn[last_rows_knn['Q']=='Q3']

knn_df = last_rows_knn[['Result', 'Year', 'JT_FG', 'JB_FG']].copy()
knn_df['Result'] = np.where(knn_df['Result'] == 'BOS', 1, 0)

In [11]:
# Split
KNN_train = knn_df[knn_df['Year']!= 2023]
KNN_test = knn_df[knn_df['Year']== 2023]

# Drop target
X_train_knn = KNN_train.drop(['Result', 'Year'], axis=1)
y_train_knn = KNN_train['Result']
X_test_knn = KNN_test.drop(['Result', 'Year'], axis=1)
y_test_knn = KNN_test['Result']

In [12]:
# Normalize Predictors
scaler = MinMaxScaler()
X_train_knn_norm = pd.DataFrame(scaler.fit_transform(X_train_knn),
                        columns = X_train_knn.columns
                        )

In [13]:
# Grid
param_grid = {
    'n_neighbors': [5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['manhattan', 'euclidean']
}

# Model
knn_model = KNeighborsClassifier()

In [14]:
# Grid search
grid_search = GridSearchCV(estimator=knn_model, param_grid=param_grid, cv=5, n_jobs=-1)


for params in tqdm(param_grid):
    grid_search.fit(X_train_knn_norm, y_train_knn)

print("Best parameters found during grid search:")
print(grid_search.best_params_)

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.35it/s]

Best parameters found during grid search:
{'metric': 'manhattan', 'n_neighbors': 9, 'weights': 'uniform'}





### Testing: Player Metrics

In [15]:
# Normalize the Testing set
X_test_knn_norm = pd.DataFrame(scaler.transform(X_test_knn),
                        columns = X_test_knn.columns
                        )

# Predict
best_model = grid_search.best_estimator_
y_pred_knn = best_model.predict(X_test_knn_norm)

# Evaluation
accuracy = accuracy_score(y_test_knn, y_pred_knn)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test_knn, y_pred_knn)
print("Confusion Matrix:\n", conf_matrix)

auc_score = roc_auc_score(y_test_knn, y_pred_knn)
print("ROC AUC Score:", auc_score)

Accuracy: 0.6962025316455697
Confusion Matrix:
 [[ 0 24]
 [ 0 55]]
ROC AUC Score: 0.5


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
