# KNN Classifier without PCA

# KNN Classifier with PCA

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score

In [17]:
#loading in the datasets
df_training = pd.read_csv('../data/archive/train_pca.csv')
df_testing = pd.read_csv('../data/archive/test_pca.csv')

In [18]:
#Separating X and y training and testing
x_train = df_training.iloc[:, :-1]
y_train = df_training.iloc[:,-1]

x_test = df_testing.iloc[:, :-1]
y_test = df_testing.iloc[:,-1]

In [19]:
#Initializing KNN Classifier

knn = KNeighborsClassifier()

In [20]:
#Making a grid of values we want our grid search to test to find the best parameters
grid_values ={'n_neighbors': list(range(1,105,4))}

knn_classifier = GridSearchCV(estimator = knn, param_grid = grid_values, cv = StratifiedKFold(n_splits = 5),
                      scoring = ['accuracy', 'f1_micro', 'precision_micro'], refit = False, verbose = 0)

knn_model = knn_classifier.fit(x_train, y_train)

In [21]:
knn_model.cv_results_

{'mean_fit_time': array([0.00779915, 0.01036196, 0.00926781, 0.00988321, 0.00890841,
        0.01171417, 0.0100287 , 0.01055102, 0.01079803, 0.00857372,
        0.00815048, 0.00718832, 0.00864463, 0.00790672, 0.00907526,
        0.01094232, 0.01010971, 0.00964613, 0.00942779, 0.00787311,
        0.00944066, 0.00775318, 0.00999484, 0.00988102, 0.00665317,
        0.00894017]),
 'std_fit_time': array([0.00062934, 0.00278185, 0.001386  , 0.00189866, 0.0027887 ,
        0.00326792, 0.00295654, 0.00169891, 0.00259249, 0.00210847,
        0.00365426, 0.00311393, 0.00331996, 0.00254109, 0.00272382,
        0.00334087, 0.00297206, 0.00155099, 0.00255295, 0.00151097,
        0.0028815 , 0.00209041, 0.00045916, 0.0021205 , 0.00226205,
        0.0024546 ]),
 'mean_score_time': array([0.38760242, 0.46804323, 0.46323209, 0.46786628, 0.47907562,
        0.51007748, 0.47697182, 0.47743573, 0.50410433, 0.48108668,
        0.49105558, 0.48919058, 0.48858457, 0.48506465, 0.48962927,
        0.49576144, 

In [9]:
#initializing our lists so we can later store the best params that achieve best scores across metrics
accuracy_best_params = []
f1_best_params = []
precision_best_params = []

accuracy_best_params.append(knn_model.cv_results_['params'][ np.argmin(knn_model.cv_results_['rank_test_accuracy'])])
f1_best_params.append(knn_model.cv_results_['params'][ np.argmin(knn_model.cv_results_['rank_test_f1_micro'])])
precision_best_params.append(knn_model.cv_results_['params'][ np.argmin(knn_model.cv_results_['rank_test_precision_micro'])])

In [10]:
accuracy_best_params

[{'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1}]

In [11]:
f1_best_params

[{'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1}]

In [12]:
precision_best_params

[{'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1},
 {'n_neighbors': 1}]

In [39]:
accuracy_test_score = []

knn_clf = KNeighborsClassifier(n_neighbors = accuracy_best_params[0].get('n_neighbors'))
model = knn_clf.fit(x_train_full, y_train_full)

y_pred = model.predict(x_test)

accuracy_test_score.append(accuracy_score(y_test, y_pred))

In [40]:
accuracy_test_score

[0.8853070919579233]

In [21]:
f1_test_score = []

knn_clf = KNeighborsClassifier(n_neighbors = f1_best_params[0].get('n_neighbors'))
model = knn_clf.fit(x_train, y_train)

y_pred = model.predict(x_test)

f1_test_score.append(f1_score(y_test, y_pred, average = 'micro'))

In [22]:
f1_test_score

[0.856124872751951]

In [24]:
precision_test_score = []

knn_clf = KNeighborsClassifier(n_neighbors = precision_best_params[0].get('n_neighbors'))
model = knn_clf.fit(x_train, y_train)

y_pred = model.predict(x_test)

precision_test_score.append(precision_score(y_test, y_pred, average = 'micro'))

In [25]:
precision_test_score

[0.8561248727519511]