In [14]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from datetime import datetime
import matplotlib.pyplot as plt

In [15]:
# Load the preprocessed dataset, using the CSV's first column as the row index,
# and show the frame as the cell output.
df = pd.read_csv(
    filepath_or_buffer='Preprocessed_data.csv',
    index_col=0,
)
df

Unnamed: 0,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,Veggies,...,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income,Diabetes_binary
0,0,0,1,23,0,0,0,1,0,0,...,0,1,0,0,0,0,11,5,7,0
1,1,0,1,19,0,0,0,0,1,1,...,0,3,0,0,0,0,6,6,8,0
2,0,0,1,26,1,0,0,1,1,1,...,0,2,0,0,0,0,1,4,4,0
3,0,1,1,22,0,0,0,1,1,1,...,0,1,0,0,0,1,12,4,2,0
4,0,0,1,22,0,0,0,0,1,1,...,0,1,0,0,0,0,4,6,8,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253659,0,1,1,37,0,0,0,0,0,1,...,0,4,0,0,0,0,6,4,1,1
253668,0,1,1,29,1,0,1,0,1,1,...,0,2,0,0,1,1,10,3,6,1
253670,1,1,1,25,0,0,1,0,1,0,...,0,5,15,0,1,0,13,6,4,1
253676,1,1,1,18,0,0,0,0,0,0,...,0,4,0,0,1,0,11,2,4,1


In [16]:
# Features: every column except the last one (the label), minus any leftover
# row-id column. The frame preview above shows an "Unnamed: 0" column holding
# the original row numbers; as an unscaled feature it would dominate the KNN
# distances, and since the previewed low ids are all class 0 and the high ids
# all class 1 it presumably leaks the label as well.
# errors='ignore' makes the drop a no-op when the column is absent.
X = df.iloc[:,0:-1].drop(columns=['Unnamed: 0'], errors='ignore')
y = df.loc[:, 'Diabetes_binary']
# Trial 1 split: 10,000 shuffled rows for training (seed 1); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, train_size=10000, shuffle=True, random_state = 1)

In [17]:
# Per-trial result accumulators: for each criterion (accuracy, precision, F1)
# one list for the score, one for the chosen distance metric and one for the
# chosen k. Every trial appends one entry to each list.
accuracy_list, accuracy_metric_list, accuracy_k_list = [], [], []
precision_list, precision_metric_list, precision_k_list = [], [], []
f1_list, f1_metric_list, f1_k_list = [], [], []

In [18]:
# Candidate neighbour counts tried in every trial: k = 1, 2, ..., 29.
k_value = list(range(1, 30))

## Trial 1

In [19]:
# Wall-clock start of the whole experiment; the timing cell after the last
# trial subtracts this from its own timestamp.
start = datetime.now()

In [None]:
# Trial 1 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 1: held-out error rate vs. k');

In [None]:
# Trial 1: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 1: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 1: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 1: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 1: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 1: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 2

In [None]:
# Trial 2: draw a fresh 10,000-row shuffled training split (seed 2); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=2,
    shuffle=True,
)

In [None]:
# Trial 2 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 2: held-out error rate vs. k');

In [None]:
# Trial 2: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 2: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 2: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 2: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 2: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 2: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 3

In [None]:
# Trial 3: draw a fresh 10,000-row shuffled training split (seed 3); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=3,
    shuffle=True,
)

In [None]:
# Trial 3 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 3: held-out error rate vs. k');

In [None]:
# Trial 3: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 3: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 3: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 3: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 3: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 3: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 4

In [None]:
# Trial 4: draw a fresh 10,000-row shuffled training split (seed 4); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=4,
    shuffle=True,
)

In [None]:
# Trial 4 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 4: held-out error rate vs. k');

In [None]:
# Trial 4: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 4: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 4: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 4: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 4: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 4: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 5

In [None]:
# Trial 5: draw a fresh 10,000-row shuffled training split (seed 5); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=5,
    shuffle=True,
)

In [None]:
# Trial 5 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 5: held-out error rate vs. k');

In [None]:
# Trial 5: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 5: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 5: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 5: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 5: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 5: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 6

In [None]:
# Trial 6: draw a fresh 10,000-row shuffled training split (seed 6); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=6,
    shuffle=True,
)

In [None]:
# Trial 6 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 6: held-out error rate vs. k');

In [None]:
# Trial 6: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 6: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 6: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 6: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 6: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 6: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 7

In [None]:
# Trial 7: draw a fresh 10,000-row shuffled training split (seed 7); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=7,
    shuffle=True,
)

In [None]:
# Trial 7 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 7: held-out error rate vs. k');

In [None]:
# Trial 7: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 7: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 7: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 7: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 7: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 7: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 8

In [None]:
# Trial 8: draw a fresh 10,000-row shuffled training split (seed 8); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=8,
    shuffle=True,
)

In [None]:
# Trial 8 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 8: held-out error rate vs. k');

In [None]:
# Trial 8: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 8: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 8: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 8: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 8: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 8: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 9

In [None]:
# Trial 9: draw a fresh 10,000-row shuffled training split (seed 9); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=9,
    shuffle=True,
)

In [None]:
# Trial 9 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 9: held-out error rate vs. k');

In [None]:
# Trial 9: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 9: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 9: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 9: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 9: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 9: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

## Trial 10

In [None]:
# Trial 10: draw a fresh 10,000-row shuffled training split (seed 10); the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    train_size=10000,
    random_state=10,
    shuffle=True,
)

In [None]:
# Trial 10 sweep: for every k, grid-search the distance metric on the training
# split (GridSearchCV cross-validates and refits the winner), then score that
# model on the held-out split.
# NOTE(review): the later cells pick k from these held-out scores, so the
# "optimum" numbers are optimistic; choosing k by cross-validation would avoid that.
acc = []
f1 = []
precision = []
error_rate = []
# The metric grid is the same for every k, so it is built once.
parameters = {"metric":['euclidean' ,'manhattan','chebyshev','minkowski']}
for i in k_value:
    neigh = KNeighborsClassifier(n_neighbors = i)
    search_results =  GridSearchCV(neigh, parameters)
    search_results.fit(X_train, Y_train)
    # Predict the held-out split once and derive every score from it;
    # search_results.score() would repeat the slow KNN prediction only to
    # return this same accuracy.
    Y_Predicted = search_results.predict(X_test)
    acc.append(accuracy_score(Y_test, Y_Predicted))
    f1.append(f1_score(Y_test, Y_Predicted))
    precision.append(precision_score(Y_test, Y_Predicted))
    error_rate.append(np.mean(Y_Predicted != Y_test))
plt.figure(figsize=(15,10))
plt.plot(k_value,error_rate, marker='o', markersize=9)
# Label the figure so it can be read on its own; the trailing ';' hides the repr output.
plt.xlabel('k (n_neighbors)')
plt.ylabel('Error rate on held-out split')
plt.title('Trial 10: held-out error rate vs. k');

In [None]:
# Trial 10: redo the metric grid search at the k with the highest sweep accuracy
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[acc.index(max(acc))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 10: record the best-accuracy configuration, using the grid search that
# the preceding cell refitted at the accuracy-optimal k.
opt_metric = search_results.best_params_["metric"]
accuracy = search_results.score(X_test, Y_test)
# Look the best k up in the sweep's accuracy list. `accuracy` is a single
# score, so max(accuracy) raised TypeError; the list to search is `acc`.
k_accuracy = k_value[acc.index(max(acc))]

accuracy_list.append(accuracy)
accuracy_metric_list.append(opt_metric)
accuracy_k_list.append(k_accuracy)
print(f'Accuracy for optimum classifier(metric={opt_metric}, kvalue= {k_accuracy}): {accuracy}')

In [None]:
# Trial 10: redo the metric grid search at the k with the highest sweep F1
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[f1.index(max(f1))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 10: score the search refitted in the preceding cell on the held-out
# split and log the best-F1 configuration.
Y_Predicted = search_results.predict(X_test)
f1_Score = f1_score(y_true=Y_test, y_pred=Y_Predicted)
opt_f1_metric = search_results.best_params_["metric"]
k_f1_score = k_value[f1.index(max(f1))]

# One new entry in each of the three parallel F1 result lists.
f1_k_list.append(k_f1_score)
f1_metric_list.append(opt_f1_metric)
f1_list.append(f1_Score)

print(f'F1 Score for optimum classifier(metric={opt_f1_metric}, kvalue= {k_f1_score}): {f1_Score}')

In [None]:
# Trial 10: redo the metric grid search at the k with the highest sweep precision
# (list.index picks the first such k on ties).
parameters = {"metric": ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}
knn = KNeighborsClassifier(n_neighbors=k_value[precision.index(max(precision))])

# fit() returns the search object itself, so build-and-fit is one statement;
# the trailing bare name keeps the fitted search as the cell's displayed value.
search_results = GridSearchCV(estimator=knn, param_grid=parameters).fit(X_train, Y_train)
search_results

In [None]:
# Trial 10: score the search refitted in the preceding cell on the held-out
# split and log the best-precision configuration.
Y_Predicted = search_results.predict(X_test)
Precision_Score = precision_score(y_true=Y_test, y_pred=Y_Predicted)
opt_precision_metric = search_results.best_params_["metric"]
k_precision = k_value[precision.index(max(precision))]

# One new entry in each of the three parallel precision result lists.
precision_k_list.append(k_precision)
precision_metric_list.append(opt_precision_metric)
precision_list.append(Precision_Score)

print(f'Precision Score for optimum classifier(metric={opt_precision_metric}, kvalue= {k_precision}): {Precision_Score}')

In [None]:
# Total wall-clock time since the `start` timestamp taken before Trial 1.
# Only meaningful when the cells in between were run in order in one session.
end = datetime.now()
time_taken = end - start
print('Time: ',time_taken) 

## Results

In [None]:
# Column layout of the results table: for each criterion, the held-out score,
# the k chosen for it and the metric chosen for it, one row per trial.
# NOTE(review): the 'F1_micro*' columns hold f1_score() called with its default
# averaging (binary), not micro-averaged F1 — confirm the intended label.
data = {
    'Accuracy': accuracy_list,
    'Accuracy K': accuracy_k_list,
    'Accuracy Metric': accuracy_metric_list,
    'Precision': precision_list,
    'Precision K': precision_k_list,
    'Precision Metric': precision_metric_list,
    'F1_micro': f1_list,
    'F1_micro K': f1_k_list,
    'F1_micro Metric': f1_metric_list,
}

In [None]:
# Allow up to 100 characters per displayed cell, then build the per-trial
# results table and show it as the cell output.
pd.options.display.max_colwidth = 100
trainingResults = pd.DataFrame(data)
trainingResults

In [None]:
# Persist the per-trial results table (index included) next to the notebook.
trainingResults.to_csv(path_or_buf='KNN_trainingResults.csv')