In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split  
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score , classification_report, confusion_matrix
import seaborn as sns
import pickle
import numpy as np 
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
from collections import Counter

### data prep

In [2]:
# Read the CSV at data/cleaned_data.csv (path relative to the working directory) into a DataFrame.
data = pd.read_csv("data/cleaned_data.csv")

In [3]:
def grouping_(x):
    """Bucket a numeric value into one of three class labels.

    Returns:
        0   when ``x <= 30``
        50  when ``30 < x < 300``
        100 otherwise (``x >= 300``, or a value such as NaN for which
            both comparisons are false)
    """
    if x <= 30:
        return 0
    # Past the first guard, only the upper bound still needs checking.
    if x < 300:
        return 50
    return 100
    
# Replace every raw Degree_rotation value with its bucket label (0 / 50 / 100).
# NOTE: this overwrites the column, so the cell is not idempotent -- running it
# a second time sends the label 100 through grouping_ again, which returns 50
# (30 < 100 < 300). Reload the data before re-running.
data['Degree_rotation'] = data['Degree_rotation'].apply(grouping_)

In [4]:
# Positional split of the frame: every column except the last becomes the
# feature matrix, the last column becomes the target vector.
# NOTE(review): assumes the label column is stored last -- confirm against the CSV.
x = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

### data preprocessing

In [16]:
# Balance the classes with SMOTE oversampling.
# random_state is pinned (matching the seed used for the train/test split) so
# the synthetic samples -- and every metric computed afterwards -- are
# reproducible on a fresh kernel.
# NOTE(review): x and y are resampled here, before the train/test split is
# taken from them, so the held-out split will contain synthetic points
# interpolated from samples that may sit in the training split. That inflates
# the reported scores; consider splitting first and resampling only the
# training portion.
oversample = SMOTE(random_state=0)
x, y = oversample.fit_resample(x, y)

# Summarize the class distribution after resampling.
counter = Counter(y)
for k, v in counter.items():
    per = v / len(y) * 100
    print('Class=%d, n=%d (%.3f%%)' % (k, v, per))

# Plot the distribution (dict views are materialized so matplotlib receives plain sequences).
plt.bar(list(counter.keys()), list(counter.values()))
plt.title("Class distribution after SMOTE")
plt.xlabel("Class")
plt.ylabel("Samples")
plt.show()

In [6]:
# Hold out 25% of the rows for evaluation. The split is shuffled with a fixed
# seed and stratified on y.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
    shuffle=True,
    stratify=y,
)

### model training

In [7]:
# k-nearest-neighbours classifier configured with a single neighbour.
# NOTE: the variable is called knn5 but n_neighbors is 1; the name is kept
# because later cells reference it.
knn5 = KNeighborsClassifier(n_neighbors=1)

In [17]:
# Fit the classifier on the training split.
knn5.fit(x_train, y_train)

### model evaluation

In [9]:
# Predict a class label for every row of the held-out split.
y_pred_knn = knn5.predict(x_test)

In [18]:
# Display the predicted labels for the test split.
y_pred_knn

In [11]:
# Overall accuracy on the test split, then a blank line, then the per-class
# precision / recall / F1 report. `accuracy` is reused by the plotting cell.
accuracy = accuracy_score(y_test, y_pred_knn)
print("Accuracy Score:", accuracy, end="\n\n")
print(classification_report(y_test, y_pred_knn))

Accuracy Score: 0.9020236763805947

              precision    recall  f1-score   support

           0       0.85      0.93      0.89     15797
          50       0.91      0.78      0.84     15796
         100       0.95      0.99      0.97     15796

    accuracy                           0.90     47389
   macro avg       0.90      0.90      0.90     47389
weighted avg       0.90      0.90      0.90     47389



In [19]:
import numpy as np
from sklearn.metrics import confusion_matrix

def accuracy_for_each_element(confusion_matrix):
    """Row-normalize a confusion matrix.

    Each cell is divided by the sum of its row and rounded to two decimals,
    so entry ``[i, j]`` is the fraction of row ``i`` that falls in column ``j``.

    Args:
        confusion_matrix: square 2-D array-like of counts. (Inside this
            function the parameter name shadows any imported function of the
            same name; it is kept for interface compatibility.)

    Returns:
        A float ``numpy.ndarray`` with the same shape as the input. Rows whose
        total is zero are returned as all zeros instead of NaN.
    """
    # Work on the argument itself rather than on a module-level global.
    counts = np.asarray(confusion_matrix, dtype=float)
    if counts.size == 0:
        num_classes = len(confusion_matrix)
        return np.zeros((num_classes, num_classes), dtype=float)

    # Row totals are computed once and broadcast across the columns.
    row_totals = counts.sum(axis=1, keepdims=True)
    rates = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals != 0,
    )
    return np.round(rates, 2)

# Raw confusion-matrix counts (rows: actual class, columns: predicted class).
cm = confusion_matrix(y_test, y_pred_knn)

# Per-row rates, used as the text annotation of each heatmap cell. The cell
# colours still come from the raw counts in `cm`.
accuracy_matrix = accuracy_for_each_element(cm)
labels = accuracy_matrix

# Tick labels are derived from the classes present in the data instead of
# being hard-coded; confusion_matrix orders its rows/columns by the sorted
# labels that appear in y_true or y_pred, which np.unique reproduces.
class_names = [str(c) for c in np.unique(np.concatenate([y_test, y_pred_knn]))]

ht = sns.heatmap(cm, annot=labels, fmt='', cmap='Blues')
ht.set_xticklabels(class_names)
ht.set_yticklabels(class_names)

# NOTE: the model size in the title is a hard-coded literal, not measured here.
plt.title(f'KNN Model Size - 16.4MB\n{round(accuracy*100, 2)} accuracy')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

### save model

In [13]:
# Serialize the fitted classifier to disk. The context manager guarantees the
# file handle is flushed and closed even if pickling raises.
f = 'model_zoo/knn.pkl'
with open(f, 'wb') as model_file:
    pickle.dump(knn5, model_file)

In [14]:
# Reload the pickled classifier; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only
# load pickles produced by a trusted source.
with open('model_zoo/knn.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# One hand-written sample of six feature values; it is wrapped in a list so
# predict receives a 2-D input containing a single row.
input_data = np.array([-722, -1918, -194, -15, -8, -1])
result = model.predict([input_data])

In [15]:
# Predicted class label for the first row passed to predict.
result[0]

100