# k-nearest neighbors, Monk

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import make_scorer,mean_squared_error,accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sn
import numpy as np

In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


In [None]:
from sklearn.metrics import ConfusionMatrixDisplay,classification_report

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:


def ReadMonkFile(s):
    """Load a space-separated MONK file into a DataFrame indexed by sample id.

    Parameters
    ----------
    s : str or file-like
        Path (or buffer) handed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``class`` and ``a1`` .. ``a6``, every value read as a string,
        with the ``Id`` column used as the index.
    """
    # Same eight names as the file layout: label, six attributes, sample id.
    column_names = ['class'] + [f'a{i}' for i in range(1, 7)] + ['Id']
    frame = pd.read_csv(s, sep=" ", names=column_names, dtype=str)
    return frame.set_index('Id')

## Function to compute the mean Euclidean error

In [None]:
def mean_euclidean_error(y_true, y_pred):
    """Return the mean Euclidean error (MEE) between targets and predictions.

    The Euclidean norm of the error is computed separately for every sample
    and the norms are then averaged.

    Parameters
    ----------
    y_true, y_pred : array-like
        Either 1-D (one scalar target per sample) or 2-D with shape
        ``(n_samples, n_outputs)``. Both must have the same shape.

    Returns
    -------
    float
        Mean of the per-sample Euclidean errors.
    """
    # Cast to float before subtracting: unsigned integer inputs (for example
    # uint8 class labels) would otherwise wrap around instead of going negative.
    diff = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    if diff.ndim <= 1:
        # With a single output per sample the Euclidean norm is |error|.
        per_sample = np.abs(diff)
    else:
        # Collapse all output axes so each row holds one sample's error vector.
        flat = diff.reshape(diff.shape[0], -1)
        per_sample = np.sqrt(np.sum(flat ** 2, axis=1))
    return np.mean(per_sample)

In [None]:
# Wrap the MEE as a scikit-learn scorer. greater_is_better=False declares it a
# loss, so the scorer reports the negated value (higher is still better).
# NOTE(review): the grid searches visible in this notebook pass
# scoring='accuracy', so this scorer is not used by them.
scoring=make_scorer(mean_euclidean_error,greater_is_better=False)

### Function to print the test accuracy of KNN for each k from 1 to n (to pick the best k)

In [None]:

def KNN_monk(x_train, y_train, x_test, y_test, n):
    """Print the test accuracy of a KNN classifier for every k in 1..n.

    For each neighbour count a fresh ``KNeighborsClassifier`` is trained on
    ``(x_train, y_train)`` and scored with ``accuracy_score`` on
    ``(x_test, y_test)``. Nothing is returned; results are only printed.

    Parameters
    ----------
    x_train, y_train : training features and labels.
    x_test, y_test : evaluation features and labels.
    n : int
        Largest number of neighbours to try (inclusive).
    """
    for i in range(1, n + 1):
        # fit() returns the estimator, so construction and training chain.
        knn = KNeighborsClassifier(n_neighbors=i).fit(x_train, y_train)
        # Share of test samples whose predicted label matches the true one.
        accuracy = accuracy_score(y_test, knn.predict(x_test))
        print(f"Accuracy of KNN Classifier with {i} neighbors: {accuracy:.2f}")



In [None]:
# Upper bound on the number of neighbours.
# NOTE(review): not referenced in the visible cells; the KNN_monk calls pass
# literal values instead.
maxK = 10

# MONK 1

## Read the Dataset

In [None]:
# Load the MONK-1 training and test splits (paths are relative to the
# notebook's working directory).
Train=ReadMonkFile("Dataset_Monk/monks-1.train")
Test=ReadMonkFile("Dataset_Monk/monks-1.test")


In [None]:
# Column 0 is the class label; columns 1..6 are the categorical attributes.
TargetTrain = Train.iloc[:, 0].astype('uint8')
TargetTest = Test.iloc[:, 0].astype('uint8')

# One-hot encode the training attributes; this fixes the feature layout.
featureTrain = pd.get_dummies(Train.iloc[:, 1:7])

# Encode the test attributes, then align them to the training columns.
# Encoding each split on its own gives mismatched columns whenever a category
# value is missing from one split. Columns absent from the test split are
# filled with 0, columns unseen in training are dropped, and dtypes are
# matched to the training frame.
featureTest = (
    pd.get_dummies(Test.iloc[:, 1:7])
    .reindex(columns=featureTrain.columns, fill_value=0)
    .astype(featureTrain.dtypes.to_dict())
)


In [None]:
# Print the MONK-1 test accuracy of KNN for every k from 1 to 10.
KNN_monk(featureTrain, TargetTrain, featureTest, TargetTest, 10)

## Grid Search for parameters

In [None]:

# Candidate hyper-parameters for the exhaustive search. In scikit-learn,
# 'minkowski' (default p=2), 'euclidean' and 'l2' name the same distance, as do
# 'manhattan' and 'l1'.
params = dict(
    n_neighbors=np.arange(1, 30),
    leaf_size=[10, 20, 25, 30, 35],
    weights=['uniform', 'distance'],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    metric=['minkowski', 'euclidean', 'manhattan', 'chebyshev', 'l1', 'l2'],
)

# 3-fold cross-validated search scored on accuracy, using all available cores.
knn = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=params,
    scoring='accuracy',
    cv=3,
    verbose=4,
    n_jobs=-1,
)
knn.fit(featureTrain, TargetTrain)

In [None]:
# Parameter combination with the best mean cross-validated accuracy.
knn.best_params_

{'algorithm': 'auto',
 'leaf_size': 10,
 'metric': 'minkowski',
 'n_neighbors': 8,
 'weights': 'uniform'}

In [None]:
# Estimator that GridSearchCV refit on the whole training set with the best
# parameters (refit=True is the default).
knOptimm1=knn.best_estimator_

### Result on train and test

In [None]:

# Fit the selected model on the full MONK-1 training set.
# NOTE(review): GridSearchCV's default refit has presumably already done this,
# so the call looks redundant but harmless.
knOptimm1.fit(featureTrain, TargetTrain)
# Training-set predictions, accuracy and MEE.
trainPred=knOptimm1.predict(featureTrain)
accuracy=accuracy_score(TargetTrain,trainPred)
print('Accuracy Train',accuracy)
print('MEE Train', mean_euclidean_error(TargetTrain.values, trainPred))

# Test-set predictions, per-class report, accuracy and MEE.
# `accuracy` is overwritten with the test value here.
testPred=knOptimm1.predict(featureTest)
print(classification_report(TargetTest, testPred))
accuracy=accuracy_score(TargetTest,testPred)
print('Accuracy Test',accuracy)
print('MEE Test', mean_euclidean_error(TargetTest.values, testPred))


In [None]:
# Plot the confusion matrix of the selected MONK-1 model on the test split.
ConfusionMatrixDisplay.from_estimator(knOptimm1, featureTest, TargetTest)
plt.show()

# MONK 2

In [None]:
# Load the MONK-2 training and test splits; this rebinds Train and Test.
Train=ReadMonkFile("Dataset_Monk/monks-2.train")
Test=ReadMonkFile("Dataset_Monk/monks-2.test")


In [None]:
# Column 0 is the class label; columns 1..6 are the categorical attributes.
TargetTrain = Train.iloc[:, 0].astype('uint8')
TargetTest = Test.iloc[:, 0].astype('uint8')

# One-hot encode the training attributes; this fixes the feature layout.
featureTrain = pd.get_dummies(Train.iloc[:, 1:7])

# Encode the test attributes, then align them to the training columns.
# Encoding each split on its own gives mismatched columns whenever a category
# value is missing from one split. Columns absent from the test split are
# filled with 0, columns unseen in training are dropped, and dtypes are
# matched to the training frame.
featureTest = (
    pd.get_dummies(Test.iloc[:, 1:7])
    .reindex(columns=featureTrain.columns, fill_value=0)
    .astype(featureTrain.dtypes.to_dict())
)


In [None]:
# Print the MONK-2 test accuracy of KNN for every k from 1 to 22.
KNN_monk(featureTrain, TargetTrain, featureTest, TargetTest, 22)

### Grid Search for parameters

In [None]:

# Candidate hyper-parameters for the exhaustive search. In scikit-learn,
# 'minkowski' (default p=2), 'euclidean' and 'l2' name the same distance, as do
# 'manhattan' and 'l1'.
params = dict(
    n_neighbors=np.arange(1, 30),
    algorithm=['kd_tree', 'ball_tree', 'auto', 'brute'],
    leaf_size=[10, 15, 20, 30, 35],
    weights=['uniform', 'distance'],
    metric=['minkowski', 'euclidean', 'manhattan', 'chebyshev', 'l1', 'l2'],
)

# 3-fold cross-validated search scored on accuracy, using all available cores.
knn = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=params,
    scoring='accuracy',
    cv=3,
    verbose=4,
    n_jobs=-1,
)
knn.fit(featureTrain, TargetTrain)

In [None]:
# Parameter combination with the best mean cross-validated accuracy on MONK-2.
knn.best_params_

### Result on Train and Test

In [None]:
# Take the estimator selected by the grid search and fit it on the full MONK-2
# training set.
# NOTE(review): GridSearchCV's default refit has presumably already done this,
# so the fit call looks redundant but harmless.
knOptimm2=knn.best_estimator_
knOptimm2.fit(featureTrain, TargetTrain)
# Training-set predictions, accuracy and MEE.
trainPred=knOptimm2.predict(featureTrain)
accuracy=accuracy_score(TargetTrain,trainPred)
print('Accuracy Train',accuracy)
print('MEE Train', mean_euclidean_error(TargetTrain.values, trainPred))


# Test-set predictions, per-class report, accuracy and MEE.
# `accuracy` is overwritten with the test value here.
testPred=knOptimm2.predict(featureTest)
print(classification_report(TargetTest, testPred))
accuracy=accuracy_score(TargetTest,testPred)
print('Accuracy Test',accuracy)
print('MEE Test', mean_euclidean_error(TargetTest.values, testPred))

In [None]:
# Plot the confusion matrix of the selected MONK-2 model on the test split.
ConfusionMatrixDisplay.from_estimator(knOptimm2, featureTest, TargetTest)
plt.show()

# MONK 3

In [None]:
# Load the MONK-3 training and test splits; this rebinds Train and Test.
Train=ReadMonkFile("Dataset_Monk/monks-3.train")
Test=ReadMonkFile("Dataset_Monk/monks-3.test")


In [None]:
# Column 0 is the class label; columns 1..6 are the categorical attributes.
TargetTrain = Train.iloc[:, 0].astype('uint8')
TargetTest = Test.iloc[:, 0].astype('uint8')

# One-hot encode the training attributes; this fixes the feature layout.
featureTrain = pd.get_dummies(Train.iloc[:, 1:7])

# Encode the test attributes, then align them to the training columns.
# Encoding each split on its own gives mismatched columns whenever a category
# value is missing from one split. Columns absent from the test split are
# filled with 0, columns unseen in training are dropped, and dtypes are
# matched to the training frame.
featureTest = (
    pd.get_dummies(Test.iloc[:, 1:7])
    .reindex(columns=featureTrain.columns, fill_value=0)
    .astype(featureTrain.dtypes.to_dict())
)

In [None]:
# Print the MONK-3 test accuracy of KNN for every k from 1 to 10.
KNN_monk(featureTrain, TargetTrain, featureTest, TargetTest, 10)

### Grid Search for parameters

In [None]:

# Candidate hyper-parameters for the exhaustive search. In scikit-learn,
# 'minkowski' (default p=2), 'euclidean' and 'l2' name the same distance, as do
# 'manhattan' and 'l1'.
params = dict(
    n_neighbors=np.arange(1, 30),
    algorithm=['kd_tree', 'ball_tree', 'auto', 'brute'],
    leaf_size=[10, 15, 20, 30, 35],
    weights=['uniform', 'distance'],
    metric=['minkowski', 'euclidean', 'manhattan', 'chebyshev', 'l1', 'l2'],
)

# 3-fold cross-validated search scored on accuracy, using all available cores.
knn = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=params,
    scoring='accuracy',
    cv=3,
    verbose=4,
    n_jobs=-1,
)
knn.fit(featureTrain, TargetTrain)

In [None]:
# Parameter combination with the best mean cross-validated accuracy on MONK-3.
knn.best_params_

### Result on Train and Test

In [None]:
# Take the estimator selected by the grid search and fit it on the full MONK-3
# training set.
# NOTE(review): GridSearchCV's default refit has presumably already done this,
# so the fit call looks redundant but harmless.
knOptimm3=knn.best_estimator_
knOptimm3.fit(featureTrain, TargetTrain)
# Training-set predictions, accuracy and MEE.
trainPred=knOptimm3.predict(featureTrain)
accuracy=accuracy_score(TargetTrain,trainPred)
print('Accuracy Train',accuracy)
print('MEE Train', mean_euclidean_error(TargetTrain.values, trainPred))


# Test-set predictions, per-class report, accuracy and MEE.
# `accuracy` is overwritten with the test value here. The two labels below
# carry no 'Test' suffix, unlike the MONK-1 and MONK-2 cells.
testPred=knOptimm3.predict(featureTest)
print(classification_report(TargetTest, testPred))
accuracy=accuracy_score(TargetTest,testPred)
print('Accuracy',accuracy)
print('MEE', mean_euclidean_error(TargetTest.values, testPred))

In [None]:
# Plot the confusion matrix of the selected MONK-3 model on the test split.
ConfusionMatrixDisplay.from_estimator(knOptimm3, featureTest, TargetTest)
plt.show()