In [None]:
import numpy as np
import pandas as pd
import math as mt
import matplotlib.pyplot as plt

# Machine epsilon for Python floats. The recall/precision/F1 helpers in this
# notebook add it to their denominators so they never divide by exactly zero.
eps = np.finfo(float).eps

In [None]:
def euclidianDistance(row1 , row2):
    """Return the Euclidean (L2) distance between two rows.

    Only positions ``1 .. len(row2) - 2`` are compared: position 0 and the
    last position of ``row2`` are skipped, and any entries of ``row1`` beyond
    that range are ignored.

    NOTE(review): presumably position 0 was meant to be an ID column and the
    last position the class label. In this notebook 'Serial No.' is dropped
    before rows reach this function, so position 0 is a real feature --
    confirm whether it should really be skipped.

    Parameters
    ----------
    row1, row2 : pandas.Series or 1-D array-like
        Rows to compare. ``row1`` must have at least ``len(row2) - 1`` entries.

    Returns
    -------
    numpy.float64
        Square root of the summed squared differences (0.0 when the compared
        range is empty).
    """
    reference = np.asarray(row2)
    # max(..., 1) keeps the slice empty for very short rows instead of letting
    # a negative stop wrap around.
    stop = max(reference.shape[0] - 1, 1)
    # Slice positionally on NumPy arrays: integer `series[i]` on a
    # label-indexed Series relies on a deprecated pandas fallback.
    left = np.asarray(row1)[1:stop].astype(float)
    right = reference[1:stop].astype(float)
    return np.sqrt(np.sum(np.square(left - right)))

In [None]:
def manhattanDistance(row1 , row2):
    """Return the Manhattan (L1) distance between two rows.

    Only positions ``1 .. len(row2) - 2`` are compared: position 0 and the
    last position of ``row2`` are skipped, and any entries of ``row1`` beyond
    that range are ignored.

    The previous version returned the square root of the L1 sum, which is not
    the Manhattan distance. Neighbour ordering is unaffected by the fix
    because the square root is monotonic.

    NOTE(review): presumably position 0 was meant to be an ID column and the
    last position the class label. In this notebook 'Serial No.' is dropped
    before rows reach this function, so position 0 is a real feature --
    confirm whether it should really be skipped.

    Parameters
    ----------
    row1, row2 : pandas.Series or 1-D array-like
        Rows to compare. ``row1`` must have at least ``len(row2) - 1`` entries.

    Returns
    -------
    numpy.float64
        Sum of absolute differences (0.0 when the compared range is empty).
    """
    reference = np.asarray(row2)
    # max(..., 1) keeps the slice empty for very short rows instead of letting
    # a negative stop wrap around.
    stop = max(reference.shape[0] - 1, 1)
    # Slice positionally on NumPy arrays: integer `series[i]` on a
    # label-indexed Series relies on a deprecated pandas fallback.
    left = np.asarray(row1)[1:stop].astype(float)
    right = reference[1:stop].astype(float)
    return np.sum(np.abs(left - right))

In [None]:
def minkowskiDistance(row1 , row2, p = 3):
    """Return the Minkowski distance of order ``p`` between two rows.

    Only positions ``1 .. len(row2) - 2`` are compared: position 0 and the
    last position of ``row2`` are skipped, and any entries of ``row1`` beyond
    that range are ignored.

    The previous version raised the differences to the third power but then
    took a square root; the p-th root is used now. Neighbour ordering is
    unaffected because both roots are monotonic.

    NOTE(review): presumably position 0 was meant to be an ID column and the
    last position the class label. In this notebook 'Serial No.' is dropped
    before rows reach this function, so position 0 is a real feature --
    confirm whether it should really be skipped.

    Parameters
    ----------
    row1, row2 : pandas.Series or 1-D array-like
        Rows to compare. ``row1`` must have at least ``len(row2) - 1`` entries.
    p : int or float, default 3
        Order of the norm; must be positive. The default matches the exponent
        that was previously hard-coded.

    Returns
    -------
    numpy.float64
        ``(sum(|a_i - b_i| ** p)) ** (1 / p)``.

    Raises
    ------
    ValueError
        If ``p`` is not positive.
    """
    if p <= 0:
        raise ValueError("p must be positive, got {!r}".format(p))
    reference = np.asarray(row2)
    # max(..., 1) keeps the slice empty for very short rows instead of letting
    # a negative stop wrap around.
    stop = max(reference.shape[0] - 1, 1)
    # Slice positionally on NumPy arrays: integer `series[i]` on a
    # label-indexed Series relies on a deprecated pandas fallback.
    left = np.asarray(row1)[1:stop].astype(float)
    right = reference[1:stop].astype(float)
    return np.sum(np.abs(left - right) ** p) ** (1.0 / p)

In [None]:
def distanceCalculator(df, row , distUsing = 'euclidian'):
    """Compute the distance from ``row`` to every row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to measure against (the training set in this notebook).
    row : pandas.Series
        Query row.
    distUsing : {'euclidian', 'manhattan', 'minkowski'}, default 'euclidian'
        Name of the distance function to apply.

    Returns
    -------
    list
        ``[distance, index]`` where ``distance[i]`` is the distance between
        ``row`` and the i-th row of ``df`` and ``index[i] == i``.

    Raises
    ------
    ValueError
        If ``distUsing`` is not one of the supported names. Previously an
        unknown name silently produced a distance of 0 for every row.
    """
    metrics = {
        'euclidian': euclidianDistance,
        'manhattan': manhattanDistance,
        'minkowski': minkowskiDistance,
    }
    if distUsing not in metrics:
        raise ValueError(
            "Unknown distance {!r}; expected one of {}".format(distUsing, sorted(metrics))
        )
    metric = metrics[distUsing]

    row_count = len(df)
    # iloc is positional, so this works whatever the index labels of df are
    # (the previous df.loc[i, :] required labels 0..len(df)-1).
    distance = [metric(df.iloc[i], row) for i in range(row_count)]
    index = list(range(row_count))
    return [distance, index]

In [None]:
def sortedDistanceDF(df , distance_index):
    """Return a copy of ``df`` annotated with distances and sorted by them.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose rows were measured.
    distance_index : sequence
        ``[distance, index]`` as returned by ``distanceCalculator``; both
        entries must have one value per row of ``df``.

    Returns
    -------
    pandas.DataFrame
        New frame with 'distance' and 'index' columns, sorted ascending by
        distance with 'index' as the tie-breaker.

    Notes
    -----
    The input frame is left untouched. The previous version wrote the two
    columns into the caller's frame, so the training set accumulated them.
    """
    # assign() builds a new frame instead of mutating the argument.
    annotated = df.assign(distance=distance_index[0], index=distance_index[1])
    return annotated.sort_values(['distance','index'])

In [None]:
def firstK(df,k):
    """Return the first ``k`` rows of ``df`` (fewer if ``df`` is shorter)."""
    return df.iloc[:k]

In [None]:
def majority(df):
    """Return the most frequent value in the ``Class`` column of ``df``.

    ``Class`` is the module-level name of the target column. On a tie the
    smallest value wins: ``np.unique`` returns the values sorted and
    ``np.argmax`` picks the first maximum.
    """
    labels, frequencies = np.unique(df[Class], return_counts=True)
    winner = np.argmax(frequencies)
    return labels[winner]

In [None]:
def KNN_algo(df, row, k, distUsing = 'euclidian'):
    """Predict the class of ``row`` by majority vote among its ``k`` nearest rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Labelled rows to search for neighbours.
    row : pandas.Series
        Row to classify.
    k : int
        Number of nearest rows that vote.
    distUsing : str, default 'euclidian'
        Distance name passed through to ``distanceCalculator``.

    Returns
    -------
    The class value returned by ``majority`` for the ``k`` closest rows.
    """
    ranked = sortedDistanceDF(df, distanceCalculator(df, row, distUsing))
    neighbours = firstK(ranked, k)
    return majority(neighbours)


In [None]:
def accuracy(confusion):
    """Return the accuracy, in percent, described by a square confusion matrix.

    Parameters
    ----------
    confusion : sequence of sequences
        ``n x n`` matrix of counts; the diagonal holds the correctly
        classified samples. Any ``n`` is accepted (the previous version only
        read the top-left 2x2 corner).

    Returns
    -------
    float
        ``100 * trace / total``.

    Raises
    ------
    ZeroDivisionError
        If the matrix holds plain Python numbers that sum to zero.
    """
    size = len(confusion)
    correct = sum(confusion[i][i] for i in range(size))
    # Flat row-major sum, the same accumulation order as the original loops.
    total = sum(confusion[i][j] for i in range(size) for j in range(size))
    return 100*correct/(float(total))


In [None]:
def scores(ClassValue, confusion):
    """Split a 2x2 confusion matrix into one-vs-rest counts for ``ClassValue``.

    The matrix is indexed ``confusion[predicted][actual]`` and the position of
    ``ClassValue`` is looked up in the module-level ``ClassList``.

    Returns
    -------
    tuple
        ``(true_positive, true_negative, false_positive, false_negative)``.
    """
    target = ClassList.index(ClassValue)
    true_positive = true_negative = false_positive = false_negative = 0
    for predict in range(2):
        predicted_is_target = predict == target
        for actual in range(2):
            actual_is_target = actual == target
            cell = confusion[predict][actual]
            if predicted_is_target and actual_is_target:
                true_positive += cell
            elif predicted_is_target:
                # Predicted as the target class, but it was the other class.
                false_positive += cell
            elif actual_is_target:
                # Was the target class, but predicted as the other class.
                false_negative += cell
            else:
                true_negative += cell
    return true_positive, true_negative, false_positive, false_negative

In [None]:
def recall(true_positive , false_negative):
    """Return recall in percent: TP / (TP + FN), with ``eps`` guarding the denominator."""
    denominator = true_positive + false_negative + eps
    return true_positive * 100 / denominator

In [None]:
def precision(true_positive , false_positive):
    """Return precision in percent: TP / (TP + FP), with ``eps`` guarding the denominator."""
    denominator = true_positive + false_positive + eps
    return true_positive * 100 / denominator

In [None]:
def f1score(recall , prescision):
    """Return the harmonic mean of ``recall`` and ``prescision``.

    ``eps`` is added to each value before inverting so that a score of zero
    does not cause a division by zero.
    """
    inverse_recall = 1 / (float(recall) + eps)
    inverse_precision = 1 / (float(prescision) + eps)
    return 2 / (inverse_recall + inverse_precision)

In [None]:
def plotter(x_axis, y_axis):
    """Draw and show a line plot of accuracy (``y_axis``) against K (``x_axis``)."""
    figure = plt.figure(num=None, figsize=(6, 4), dpi=150, facecolor='w', edgecolor='k')
    axes = figure.gca()
    axes.plot(x_axis, y_axis)
    # Axis labels and title
    axes.set_xlabel('K')
    axes.set_ylabel('Accuracy')
    axes.set_title('Accuracy-vs-K')
    axes.grid(True)
    # Render the figure
    plt.show()

In [None]:
def validation(trainingSet, validationSet, distUsing = 'euclidian'):
    """Evaluate KNN on ``validationSet`` for every K from 1 to sqrt(len(trainingSet)).

    For each K the function builds a 2x2 confusion matrix
    (``confusion[predicted][actual]``), prints it together with the accuracy
    and the per-class recall, precision and F1 score, and finally plots
    accuracy against K.

    Parameters
    ----------
    trainingSet : pandas.DataFrame
        Labelled rows used as neighbours.
    validationSet : pandas.DataFrame
        Labelled rows to classify.
    distUsing : str, default 'euclidian'
        Distance name forwarded to ``KNN_algo``.

    Returns
    -------
    tuple of lists
        ``(listK, listACC)``: the K values tried and the accuracy for each.
    """
    max_k = int(np.sqrt(len(trainingSet)))
    k_values = []
    accuracies = []
    print("Validating data set using ",distUsing," distance ")
    for k in range(1 , max_k + 1):
        # Rows are indexed by the predicted class, columns by the actual class.
        confusion = [[0,0],[0,0]]
        for _, sample in validationSet.iterrows():
            predicted = KNN_algo(trainingSet,sample,k, distUsing)
            predicted_pos = ClassList.index(predicted)
            actual_pos = ClassList.index(sample[Class])
            confusion[predicted_pos][actual_pos] += 1

        print("---------------------------------------------------------------------------------------------------")
        print("                                          FOR K = ", k)
        print("---------------------------------------------------------------------------------------------------")
        print("confusion matrix: ")
        for matrix_row in confusion:
            for cell in matrix_row:
                print(cell," ",end='')
            print("\n")
        print("----------------------------------------------------------------------")
        print(" ")

        acc = accuracy(confusion)
        print ("ACCURACY: " , acc)

        # One-vs-rest metrics for each class value.
        for label in ClassList:
            tp, tn, fp, fn = scores(label , confusion)
            rec = recall(tp , fn)
            pre = precision(tp , fp)
            f1 = f1score(rec, pre)
            print(" ")
            print("------------Class Value ",label,"Scores are------------")
            print(" ")
            print("RECALL: ", rec)
            print("PRECISION: ", pre)
            print("F1-score: ", f1)
        print(" ")
        print("--END-END-END-END-END-END-END-END-END-END-END-END-END-END-END-END-END-END-END-END-END-END-END--")
        print(" ")

        k_values.append(k)
        accuracies.append(acc)

    plotter(k_values, accuracies)
    return k_values, accuracies

        

In [None]:
# Load the admissions data. `dataSet` and `randomDataSet` start out as the same
# frame; `randomDataSet` is then narrowed to the modelling columns.
randomDataSet = dataSet = pd.read_csv("./../input_data/AdmissionDataset/data.csv")
# To shuffle the rows before splitting, enable the next line instead:
# randomDataSet = dataSet.sample(frac=1).reset_index(drop=True)
Class = "Chance of Admit"
columns = ['Serial No.' , 'GRE Score' , 'TOEFL Score' , 'University Rating' , 'SOP' , 'LOR' , 'CGPA' , 'Research' , Class]
randomDataSet.columns = columns
# Drop the serial number: it is an identifier, not a feature.
columns = columns[1:]
# .copy() makes this an independent frame, so later column assignments do not
# raise SettingWithCopyWarning or depend on the frame that was read.
randomDataSet = randomDataSet[columns].copy()

randomDataSet

In [None]:
# Binarise the target column: 1 when the value is >= 0.5, otherwise 0.
# Vectorised comparison replaces the row-by-row loop, which also relied on
# deprecated positional `series[-1]` indexing.
ClassBinaryList = (randomDataSet[Class] >= 0.5).astype(int).tolist()
randomDataSet[Class] = ClassBinaryList

In [None]:
# Display the frame again so the binarised target column can be inspected.
randomDataSet

In [None]:
# 80/20 split in row order: the first 80% of rows train, the rest validate.
# Positional iloc slices replace np.split, which on a DataFrame goes through
# the deprecated DataFrame.swapaxes. Index labels are preserved as before.
splitPoint = int(0.8*len(randomDataSet))
trainingSet = randomDataSet.iloc[:splitPoint].copy()
validationSet = randomDataSet.iloc[splitPoint:].copy()

# Sorted list of distinct class values; its positions index the confusion matrix.
clvalue,counts = np.unique(randomDataSet[Class],return_counts=True)
ClassList = list(clvalue)

In [None]:
# Sweep K using the Euclidean distance (the function's default metric).
validation(trainingSet, validationSet, distUsing='euclidian')

In [None]:
# Sweep K using the Manhattan distance.
validation(trainingSet, validationSet, distUsing='manhattan')

In [None]:
# Sweep K using the Minkowski distance.
validation(trainingSet, validationSet, distUsing='minkowski')