In [0]:
import numpy as np
import pandas as pd
import math

In [0]:
def get_data(filename,class_column,ratio,random_state=None):
    """
        Load a header-less CSV and split it class by class into train / val / test sets.

        Returns train , val , test , dict_map
          train/val/test : dicts with 'x' (array of shape (rows, n_features, 1))
                           and 'y' (integer class index for every row of 'x')
          dict_map       : {class index : original class value}, in order of
                           first appearance in the file

        filename     : Path, URL or file-like object accepted by pandas.read_csv
        class_column : Name of the class column (an integer position, because the
                       file is read with header=None)
        ratio        : [train_ratio , val_ratio , test_ratio ]; only the first two
                       entries are read, the test set receives the remaining rows
        random_state : Optional seed forwarded to DataFrame.sample so the per-class
                       shuffle can be reproduced (default None keeps it random)
    """

    df = pd.read_csv(filename,header=None)

    train = { 'x' : [] , 'y' : [] }
    val = { 'x' : [] , 'y' : [] }
    test = { 'x' : [] , 'y' : [] }

    classes  = df[class_column].unique().tolist()
    dict_map = dict(enumerate(classes))

    for index,clss in dict_map.items():

        # Shuffle the rows of this class, then drop the label so only features remain.
        clss_df = df[df[class_column] == clss].sample(frac=1,random_state=random_state)
        clss_df = clss_df.drop(columns=class_column)
        n_rows = len(clss_df)

        # Clamp both split ends to n_rows: ceil() may overshoot when the ratios add
        # up to more than 1, which would otherwise ask np.repeat for a negative count.
        train_idx_end = min(n_rows, math.ceil(n_rows*ratio[0]))
        val_idx_end = min(n_rows, train_idx_end + math.ceil(n_rows*ratio[1]))

        # A trailing axis turns every row into an (n_features, 1) column vector
        # without hard-coding the number of feature columns.
        features = clss_df.values[:, :, np.newaxis]

        train['x'].append(features[0 : train_idx_end])
        val['x'].append(features[train_idx_end : val_idx_end])
        test['x'].append(features[val_idx_end :])

        train['y'].append(np.repeat(index,train_idx_end))
        val['y'].append(np.repeat(index,val_idx_end-train_idx_end))
        test['y'].append(np.repeat(index,n_rows-val_idx_end))

    # Classes are appended one after another, so every 'y' array is grouped by class.
    train['x'] = np.concatenate(train['x'])
    train['y'] = np.concatenate(train['y'])
    val['x']   = np.concatenate(val['x'])
    val['y']   = np.concatenate(val['y'])
    test['x']  = np.concatenate(test['x'])
    test['y']  = np.concatenate(test['y'])

    return train,val,test,dict_map
    
    

In [0]:
# Download the Iris data file from the UCI repository (needs network access).
# Column 4 is passed as the class column and the ratio list asks for a
# 0.6 / 0.2 / 0.2 train / validation / test split.
train,val,test,dict_map = get_data("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",4,[0.6,0.2,0.2])

In [0]:
def get_partition_indices(train_y,dict_map):
    """
      Locate the positions in train_y where the class label changes.

      train_y  : 1-D array of class labels
      dict_map : class mapping; only its length is used, as a countdown of
                 how many label changes are still recorded

      Returns a list that starts with 0, then holds every position whose label
      differs from the most recently tracked label (tracking starts at 0), and
      ends with len(train_y). Scanning stops early once the countdown hits 0.
    """
    total = train_y.shape[0]
    boundaries = [0]
    tracked_label = 0
    remaining = len(dict_map)

    for position in range(total):
        label = train_y[position]
        if label != tracked_label:
            # A new run of labels begins at this position.
            tracked_label = label
            remaining -= 1
            boundaries.append(position)
        if remaining == 0:
            break

    boundaries.append(total)
    return boundaries

def l2_distance(vec1,vec2):
    """
      Euclidean (L2) distance between two arrays of matching shape,
      computed as the default numpy norm of their difference.
    """
    difference = vec1 - vec2
    return np.linalg.norm(difference)

def centroid_neighbors(train_x,train_y,test_x_row,dict_map,indices):
    """
      Distance from the query row to the centroid of every class.

      train_x    : training inputs; rows indices[c] .. indices[c+1]-1 are
                   treated as the members of class c
      train_y    : accepted for a uniform signature, not read here
      test_x_row : the query row
      dict_map   : class mapping; only its length (number of classes) is used
      indices    : partition boundaries into train_x

      Returns a list of (class index, distance to that class's mean row).
    """
    n_classes = len(dict_map)
    return [
        (
            label,
            l2_distance(
                np.mean(train_x[indices[label]:indices[label + 1]], axis=0),
                test_x_row,
            ),
        )
        for label in range(n_classes)
    ]


def farthest_neighbors(train_x,train_y,test_x_row,dict_map,indices):
    """
      For every class, pick the training row that lies farthest from the query row.

      train_x    : training inputs; rows indices[c] .. indices[c+1]-1 are
                   treated as the members of class c
      train_y    : training labels, copied into the returned tuples
      test_x_row : the query row
      dict_map   : class mapping; only its length (number of classes) is used
      indices    : partition boundaries into train_x

      Returns one (row, label, distance) tuple per class. On equal distances
      the earliest row wins.
    """
    farthest_per_class = []
    for label in range(len(dict_map)):
        first, stop = indices[label], indices[label + 1]
        candidates = [
            (train_x[row], train_y[row], l2_distance(train_x[row], test_x_row))
            for row in range(first, stop)
        ]
        farthest_per_class.append(max(candidates, key=lambda entry: entry[2]))

    return farthest_per_class

def k_neighbors(train_x,train_y,test_x_row,k):
    """
      Return the k training rows closest to the query row.

      train_x    : training inputs, one row per sample
      train_y    : training labels, aligned with train_x
      test_x_row : the query row
      k          : number of neighbours to keep

      Returns a list of (row, label, distance) tuples in ascending distance;
      the sort is stable, so equal distances keep their training order.
    """
    scored = [
        (train_x[row], train_y[row], l2_distance(train_x[row], test_x_row))
        for row in range(train_x.shape[0])
    ]
    return sorted(scored, key=lambda entry: entry[2])[:k]

def predict(train_x,train_y,test_x_row,k,dict_map=None):
    """
      To predict the test data's class with three rules
      (every class in the training data should be in a continuous range, i.e.
      train_y must be grouped by class)

      train_x    : Training Data Input
      train_y    : Training Data Output Class
      test_x_row : Testing Data Input (a single row)
      k          : No of Neighbors used by the K-NN vote
      dict_map   : Optional class mapping; only its length (number of classes)
                   is used. When omitted it is derived from the distinct labels
                   in train_y, so the function no longer depends on a notebook
                   global of the same name.

      Returns [knn_pred, fnn_pred, cnn_pred]
        knn_pred : most frequent label among the k nearest training rows
        fnn_pred : label of the class whose farthest member is nearest to the query
        cnn_pred : index of the class whose centroid is nearest to the query
    """

    if dict_map is None:
        # One entry per distinct training label; helpers only need the count.
        dict_map = dict(enumerate(np.unique(train_y).tolist()))

    k_neigh = k_neighbors(train_x,train_y,test_x_row,k)
    classes = [neighbor[1] for neighbor in k_neigh]
    knn_pred = max(set(classes),key = classes.count)

    # Boundaries of each class's run of rows inside the training arrays.
    indices = get_partition_indices(train_y,dict_map)

    fnn_neigh = farthest_neighbors(train_x,train_y,test_x_row,dict_map,indices)
    fnn_pred = min(fnn_neigh, key = lambda x : x[2])[1]

    cnn_neigh = centroid_neighbors(train_x,train_y,test_x_row,dict_map,indices)
    cnn_pred = min(cnn_neigh, key = lambda x : x[1])[0]

    return [knn_pred,fnn_pred,cnn_pred]

def _evaluate_split(train,data,k,return_df=False):
    """
      Shared scoring loop behind validation() and test_fn().

      Runs predict() for every row of data['x'], adds a majority vote over the
      three individual predictions, counts the hits against data['y'] and
      prints one summary line with the four accuracies (in percent).

      train     : dict with 'x' and 'y' training arrays
      data      : dict with 'x' and 'y' arrays to score
      k         : No of Neighbors passed on to predict()
      return_df : when True, return a DataFrame holding every prediction row

      Returns None, or the per-row DataFrame when return_df is True.
    """
    n_rows = data['x'].shape[0]
    hits = [0, 0, 0, 0]   # K-NN, F-NN, C-NN, majority vote
    records = []

    for i in range(n_rows):
        actual = data['y'][i]
        pred_list = predict(train['x'],train['y'],data['x'][i],k)
        # Majority vote; when all three disagree max() keeps the first one (K-NN).
        pred_list.append(max(pred_list,key = pred_list.count))

        for slot in range(4):
            if pred_list[slot] == actual:
                hits[slot] += 1

        if return_df:
            # Rows are collected and the frame is built once at the end,
            # instead of growing a DataFrame one .loc assignment at a time.
            records.append(pred_list + [actual])

    knn_accuracy = (hits[0]*100)/(n_rows)
    fnn_accuracy = (hits[1]*100)/(n_rows)
    cnn_accuracy = (hits[2]*100)/(n_rows)
    accuracy = (hits[3]*100)/(n_rows)
    print("k = {} , K-NN Accuracy = {} , F-NN Accuracy = {} , C-NN Accuracy = {}, Accuracy = {}".format(k,knn_accuracy,fnn_accuracy,cnn_accuracy,accuracy))

    if return_df:
        return pd.DataFrame(records,columns=["K-NN","F-NN","C-NN","Prediction","Actual Class",])
    return None

def validation(train,val,k,return_df=False):
    """
      Score the three predictors and their majority vote on the validation split.

      train     : dict with 'x' and 'y' training arrays
      val       : dict with 'x' and 'y' validation arrays
      k         : No of Neighbors
      return_df : when True, also return the per-row predictions as a DataFrame

      Prints one accuracy line; returns None unless return_df is True.
    """
    return _evaluate_split(train,val,k,return_df)

def test_fn(train,test,k,return_df=False):
    """
      Score the three predictors and their majority vote on the test split.

      train     : dict with 'x' and 'y' training arrays
      test      : dict with 'x' and 'y' test arrays
      k         : No of Neighbors
      return_df : when True, also return the per-row predictions as a DataFrame

      Prints one accuracy line; returns None unless return_df is True.
    """
    return _evaluate_split(train,test,k,return_df)


    

In [0]:
# Smoke test: predict test row 25 with k = 4; `a` receives the list
# [knn_pred, fnn_pred, cnn_pred] returned by predict().
a = predict(train['x'],train['y'],test['x'][25],4)

In [0]:
# Sweep k = 1 .. 14 on the validation split; validation() prints one
# accuracy summary line per k.
for k in range(1,15):
  validation(train,val,k)


k = 1 , K-NN Accuracy = 100.0 , F-NN Accuracy = 93.33333333333333 , C-NN Accuracy = 96.66666666666667, Accuracy = 96.66666666666667
k = 2 , K-NN Accuracy = 100.0 , F-NN Accuracy = 93.33333333333333 , C-NN Accuracy = 96.66666666666667, Accuracy = 96.66666666666667
k = 3 , K-NN Accuracy = 100.0 , F-NN Accuracy = 93.33333333333333 , C-NN Accuracy = 96.66666666666667, Accuracy = 96.66666666666667
k = 4 , K-NN Accuracy = 96.66666666666667 , F-NN Accuracy = 93.33333333333333 , C-NN Accuracy = 96.66666666666667, Accuracy = 93.33333333333333
k = 5 , K-NN Accuracy = 100.0 , F-NN Accuracy = 93.33333333333333 , C-NN Accuracy = 96.66666666666667, Accuracy = 96.66666666666667
k = 6 , K-NN Accuracy = 100.0 , F-NN Accuracy = 93.33333333333333 , C-NN Accuracy = 96.66666666666667, Accuracy = 96.66666666666667
k = 7 , K-NN Accuracy = 100.0 , F-NN Accuracy = 93.33333333333333 , C-NN Accuracy = 96.66666666666667, Accuracy = 96.66666666666667
k = 8 , K-NN Accuracy = 100.0 , F-NN Accuracy = 93.3333333333333

In [0]:
# Sweep k = 1 .. 14 on the test split; test_fn() prints one accuracy
# summary line per k.
for k in range(1,15):
  test_fn(train,test,k)
  # test_fn() returns nothing, so there is no accuracy value to print here.

k = 1 , K-NN Accuracy = 93.33333333333333 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 2 , K-NN Accuracy = 93.33333333333333 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 3 , K-NN Accuracy = 93.33333333333333 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 4 , K-NN Accuracy = 93.33333333333333 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 5 , K-NN Accuracy = 93.33333333333333 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 6 , K-NN Accuracy = 93.33333333333333 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 7 , K-NN Accuracy = 93.33333333333333 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 8 , K-NN Accuracy = 93.33333333333333 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 9 , K-NN Accuracy = 96.66666666666667 , F-NN Accuracy = 80.0 , C-NN Accuracy = 80.0, Accuracy = 80.0
k = 10 , K-NN Accuracy = 93.33333333333333 , F-NN Accur