# In [None]:
from random import randrange#to select rows randomly
from csv import reader#to load csv file
from math import sqrt#to calculate euclidean distances
from math import *
from decimal import Decimal 
# Disabled code: the Minkowski-distance helpers below are wrapped in a string
# literal, so they are never defined or executed.
# NOTE(review): the disabled code calls itertools.izip, which is Python 2-only,
# and no import of itertools is visible in this file -- it would need
# porting (plain zip) before being re-enabled.
''''
def p_root(value, root): 
      
    root_value = 1 / float(root) 
    return round (Decimal(value) **
             Decimal(root_value), 3) 
  
def minkowski_distance(x, y, p_value): 
    print(zip(x,y))
    return (p_root(sum(pow(abs(a-b), p_value) 
            for a, b in itertools.izip(x, y)), p_value)) '''
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between two rows, ignoring the last column.

    Only the first ``len(row1) - 1`` positions take part in the calculation;
    the final element of ``row1`` (and the matching position of ``row2``) is
    never read.
    """
    feature_count = len(row1) - 1
    squared_gaps = [(row1[idx] - row2[idx]) ** 2 for idx in range(feature_count)]
    # Accumulate left-to-right from 0.0 so the result is always a float.
    squared_total = 0.0
    for term in squared_gaps:
        squared_total += term
    return sqrt(squared_total)
 

def calculate_accuracy(actual, predicted):
    """Return the percentage of positions where ``predicted`` equals ``actual``.

    The result is a float between 0.0 and 100.0. Positions are compared
    index by index over the length of ``actual``.
    """
    total = len(actual)
    hits = sum(1 for idx in range(total) if actual[idx] == predicted[idx])
    return hits / float(total) * 100.0
 

def load_csv(filename):
    """Read a CSV file and return its non-empty rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each row being a list of the field strings produced
        by ``csv.reader``. Rows that parse as empty (blank lines) are skipped.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted fields
    # are rewritten by universal-newline translation.
    with open(filename, 'r', newline='') as csv_file:
        return [row for row in reader(csv_file) if row]
 

def str_column_to_float(dataset, column):
    """Convert one column of every row from text to float, in place.

    Surrounding whitespace is stripped from each value before conversion.
    The rows of ``dataset`` are modified directly; nothing is returned.
    """
    for record in dataset:
        raw_text = record[column]
        record[column] = float(raw_text.strip())

        
def str_class_to_int(dataset, column):
    """Replace the values of one column with integer codes, in place.

    Each distinct value in ``column`` gets a code 0, 1, 2, ... in the order
    the values first appear in ``dataset``.

    Args:
        dataset: List of rows (lists); modified in place.
        column: Index of the column to encode.

    Returns:
        A dict mapping each original value to its integer code.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # codes are the same on every run. Iterating a set of strings instead
    # would vary between interpreter runs because of hash randomization.
    distinct_values = dict.fromkeys(row[column] for row in dataset)
    lookup = {value: code for code, value in enumerate(distinct_values)}
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup
 
def split_data(dataset, no_of_folds):
    """Randomly partition ``dataset`` into ``no_of_folds`` equally sized folds.

    Each fold holds ``int(len(dataset) / no_of_folds)`` rows drawn without
    replacement; rows left over after the last fold is filled are not placed
    in any fold. ``dataset`` itself is not modified.
    """
    remaining = list(dataset)  # shallow copy that rows are drawn from
    fold_size = int(len(dataset) / no_of_folds)
    folds = []
    for _ in range(no_of_folds):
        folds.append([remaining.pop(randrange(len(remaining)))
                      for _ in range(fold_size)])
    return folds
 
def get_scores(dataset, algorithm, no_of_folds, *args):
    """Cross-validate ``algorithm`` and return one accuracy score per fold.

    The dataset is split with ``split_data``; each fold in turn is held out as
    the test set while the remaining folds are flattened into the training
    set. ``algorithm`` is called as ``algorithm(train_set, test_set, *args)``
    and its predictions are scored with ``calculate_accuracy``.
    """
    folds = split_data(dataset, no_of_folds)
    scores = []
    for held_out in folds:
        # Training data is every fold except the held-out one, flattened
        # into a single list of rows.
        other_folds = list(folds)
        other_folds.remove(held_out)
        train_set = [row for part in other_folds for row in part]

        # Test rows are copies with the last element blanked out, so the
        # algorithm cannot read the true label.
        test_set = []
        for row in held_out:
            masked = list(row)
            masked[-1] = None
            test_set.append(masked)

        predicted = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in held_out]
        scores.append(calculate_accuracy(actual, predicted))
    return scores


def get_neighbors(train, test_row, no_of_neighbors):
    """Return the training rows closest to ``test_row``.

    Args:
        train: Iterable of training rows.
        test_row: Row to find neighbours for.
        no_of_neighbors: Maximum number of rows to return.

    Returns:
        A list of up to ``no_of_neighbors`` rows from ``train``, ordered from
        nearest to farthest by ``euclidean_distance``. Fewer rows are returned
        when ``train`` has fewer than ``no_of_neighbors`` rows.
    """
    distances = [(train_row, euclidean_distance(test_row, train_row))
                 for train_row in train]
    distances.sort(key=lambda tup: tup[1])
    # Slicing clamps to the rows available, so asking for more neighbours
    # than there are training rows no longer raises IndexError. max() keeps
    # a negative count meaning "no neighbours" rather than slicing from the end.
    nearest = distances[:max(no_of_neighbors, 0)]
    return [train_row for train_row, _ in nearest]
 

def predict_class(train, test_row, no_of_neighbors):
    """Predict the class of ``test_row`` by majority vote of its neighbours.

    The neighbours come from ``get_neighbors``; the last element of each
    neighbouring row is its vote, and the most frequent vote is returned.
    """
    nearest_rows = get_neighbors(train, test_row, no_of_neighbors)
    votes = [neighbor[-1] for neighbor in nearest_rows]
    candidates = set(votes)
    return max(candidates, key=votes.count)
 

def k_nearest_neighboursz(train, test, no_of_neighbors):
    """Return the predicted class for every row of ``test``, in order.

    Each test row is classified independently with ``predict_class`` against
    the rows in ``train``.
    """
    return [predict_class(train, row, no_of_neighbors) for row in test]
 

# Load the data set, then convert it for the classifier: every column except
# the last is parsed as a float, and the last column is encoded as integers.
filename = 'desktop/cat1.csv'
dataset = load_csv(filename)
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
str_class_to_int(dataset, len(dataset[0])-1)

no_of_folds = 30
no_of_neighbors = 30
# The classifier defined in this file is named k_nearest_neighboursz; the
# previous reference to k_nearest_neighbors raised NameError because no
# function of that name exists.
scores = get_scores(dataset, k_nearest_neighboursz, no_of_folds, no_of_neighbors)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))