In [1]:
#Loading Data from respective files
import csv
def loadDataset(trainfile,testfile,trainingset=None,testset=None):
    """Load the training and test CSV files into lists of rows.

    The first line of each file is treated as a header and skipped. In every
    remaining row the first two columns are converted to float; all other
    columns are kept as the strings produced by csv.reader. Completely blank
    lines are ignored.

    Parameters:
        trainfile: path of the training CSV file.
        testfile: path of the test CSV file.
        trainingset: optional list the training rows are appended to.
            A new list is created when omitted.
        testset: optional list the test rows are appended to.
            A new list is created when omitted.

    Returns:
        The tuple (trainingset, testset). When lists were passed in, these
        are the same objects, extended in place.

    Raises:
        OSError: if either file cannot be opened.
        ValueError: if one of the first two columns is not a valid float.
        IndexError: if a non-blank row has fewer than two columns.
    """
    # Build fresh lists per call: a literal [] default is created once and
    # shared, so rows from earlier calls would leak into later ones.
    if trainingset is None:
        trainingset=[]
    if testset is None:
        testset=[]
    _append_csv_rows(trainfile,trainingset)
    _append_csv_rows(testfile,testset)
    return trainingset,testset


def _append_csv_rows(path,target):
    """Append the data rows of the CSV file at path to target (header skipped)."""
    # newline='' lets the csv module handle line endings itself.
    with open(path,'r',newline='') as file:
        reader=csv.reader(file)
        next(reader,None)  # Discard the header line, if there is one
        for row in reader:
            if not row:
                continue  # csv.reader yields [] for a blank line
            for y in range(2):
                row[y]=float(row[y])
            target.append(row)

In [2]:
#Finding Euclidean-Distance
import math
def eucdist(data1,data2,length):
    """Return the Euclidean distance between two rows.

    Only the entries at indices 0 .. length-1 of data1 and data2 take part;
    anything after that (for example a trailing class label) is ignored.
    """
    squared_total=0
    for idx in range(length):
        delta=data1[idx]-data2[idx]
        squared_total=squared_total+pow(delta,2)
    return math.sqrt(squared_total)

In [3]:
#Sorting the calculated distances in ascending order based on distance and k value
import operator
def arranging(trainingset,testset,k):    #Here testset is a list containing single row only
    """Return the k training rows closest to the given test row.

    Every entry of the test row except the last one is used as a feature
    when measuring the Euclidean distance to each training row. Training
    rows are ranked by ascending distance; rows at equal distance keep
    their order from trainingset.

    Raises IndexError when k is larger than the number of training rows.
    """
    feature_count=len(testset)-1
    # Pair every training row with its distance to the test row
    scored=[(row,eucdist(testset,row,feature_count)) for row in trainingset]
    # Rank by the distance stored at index 1 of each pair, smallest first
    ranked=sorted(scored,key=lambda pair:pair[1])
    # Keep only the row part of the k best-ranked pairs
    return [ranked[position][0] for position in range(k)]

In [4]:
# Predicting the outcome on the basis of majority of k nearest neighbours
import operator
def getresponse(neighbours):
    """Return the class label that occurs most often among the neighbours.

    The label is the last entry of each neighbour row. When several labels
    share the highest count, the one that appeared first in neighbours wins.
    Raises IndexError when neighbours is empty.
    """
    tally={}
    for row in neighbours:
        label=row[-1]
        tally[label]=tally.get(label,0)+1
    # Highest vote count first; the stable sort keeps first-seen order on ties
    ranked=sorted(tally.items(),key=lambda item:item[1],reverse=True)
    winner=ranked[0]
    return winner[0]

In [5]:
# Calculating Accuracy in terms of percentage
def Accuracy(testset,predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    Parameters:
        testset: list of rows; the last entry of each row is the true label.
        predictions: predicted labels, predictions[i] belonging to testset[i].

    Returns:
        A float between 0.0 and 100.0. An empty testset gives 0.0 instead of
        failing with a division by zero.

    Raises:
        IndexError: if predictions has fewer entries than testset.
    """
    total=len(testset)
    if total==0:
        return 0.0  # Nothing to score, and dividing by zero must be avoided
    correct=0
    for x in range(total):
        if testset[x][-1]==predictions[x]:
            correct+=1
    return (correct/float(total))*100.0

#Main Function
def main(trainfile='train.csv',testfile='test.csv',k=7):
    """Run the k-nearest-neighbours classifier and print the results.

    Loads both CSV files, predicts a label for every test row from its k
    nearest training rows, prints each prediction next to the actual label
    and finally prints the overall accuracy in bold.

    Parameters:
        trainfile: path of the training CSV file (default 'train.csv').
        testfile: path of the test CSV file (default 'test.csv').
        k: number of neighbours that vote on each prediction (default 7).
            An odd value makes ties less likely.
    """
    # Preparing data
    trainingset=[]
    testset=[]
    loadDataset(trainfile,testfile,trainingset,testset)
    print("Number of rows in Train Set:",repr(len(trainingset)))
    print("Number of rows in Test Set:",repr(len(testset)))
    # Generating predictions
    predictions=[] #List which will contain the predictions
    print("List of Predicted Outcome vs Actual Outcome:")
    for row in testset:
        neighbours=arranging(trainingset,row,k)
        result=getresponse(neighbours)
        predictions.append(result)
        print("-> Predicted Outcome ="+repr(result)+":-> Actual Outcome ="+repr(row[-1]))
    accuracy=Accuracy(testset,predictions)
    print('\n')
    # \033[1m switches bold on; \033[0m switches it off again so that
    # anything printed afterwards is not rendered bold as well.
    print("\033[1m"+"Accuracy of the model:"+repr(accuracy)+"%"+"\033[0m")
    

main() #In the output, 'Fourth' is equivalent to 'Juniors'
    
        

Number of rows in Train Set: 450
Number of rows in Test Set: 150
List of Predicted Outcome vs Actual Outcome:
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> Actual Outcome ='seniors'
-> Predicted Outcome ='seniors':-> A