In [160]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd

# Fetch dataset id 53 through ucimlrepo and keep the returned bundle around.
iris = fetch_ucirepo(id=53)

# Three views exposed by the bundle: the full original table, the target
# column(s), and the feature columns.
df = iris.data.original
y = iris.data.targets
X = iris.data.features

# Vote-counter template: one zero-initialised entry per distinct value found
# in the 'class' target column. getMajorityLabel copies it for every query.
label_dict = dict.fromkeys(y['class'], 0)


In [161]:
import numpy as math

def computeDistance(a, b):
    """Return the Euclidean distance between the sequences `a` and `b`.

    Components are paired with `zip`, so if the two inputs differ in length
    the surplus components of the longer one are ignored.

    Note: `math` in this notebook is NumPy (imported as `import numpy as math`),
    not the standard-library `math` module.
    """
    squaredDiffs = []
    for left, right in zip(a, b):
        squaredDiffs.append((left - right) ** 2)
    total = math.sum(squaredDiffs)
    return math.sqrt(total)


In [162]:
def getMajorityLabel(neighboursList):
    """Pick the winning label among `(distance, label)` neighbour pairs.

    Every neighbour adds `(last_distance + 1) - distance` to its label's
    score, where `last_distance` is the distance stored in the final pair of
    the list. When the list is sorted by ascending distance (as kNNClassify
    passes it), closer neighbours therefore weigh more and the farthest one
    still contributes 1.

    Scores start from a copy of the module-level `label_dict`, so every label
    must already be a key there. The label with the highest score is returned.
    An empty list raises IndexError.
    """
    scores = dict(label_dict)
    # Offset by one so that the last neighbour does not get a zero weight.
    ceiling = neighboursList[-1][0] + 1
    for distance, vote in neighboursList:
        scores[vote] = scores[vote] + (ceiling - distance)
    return max(scores, key=lambda candidate: scores[candidate])


In [163]:
def simpleSample(data, frac=0.2, random_state=None):
    """Split `data` into a learning part and a class-stratified testing part.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'class' column; the sample is drawn separately inside
        each class so the testing part keeps the class proportions.
    frac : float, default 0.2
        Fraction of every class that goes into the testing part.
    random_state : optional
        Forwarded to pandas' sampling. Pass a fixed value to make the split
        reproducible across runs; the default (None) keeps it random.

    Returns
    -------
    (learning, testing) : tuple of pandas.DataFrame
        `testing` holds the sampled rows, `learning` is `data` without the
        index labels that ended up in `testing`.
    """
    testing = data.groupby('class', group_keys=False).sample(
        frac=frac, random_state=random_state
    )
    learning = data.drop(testing.index)
    return learning, testing


In [164]:
def fold10Sample(data, n_folds=10, random_state=None):
    """Partition `data` into disjoint, class-stratified folds.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'class' column. The frame itself is not modified.
    n_folds : int, default 10
        Number of folds to produce.
    random_state : optional
        Forwarded to the shuffle; pass a fixed value for reproducible folds.

    Returns
    -------
    list of pandas.DataFrame
        `n_folds` frames that share no row and together cover all of `data`.
        Each class is dealt out evenly, so with 40 rows per class and 10 folds
        every fold holds 4 rows of each class.
    """
    # Shuffle once so fold membership is random; original index labels are kept.
    shuffled = data.sample(frac=1, random_state=random_state)
    # Number the rows inside each class (0, 1, 2, ...) and deal them out
    # round-robin. A row gets exactly one fold id, so folds cannot overlap,
    # which is required for cross-validation to test each row exactly once.
    fold_of_row = shuffled.groupby('class').cumcount() % n_folds
    return [shuffled[fold_of_row == i] for i in range(n_folds)]


In [165]:
def kNNClassify(X, Y, xt, k):
    """Classify the point `xt` from its `k` nearest labelled neighbours.

    `X` and `Y` are read through `.values` and walked in parallel: each row of
    `X` is a training point, the matching entry of `Y` its label. The distance
    from every training point to `xt` comes from computeDistance; the
    `(distance, label)` pairs are sorted ascending (ties on distance fall back
    to comparing the labels) and the first `k` are handed to getMajorityLabel,
    whose result is returned.
    """
    ranked = sorted(
        (computeDistance(point, xt), label)
        for point, label in zip(X.values, Y.values)
    )
    return getMajorityLabel(ranked[:k])


In [166]:
import numpy as np

def _accuracyPercent(train, samples, k):
    """Return the percentage of rows in `samples` that kNN classifies correctly.

    `train` supplies the labelled neighbours. Both frames are expected to carry
    the label in a 'class' column that is also the last column, because each
    sample row is split positionally into features (`row[:-1]`) and label
    (`row[-1]`).
    """
    # Split features/labels once instead of once per classified row.
    features = train.drop('class', axis=1)
    labels = train['class']
    hits = 0
    for row in samples.values:
        if kNNClassify(features, labels, row[:-1], k) == row[-1]:
            hits += 1
    return hits / len(samples) * 100


# Hold out a stratified test set; `learning` is the training set from here on
# and is never rebound, so the test rows cannot leak into it.
learning, testing = simpleSample(df)
# A stratified subset of the training rows themselves, used for train accuracy.
_, validate = simpleSample(learning)
k = int(input("Value?"))

print("1.Train Set Accuracy:\nAccuracy:", _accuracyPercent(learning, validate, k))

print("\n2.10-Fold Cross-Validation Results:")
folds = fold10Sample(learning)
successes = []
for i, fold in enumerate(folds):
    # Train on the learning rows outside this fold only. Dropping from `df`
    # instead would pull the held-out test rows into the training data.
    cv_train = learning.drop(fold.index)
    successes.append(_accuracyPercent(cv_train, fold, k))
    print("Accuracy Fold", i+1, ":", successes[i])

print("\nAverage Accuracy:", np.average(successes))
print("Standard Deviation:", np.std(successes))

# Final check on the untouched test rows against the full training set.
print("\n3.Test Set Accuracy:\nAccuracy:", _accuracyPercent(learning, testing, k))


1.Train Set Accuracy:
Accuracy: 100.0

2.10-Fold Cross-Validation Results:
Accuracy Fold 1 : 83.33333333333334
Accuracy Fold 2 : 100.0
Accuracy Fold 3 : 100.0
Accuracy Fold 4 : 100.0
Accuracy Fold 5 : 100.0
Accuracy Fold 6 : 100.0
Accuracy Fold 7 : 91.66666666666666
Accuracy Fold 8 : 100.0
Accuracy Fold 9 : 91.66666666666666
Accuracy Fold 10 : 91.66666666666666

Average Accuracy: 95.83333333333333
Standard Deviation: 5.5901699437494745

3.Train Set Accuracy:
Accuracy: 100.0
