In [1]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import utils, datasets

# Seed NumPy's global RNG so random operations that use it are repeatable across runs.
np.random.seed(42)

In [2]:
# Load the Iris dataset: `x` holds the feature matrix, `y` the integer class labels.
iris: utils.Bunch = datasets.load_iris()
x = iris.data
y = iris.target
num_samples = x.shape[0]
num_feats = x.shape[1]
# Number of distinct labels. (y.shape[0] would be the number of samples, not classes.)
num_classes = np.unique(y).size
# Hold out one third of the samples for testing.
test_size = num_samples // 3

# Shuffle the sample indices, then cut the permutation at an explicit split point.
# Using a positive split index (rather than negative slices) also stays correct
# if test_size ever becomes 0.
random_data_points = np.random.permutation(num_samples)
split_at = num_samples - test_size
train_indices = random_data_points[:split_at]
test_indices = random_data_points[split_at:]

training_data_points = x[train_indices]
training_classes = y[train_indices]

test_data_points = x[test_indices]
test_classes = y[test_indices]

In [3]:
# Baseline model: 4 nearest neighbours, each neighbour's vote counted equally.
clf = KNeighborsClassifier(weights="uniform", n_neighbors=4)
clf.fit(X=training_data_points, y=training_classes)

### Exercise 1: Try Different Hyperparameters

In [4]:
# Grid search over the neighbour count (1..9) and the vote weighting scheme.
# The best combination found is kept in highest / highestNum / highestWeight,
# which the re-training cell further down reads.
highest = 0
highestNum = -1
highestWeight = ""
for num in range(1, 10):
    for weight in ["uniform", "distance"]:
        # Pass the weighting scheme through to the model; without `weights=weight`
        # every iteration silently trains the default "uniform" classifier.
        clf = KNeighborsClassifier(n_neighbors=num, weights=weight)
        clf.fit(training_data_points, training_classes)
        accuracy = clf.score(test_data_points, test_classes)
        # Strict '>' keeps the first combination that reaches the top score.
        if accuracy > highest:
            highest = accuracy
            highestNum = num
            highestWeight = weight
        print(f"{weight}({num}): {accuracy*100:.4}")

print(f"highest accuracy: {highestNum} neighbors({highestWeight})")
    

uniform(1): 96.0
distance(1): 96.0
uniform(2): 96.0
distance(2): 96.0
uniform(3): 96.0
distance(3): 96.0
uniform(4): 96.0
distance(4): 96.0
uniform(5): 96.0
distance(5): 96.0
uniform(6): 96.0
distance(6): 96.0
uniform(7): 96.0
distance(7): 96.0
uniform(8): 98.0
distance(8): 98.0
uniform(9): 96.0
distance(9): 96.0
highest accuracy: 8 neighbors(uniform)


### Re-train the best KNN model

In [5]:
# Rebuild the classifier with the best settings found by the search and fit it
# on the training split.
clf = KNeighborsClassifier(weights=highestWeight, n_neighbors=highestNum)
clf.fit(X=training_data_points, y=training_classes)

In [6]:
# Run the fitted model on the held-out samples: hard class labels first,
# then the per-class probability estimates.
predictions = clf.predict(test_data_points)
probabilities = clf.predict_proba(test_data_points)

# Show predicted labels next to the true labels, followed by the probabilities.
print(f"predictions:\n{predictions}")
print(f"expected:\n{test_classes}")
print(f"probabilities:\n{probabilities}")

predictions:
[0 0 0 1 2 0 2 2 0 1 1 2 1 2 0 2 1 2 1 1 1 0 1 1 0 1 2 2 0 1 2 2 0 2 0 1 2
 2 1 2 1 1 2 2 0 1 1 0 1 2]
expected:
[0 0 0 1 2 0 2 2 0 1 1 2 1 2 0 2 1 2 1 1 1 0 1 1 0 1 2 2 0 1 2 2 0 2 0 1 2
 2 1 2 1 1 2 2 0 1 2 0 1 2]
probabilities:
[[1.    0.    0.   ]
 [1.    0.    0.   ]
 [1.    0.    0.   ]
 [0.    0.5   0.5  ]
 [0.    0.    1.   ]
 [1.    0.    0.   ]
 [0.    0.    1.   ]
 [0.    0.    1.   ]
 [1.    0.    0.   ]
 [0.    0.625 0.375]
 [0.    0.875 0.125]
 [0.    0.375 0.625]
 [0.    0.875 0.125]
 [0.    0.    1.   ]
 [1.    0.    0.   ]
 [0.    0.125 0.875]
 [0.    1.    0.   ]
 [0.    0.125 0.875]
 [0.    1.    0.   ]
 [0.    1.    0.   ]
 [0.    0.875 0.125]
 [1.    0.    0.   ]
 [0.    1.    0.   ]
 [0.    1.    0.   ]
 [1.    0.    0.   ]
 [0.    1.    0.   ]
 [0.    0.    1.   ]
 [0.    0.    1.   ]
 [1.    0.    0.   ]
 [0.    1.    0.   ]
 [0.    0.    1.   ]
 [0.    0.125 0.875]
 [1.    0.    0.   ]
 [0.    0.    1.   ]
 [1.    0.    0.   ]
 [0.    0.875 0.125]
