-
Notifications
You must be signed in to change notification settings - Fork 0
/
KNN.py
59 lines (43 loc) · 1.34 KB
/
KNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import numpy as np
from stuff import dist
from statistics import mode
class KNN():
    '''
    K-nearest-neighbours classifier that predicts by majority vote.

    Parameters
    ----------
    k : int, default=3
        The number of neighbours that vote on each prediction.
    p : int, default=2
        Power parameter forwarded to ``dist`` (documented here as the
        Minkowski metric: p=1 for Manhattan, p=2 for Euclidean, etc.).
    '''

    def __init__(self, k=3, p=2):
        self.k = k
        self.p = p

    def fit(self, X, y):
        '''
        Store the training data; all work is deferred to prediction time.

        Parameters
        ----------
        X : feature table, assumed to be a pd.DataFrame (one row per sample).
        y : labels, assumed to be a pd.Series aligned positionally with X.

        Returns
        -------
        self, so that calls can be chained.

        Raises
        ------
        ValueError
            If X and y do not contain the same number of samples.
        '''
        n_samples = X.shape[0]
        n_labels = len(y)
        # A mismatch would otherwise surface much later, as an IndexError
        # during voting or as silently ignored labels.
        if n_samples != n_labels:
            raise ValueError(
                "X and y must have the same number of samples; "
                "got {} rows and {} labels".format(n_samples, n_labels)
            )
        self.X = X
        self.label = y
        return self

    def nearest(self, train, test):
        '''
        Return the majority label among the k training rows closest to test.

        Parameters
        ----------
        train : training features, assumed to be a pd.DataFrame.
        test : a single sample (one row of a DataFrame).

        Returns
        -------
        The most frequent label among the k nearest training rows.

        Raises
        ------
        ValueError
            If k is smaller than 1 or larger than the number of training rows.
        '''
        n_train = train.shape[0]
        # Fail fast with a clear message, before any distance is computed,
        # instead of an IndexError (k too large) or an empty vote (k < 1).
        if not 1 <= self.k <= n_train:
            raise ValueError(
                "k must be between 1 and the number of training points "
                "({}); got k={}".format(n_train, self.k)
            )
        # `dist` is the helper imported from the project's `stuff` module;
        # presumably the Minkowski distance of order p -- confirm there.
        # Rows are accessed by position so the index labels do not matter.
        distances = [dist(test, train.iloc[i], p=self.p)
                     for i in range(n_train)]
        # Rank training positions by distance. sorted() is stable, so
        # equally distant points keep their original order.
        ranked = sorted(range(n_train), key=distances.__getitem__)
        votes = [self.label.iloc[i] for i in ranked[:self.k]]
        # Majority vote. NOTE(review): on Python >= 3.8 a tie goes to the
        # label seen first (i.e. the closest); older versions raise
        # StatisticsError on ties.
        return mode(votes)

    def predict(self, X):
        '''
        Predict a label for every row of X.

        Parameters
        ----------
        X : test set, assumed to be a pd.DataFrame.

        Returns
        -------
        np.ndarray with one predicted label per row of X (empty if X has
        no rows).
        '''
        labels = [self.nearest(train=self.X, test=X.iloc[i])
                  for i in range(X.shape[0])]
        return np.array(labels)