In [55]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

In [56]:
iris = datasets.load_iris()
#Split up the data and targets into train/test sets
train_data, test_data, train_target, test_target = train_test_split(iris.data, iris.target, test_size = 0.3, shuffle = True)

In [74]:
class KNNClassifier:
    def __init__(self, k, dist_type):
        self.k = k
        self.dist_type = dist_type
        
    def fit(self, train_data, train_targets):
        self.train_data = train_data
        self.train_targets = train_targets
    
    def get_distance(self, pointA, pointB):
        distance = 0;
        if (self.dist_type == 1):
            for x1, x2 in zip(pointA, pointB):
                distance += (x1 - x2) ** 2
            distance = np.sqrt(distance)
        else:
             for x1, x2 in zip(pointA, pointB):
                distance += abs(x1 - x2)           
        return distance
    
    def find_mode(self, targets):
        target_counts = {}
        # Make a map with the key being the possible target and the value
        # being its number of occurances
        for x in targets:
            if x in target_counts:
                target_counts[x] += 1
            else:
                target_counts[x] = 1  
        #return the key of the target that has the most counts
        return max(target_counts, key = lambda k: target_counts[k])
        
    def find_k_nearest(self, dists):
        sorted_dist = dists.copy()
        sorted_dist.sort()
        k_n_indicies = []
        
        # use the sorted list to find the indicies of the k
        # closest neighbors
        for i in range(0, self.k):
            k_n_indicies.append(dists.index(sorted_dist[i]))
        
        return k_n_indicies
          
    def predict_one(self, test_datum):
        distances = []
        targets = []
            
        for i in range(len(self.train_data)): 
            # put each distance into a list
            distances.append(self.get_distance(self.train_data[i], test_datum))         
            
        indices = self.find_k_nearest(distances)
        for i in indices:
            targets.append(self.train_targets[i])          
        return self.find_mode(targets)
    
    def predict(self, test_data):
        predictions = []
        for test_datum in test_data:
            prediction = self.predict_one(test_datum)
            predictions.append(prediction)
            
        return predictions
    

In [82]:
classifier = KNNClassifier(3, 1)
classifier.fit(train_data, train_target)
predicted_targets = classifier.predict(test_data)

print("My classifier got: ", round(accuracy_score(test_target, predicted_targets) * 100, 2) , "%")

My classifier got:  95.56 %


In [83]:
classifier = KNNClassifier(3, 0)
classifier.fit(train_data, train_target)
predicted_targets = classifier.predict(test_data)

print("My classifier got: ", round(accuracy_score(test_target, predicted_targets) * 100, 2) , "%")

My classifier got:  97.78 %


In [59]:
print(predicted_targets)
print(test_target)

[0, 2, 0, 1, 2, 1, 0, 1, 0, 2, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 2, 0, 1, 0, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0, 2, 1, 0, 2, 2, 1, 2, 0, 0, 2]
[0 2 0 1 2 2 0 1 0 2 0 1 0 0 1 1 1 1 1 0 1 2 0 1 0 2 2 2 1 0 0 0 1 0 0 2 1
 0 2 2 1 2 0 0 2]


In [60]:
#############################################
# EXPERIMENTATION
#############################################
classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(train_data, train_target)
predicted_targets = classifier.predict(test_data)

print("The sklearn KNN classifier got: ", round(accuracy_score(test_target, predicted_targets) * 100, 2) , "%")

The sklearn KNN classifier got:  97.78 %


In [67]:
classifier = KNeighborsClassifier(n_neighbors=3, p=1)
classifier.fit(train_data, train_target)
predicted_targets = classifier.predict(test_data)

print("The sklearn KNN classifier got: ", round(accuracy_score(test_target, predicted_targets) * 100, 2) , "%")

The sklearn KNN classifier got:  97.78 %


In [68]:
classifier = KNeighborsClassifier(n_neighbors=5, p=1)
classifier.fit(train_data, train_target)
predicted_targets = classifier.predict(test_data)

print("The sklearn KNN classifier got: ", round(accuracy_score(test_target, predicted_targets) * 100, 2) , "%")

The sklearn KNN classifier got:  97.78 %


In [69]:
classifier = KNeighborsClassifier(n_neighbors=4, p=1)
classifier.fit(train_data, train_target)
predicted_targets = classifier.predict(test_data)

print("The sklearn KNN classifier got: ", round(accuracy_score(test_target, predicted_targets) * 100, 2) , "%")

The sklearn KNN classifier got:  97.78 %


In [71]:
classifier = KNeighborsClassifier(n_neighbors=9, p=1)
classifier.fit(train_data, train_target)
predicted_targets = classifier.predict(test_data)

print("The sklearn KNN classifier got: ", round(accuracy_score(test_target, predicted_targets) * 100, 2) , "%")

The sklearn KNN classifier got:  95.56 %
