In [16]:
from sklearn import datasets
import numpy as np
iris = datasets.load_iris()

In [17]:
from sklearn.model_selection import train_test_split

#Split up the data and targets into train/test sets
train_data, test_data, train_target, test_target = train_test_split(iris.data, iris.target, test_size = 0.3, shuffle = True)


In [18]:
class KNNClassifier:
    def __init__(self, k):
        self.k = k
        
    def fit(self, train_data, train_targets):
        self.train_data = train_data
        self.train_targets = train_targets
    
    def get_distance(self, pointA, pointB):
        distance = 0;
        for x1, x2 in zip(pointA, pointB):
            distance += (x1 - x2) ** 2
        return np.sqrt(distance)
    
    def find_mode(self, targets):
        target_counts = {}
        # Make a map with the key being the possible target and the value
        # being its number of occurances
        for x in targets:
            if x in target_counts:
                target_counts[x] += 1
            else:
                target_counts[x] = 1  
        #return the key of the target that has the most counts
        return max(target_counts, key = lambda k: target_counts[k])
        
    def find_k_nearest(self, dists):
        sorted_dist = dists.copy()
        sorted_dist.sort()
        k_n_indicies = []
        
        # use the sorted list to find the indicies of the k
        # closest neighbors
        for i in range(0, self.k):
            k_n_indicies.append(dists.index(sorted_dist[i]))
        
        return k_n_indicies
        
        
    def predict_one(self, test_datum):
        distances = []
        targets = []
            
        for i in range(len(self.train_data)): 
            # put each distance into a list
            distances.append(self.get_distance(self.train_data[i], test_datum))         
            
        indices = self.find_k_nearest(distances)
        for i in indices:
            targets.append(self.train_targets[i])          
        return self.find_mode(targets)
    
    def predict(self, test_data):
        predictions = []
        for test_datum in test_data:
            prediction = self.predict_one(test_datum)
            predictions.append(prediction)
            
        return predictions
    

In [19]:
classifier = KNNClassifier(5)
classifier.fit(train_data, train_target)
predictions = classifier.predict(test_data)
    
numCorrect = 0
for i in range(0, len(predictions)):
    if (predictions[i] == test_target[i]):
        numCorrect += 1

print("My classifier got", round(numCorrect / len(test_target) * 100, 2), "% correct")

My classifier got 95.56 % correct


In [223]:
print(predictions)
print(test_target)

[0, 1, 1, 1, 0, 2, 1, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 2, 0, 2, 0, 1, 1, 0, 2, 2, 0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 1, 1, 2, 1, 0, 1, 0]
[2 1 0 2 1 2 1 0 1 2 0 0 2 0 1 2 2 2 1 0 1 2 2 2 2 2 2 1 1 2 0 2 2 0 2 0 0
 2 1 2 2 1 0 0 0]


In [224]:
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(train_data, train_target)
predictions = classifier.predict(test_data)

numCorrect = 0
for i in range(0, len(predictions)):
    if (predictions[i] == test_target[i]):
        numCorrect += 1

print("The sklearn classifer got", round(numCorrect / len(test_target) * 100, 2), "% correct")

The sklearn classifer got 97.78 % correct


In [225]:
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(train_data, train_target)
predictions = classifier.predict(test_data)

numCorrect = 0
for i in range(0, len(predictions)):
    if (predictions[i] == test_target[i]):
        numCorrect += 1

print("The sklearn classifer got", round(numCorrect / len(test_target) * 100, 2), "% correct")

The sklearn classifer got 97.78 % correct


In [226]:
classifier = KNeighborsClassifier(n_neighbors=4)
classifier.fit(train_data, train_target)
predictions = classifier.predict(test_data)

numCorrect = 0
for i in range(0, len(predictions)):
    if (predictions[i] == test_target[i]):
        numCorrect += 1

print("The sklearn classifer got", round(numCorrect / len(test_target) * 100, 2), "% correct")

The sklearn classifer got 97.78 % correct


In [227]:
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(train_data, train_target)
predictions = classifier.predict(test_data)

numCorrect = 0
for i in range(0, len(predictions)):
    if (predictions[i] == test_target[i]):
        numCorrect += 1

print("The sklearn classifer got", round(numCorrect / len(test_target) * 100, 2), "% correct")

The sklearn classifer got 97.78 % correct
