# Theory

# Implementation

## Initialization Cell

In [None]:
from sklearn import datasets

iris_ds = datasets.load_iris()

def groupby(func,items):
    """Bucket ``items`` by the key that ``func`` computes for each of them.

    Returns a plain dict mapping every key to the list of items that produced
    it. Items keep their input order inside a bucket.
    """
    buckets = {}
    for element in items:
        # setdefault creates the bucket the first time a key is seen.
        buckets.setdefault(func(element), []).append(element)
    return buckets
    
# A nifty little function, mirrors Clojure's frequencies function
def freqs(items):
    """Count occurrences: returns a dict of the form {item: times_seen}."""
    counts = {}
    for element in items:
        # Unseen elements start from zero.
        counts[element] = counts.get(element, 0) + 1
    return counts

## Utils

In [None]:
import numpy as np

def euclidean_dist(a,b):
    """Return the square root of the dot product of ``a - b`` with its transpose.

    For two equal-length 1-D vectors this is the Euclidean (L2) distance.
    NOTE(review): presumably both arguments are 1-D NumPy arrays -- confirm
    against callers; other shapes are passed straight through to ``np.dot``.
    """
    delta = a - b
    squared_norm = np.dot(delta, delta.T)
    return squared_norm ** 0.5

class fix_size_list:
    """Bounded collection that keeps only the lowest-distance items.

    Items are dicts that carry a ``'distance'`` entry. The collection never
    holds more than ``max_size`` items: when an addition would exceed that
    size, the item with the highest distance is dropped.
    """

    def __init__(self,max_size):
        """Create an empty collection limited to ``max_size`` items."""
        self._n = max_size
        self._list = []
        # Until the first eviction every distance is admissible.
        self._max_dist = float("inf")

    def add(self, item):
        """Add ``item`` (a dict with a ``'distance'`` key), evicting the worst on overflow."""
        self._list.append(item)

        if len(self._list) > self._n:
            # Simplicity over speed: re-sort on every overflow, drop the farthest.
            self._list = sorted(self._list, key=lambda x : x['distance'])
            self._list.pop(-1)
            if self._list:
                self._max_dist = self._list[-1]['distance']
            else:
                # Capacity is zero (or negative): nothing can ever be kept, so
                # no distance is admissible. Previously this path raised
                # IndexError when indexing the now-empty list.
                self._max_dist = float("-inf")

    def threshold(self):
        """Return the largest retained distance as of the last eviction.

        This is ``inf`` until the first eviction happens, and ``-inf`` once an
        eviction has left the collection empty.
        """
        return self._max_dist

    def as_list(self):
        """Return the retained items.

        The list is in insertion order if no eviction has happened yet, and
        sorted by ascending distance afterwards.
        """
        return self._list

## Naive KNN

In [None]:

class naive_knn:
    """Brute-force k-nearest-neighbour search over an in-memory dataset.

    Every query computes the distance from the query item to each dataset
    entry and keeps the ``k`` closest ones.
    """

    def __init__(self, k, dataset, weighted_dist):
        """Store the neighbour count ``k`` and the ``dataset`` to search.

        NOTE(review): ``weighted_dist`` is accepted but currently unused; the
        distance metric is always ``euclidean_dist``. Its intended meaning
        cannot be determined from this code -- confirm before wiring it in.
        """
        self._k = k
        self._data = dataset
        self._dist_metric = euclidean_dist

    def search(self, item):
        """Return up to ``k`` dataset entries closest to ``item``, nearest first.

        Returns an empty list when ``k`` is not positive.
        """
        if self._k <= 0:
            # Asking for no neighbours has a well-defined answer; do not
            # bother scanning the dataset.
            return []

        results = fix_size_list(self._k)

        for x in self._data:
            d = self._dist_metric(x,item)
            # Skip candidates that cannot beat the current worst kept one.
            if d < results.threshold():
                results.add({'item' : x, 'distance' : d})

        # The collector only sorts on eviction, so a dataset with at most k
        # entries would otherwise come back in dataset order. Sort here so the
        # result order is always nearest-first.
        nearest = sorted(results.as_list(), key=lambda r : r['distance'])
        return [r['item'] for r in nearest]
            