# kNN Hash Example

In [7]:
import numpy as np
import pandas as pd
import math
from sklearn.datasets import load_iris
from functools import partial
from random import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from collections import defaultdict
from collections import Counter

## Iris dataset

In [8]:
# Load the Iris dataset; later cells read the features from `df.data`
# and the class labels from `df.target`.
df = load_iris()
# Bare expression so the notebook displays the feature-matrix shape.
df.data.shape

(150, 4)

In [19]:
def f_hash(w, r, b, x):
    """Return the bucket index of ``x`` under one random-projection hash.

    Computes ``floor((w . x + b) / r)``.

    Flooring (rather than truncating toward zero with ``int``) keeps every
    bucket exactly ``r`` wide; truncation would merge the buckets on either
    side of zero into one double-width bucket for negative projections.

    Args:
        w: Projection weights. Either a 1-D vector or a ``(1, d)`` row, as
            long as ``np.dot(w, x)`` yields a single value.
        r: Bucket width (divisor).
        b: Offset added to the projection before dividing.
        x: Sample vector to hash.

    Returns:
        The bucket index as a built-in ``int``.

    Raises:
        TypeError: If ``np.dot(w, x)`` does not reduce to a single value.
    """
    scaled = np.asarray((np.dot(w, x) + b) / r)
    if scaled.size != 1:
        raise TypeError("f_hash expects np.dot(w, x) to be a single value")
    # .item() extracts the scalar explicitly; calling int() directly on a
    # 1-element array is deprecated in recent NumPy releases.
    return math.floor(scaled.item())

def distance(x, y):
    """Euclidean distance between a query and a stored sample.

    Args:
        x: Query vector; its ``shape[0]`` fixes how many coordinates are
            compared.
        y: Stored entry whose first element (``y[0]``) is the sample's
            feature vector; any further elements are ignored here.

    Returns:
        The distance as a float.
    """
    stored = y[0]
    squared_total = sum(
        (x[k] - stored[k]) ** 2 for k in range(x.shape[0])
    )
    return math.sqrt(squared_total)
    

In [20]:
class KNNHash(object):
    """Nearest-neighbour lookup backed by random-projection hash tables.

    ``fit`` builds ``L`` hash tables. Each table owns ``m`` randomly
    initialised ``f_hash`` functions; a sample's bucket key in a table is
    the sum of those ``m`` hash values. ``predict`` hashes the query the
    same way and searches only the matching bucket of every table.

    Attributes set by ``__init__``: ``m`` (hash functions per table),
    ``L`` (number of tables) and ``nn`` (stored, but not used by ``fit``
    or ``predict``).
    """

    def __init__(self, m, L, nn):
        self.m = m
        self.L = L
        self.nn = nn

    @staticmethod
    def _bucket_key(hash_funcs, point):
        """Sum the values of all hash functions of one table for ``point``."""
        return sum(h(x=point) for h in hash_funcs)

    def fit(self, X, y):
        """Build the hash tables and store every training sample in them.

        Args:
            X: 2-D array of samples, one row per sample.
            y: Labels, indexable by row position.
        """
        # X.shape[1] instead of X[0].shape[0] so an empty training set does
        # not fail while the hash functions are being created.
        n_features = X.shape[1]
        # One (buckets, hash functions) pair per table.
        self.t_hh = []
        for _ in range(self.L):
            hash_funcs = []
            for _ in range(self.m):
                w = np.random.rand(1, n_features)  # projection weights
                hash_funcs.append(
                    partial(f_hash, w=w, r=random(), b=random())
                )
            self.t_hh.append((defaultdict(list), hash_funcs))

        for n in range(X.shape[0]):
            sample, label = X[n], y[n]
            for buckets, hash_funcs in self.t_hh:
                key = self._bucket_key(hash_funcs, sample)
                buckets[key].append((sample, label))

    def predict(self, u):
        """Report, per table, the nearest stored sample in ``u``'s bucket.

        For every table whose matching bucket is non-empty, prints the
        distance to the nearest sample in that bucket followed by
        ``"ans: <label>"``. Tables whose bucket is empty are skipped
        instead of raising.

        Args:
            u: Query vector.

        Returns:
            The label of the closest sample found over all tables (the
            first table wins ties), or ``None`` when every matching bucket
            is empty.
        """
        best_distance = None
        best_label = None
        for buckets, hash_funcs in self.t_hh:
            key = self._bucket_key(hash_funcs, u)
            # .get() rather than [] so a miss does not make the
            # defaultdict insert a new empty bucket.
            candidates = buckets.get(key, ())
            if not candidates:
                continue
            distances = [distance(u, entry) for entry in candidates]
            nearest = distances.index(min(distances))
            label = candidates[nearest][1]
            print(distances[nearest])
            print("ans: " + str(label))
            if best_distance is None or distances[nearest] < best_distance:
                best_distance = distances[nearest]
                best_label = label
        return best_label


In [21]:
# Min-max scale the Iris features (fit and transform in a single call)
# and keep the class labels alongside them.
scaler = MinMaxScaler()
x = scaler.fit_transform(df.data)
y = df.target


In [22]:

knnhash = KNNHash(4, 4, 4)

# Hold out three samples as queries, then drop them from the training data.
held_out = [0, 75, 149]
test1x, test2x, test3x = (x[i] for i in held_out)
test1y, test2y, test3y = (y[i] for i in held_out)
x = np.delete(x, held_out, axis=0)
y = np.delete(y, held_out, axis=0)

# For each query: refit (new random hash functions), print the true label,
# then let predict() print the per-table results.
for expected, query in (
    (test1y, test1x),
    (test2y, test2x),
    (test3y, test3x),
):
    print("-------------")
    knnhash.fit(x, y)
    print(expected)
    knnhash.predict(query)

-------------
0
0.03254041656427031
ans: 0
0.03254041656427031
ans: 0
0.03254041656427031
ans: 0
0.03254041656427031
ans: 0
-------------
1
0.06798027147519138
ans: 1
0.1029623988718487
ans: 1
0.06798027147519138
ans: 1
0.05007710104811091
ans: 1
-------------
2
0.22847630881516148
ans: 1
0.05794021820301701
ans: 2
0.05794021820301701
ans: 2
0.13465777381712032
ans: 2


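* Each pair of lines above corresponds to one hash table: the first line is the distance from the query "u" to the nearest sample in the matching bucket, and the second line ("ans:") is that sample's class label. Counting the labels in a bucket with `Counter` maps each class label to the number of samples carrying it. For example, Counter({0: 13, 1: 1}) means that the bucket holds 13 samples close to "u" with class label "0" and 1 sample with class label "1".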