# Nearest Neighbor Algorithm  

In [204]:
import math
import pandas as pd
import operator 
import numpy as np
from sklearn.model_selection import train_test_split

In [205]:
music = pd.DataFrame()

# Some data to play with.
music['duration'] = [184, 134, 243, 186, 122, 197, 294, 382, 102, 264, 
                     205, 110, 307, 110, 397, 153, 190, 192, 210, 403,
                     164, 198, 204, 253, 234, 190, 182, 401, 376, 102]
music['loudness'] = [18, 34, 43, 36, 22, 9, 29, 22, 10, 24, 
                     20, 10, 17, 51, 7, 13, 19, 12, 21, 22,
                     16, 18, 4, 23, 34, 19, 14, 11, 37, 42]

# We know whether the songs in our training data are jazz or not.
music['jazz'] = [ 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
                  0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                  1, 1, 1, 1, 0, 0, 1, 1, 0, 0]

X = music[['loudness', 'duration']]
Y = music.jazz

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 465)

train = X_train.join(y_train)
test = X_test.join(y_test)

In [206]:
trainSet = train.as_matrix(columns=None)
testSet = test.as_matrix(columns=None)

  """Entry point for launching an IPython kernel.
  


In [207]:
trainSet[0]

array([ 12, 192,   1])

In [208]:
def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((instance1[x] - instance2[x]), 2)
    return math.sqrt(distance)

In [209]:
data1 = trainSet[0]
data2 = trainSet[1]
distance = euclideanDistance(data1, data2, 2)
print('Distance: ' + repr(distance))

Distance: 209.00239233080563


In [210]:
def getNeighbors(trainingSet, testInstance, k):
    distances = []
    length = len(testInstance)-1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length)
        distances.append((trainingSet[x], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return neighbors

In [217]:
trainSet = trainSet
testInstance = [[ 24, 190]]

k = 5

neighbors = getNeighbors(trainSet, testInstance, 5)

print(neighbors)

[array([ 12, 192,   1]), array([ 11, 401,   1]), array([ 36, 186,   0]), array([  4, 204,   1]), array([ 34, 234,   0])]


In [218]:
def getResponse(neighbors):
    classVotes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    print(sortedVotes)
    return sortedVotes[0][0]

In [219]:
#neighbors = trainSet
response = getResponse(neighbors)
print(response)

[(1, 3), (0, 2)]
1
