# KNN Classifiers
Create a custom KNN classifier for the music dataset.

## Create Dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
from sklearn.neighbors import KNeighborsClassifier

%matplotlib inline

In [2]:
# Toy training set: one row per song. `duration` and `loudness` are the two
# numeric features; `jazz` is the known label (1 = jazz, 0 = not jazz).
music = pd.DataFrame({
    'duration': [
        184, 134, 243, 186, 122, 197, 294, 382, 102, 264,
        205, 110, 307, 110, 397, 153, 190, 192, 210, 403,
        164, 198, 204, 253, 234, 190, 182, 401, 376, 102,
    ],
    'loudness': [
        18, 34, 43, 36, 22, 9, 29, 22, 10, 24,
        20, 10, 17, 51, 7, 13, 19, 12, 21, 22,
        16, 18, 4, 23, 34, 19, 14, 11, 37, 42,
    ],
    'jazz': [
        1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 1, 1, 0, 0,
    ],
})

## Custom Function

In [3]:
def predict_music(df, duration, loadness, k):
    """Classify a song as jazz or rock with a hand-rolled k-nearest-neighbours vote.

    The Euclidean distance from the query song to every row of ``df`` is
    computed on the raw (unscaled) ``duration`` and ``loudness`` columns, the
    ``k`` closest rows are selected, and the share of each label among them is
    printed together with the winning label.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data with ``duration``, ``loudness`` and ``jazz`` columns,
        where ``jazz`` is 1 for jazz and 0 for rock. The frame is not modified.
    duration : number
        Duration of the song to classify.
    loadness : number
        Loudness of the song to classify (the parameter name keeps its
        original spelling so existing keyword calls continue to work).
    k : int
        Number of nearest neighbours that vote. If ``k`` exceeds the number of
        rows, every row votes.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or ``df`` has no rows.
    """
    if k < 1:
        raise ValueError(f'k must be a positive integer, got {k}')
    if len(df) == 0:
        raise ValueError('df must contain at least one labelled song')

    # Keep the distances in a local Series instead of writing a column into
    # (and re-sorting) the caller's frame, so repeated calls are idempotent.
    distance = np.sqrt((duration - df['duration'])**2 + (loadness - df['loudness'])**2)

    # Stable sort: equidistant songs keep their original row order, which makes
    # the choice of neighbours deterministic when there are ties.
    nearest = np.argsort(distance.to_numpy(), kind='stable')[:k]
    labels = df['jazz'].to_numpy()[nearest]

    prob_dict = {
        'jazz': int((labels == 1).sum()) / len(labels),
        'rock': int((labels == 0).sum()) / len(labels)
    }

    print('The probabilities are:')
    for key, value in prob_dict.items():
        print(f'{key}: {value}')

    # On a tie, max() returns the first key in insertion order, i.e. 'jazz'.
    print(f'Prediction: {max(prob_dict, key=prob_dict.get)} music')

In [4]:
# Classify a song with duration 190 and loudness 24 using its 5 nearest neighbours.
predict_music(music, 190, 24, 5)

The probabilities are:
jazz: 0.6
rock: 0.4
Prediction: jazz music


## Using Scikit-Learn

In [5]:
def predict_music_sklearn(df, duration, loadness, k):
    """Classify a song as jazz or rock with scikit-learn's KNeighborsClassifier.

    A classifier with ``k`` neighbours is fitted on the ``loudness`` and
    ``duration`` columns of ``df`` against its ``jazz`` column, then the class
    probabilities and the predicted label for the query song are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data with ``loudness``, ``duration`` and ``jazz`` columns,
        where ``jazz`` is 1 for jazz and 0 for rock.
    duration : number
        Duration of the song to classify.
    loadness : number
        Loudness of the song to classify (the parameter name keeps its
        original spelling so existing keyword calls continue to work).
    k : int
        Number of neighbours passed to ``KNeighborsClassifier``.
    """
    feature_columns = ['loudness', 'duration']

    # Fit on the frame and k that were passed in, not on the global `music`
    # frame and a fixed 5 neighbours.
    neighbors = KNeighborsClassifier(n_neighbors=k)
    neighbors.fit(df[feature_columns], df['jazz'])

    # Build the query as a one-row frame with the training column names so the
    # feature order is explicit and scikit-learn sees matching feature names.
    query = pd.DataFrame([[loadness, duration]], columns=feature_columns)
    prediction = neighbors.predict(query)[0]
    probabilities = neighbors.predict_proba(query)[0]

    music_type = 'jazz' if prediction == 1 else 'rock'

    # predict_proba columns follow neighbors.classes_; look probabilities up by
    # label so a training set containing a single class does not break indexing.
    class_probabilities = dict(zip(neighbors.classes_, probabilities))
    prob_dict = {
        'jazz': float(class_probabilities.get(1, 0.0)),
        'rock': float(class_probabilities.get(0, 0.0))
    }

    print('The probabilities are:')
    for key, value in prob_dict.items():
        print(f'{key}: {value}')

    print(f'Prediction: {music_type} music')

In [6]:
# Run the scikit-learn based function with the same arguments: duration 190, loudness 24, k = 5.
predict_music_sklearn(music, 190, 24, 5)

The probabilities are:
jazz: 0.6
rock: 0.4
Prediction: jazz music


I get the same answer as my custom function!