# Classification (KNN)

In [17]:
import numpy as np 
import pandas as pd
from pathlib import Path
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
import matplotlib.pylab as plt
%matplotlib

Using matplotlib backend: TkAgg


In [9]:
# Load the riding-mower records and attach a 1-based household number
# (index + 1), which the scatter plot later uses to annotate each point.
mower_df = (
    pd.read_csv('RidingMowers.csv')
    .assign(Number=lambda frame: frame.index + 1)
)
mower_df

Unnamed: 0,Income,Lot_Size,Ownership,Number
0,60.0,18.4,Owner,1
1,85.5,16.8,Owner,2
2,64.8,21.6,Owner,3
3,61.5,20.8,Owner,4
4,87.0,23.6,Owner,5
5,110.1,19.2,Owner,6
6,108.0,17.6,Owner,7
7,82.8,22.4,Owner,8
8,69.0,20.0,Owner,9
9,93.0,20.8,Owner,10


In [11]:
# Hold out 40% of the households for validation; the fixed random_state keeps
# the partition reproducible between runs.
trainData, valiData = train_test_split(
    mower_df,
    test_size=0.4,
    random_state=26,
)
print(trainData.shape, valiData.shape)

# The single household we want to classify, described by the two predictors.
newhousehold = pd.DataFrame({'Income': [60], 'Lot_Size': [20]})
newhousehold

(14, 4) (10, 4)


Unnamed: 0,Income,Lot_Size
0,60,20


In [19]:
def plotDataset(ax, data, showLabel=True, **kwargs):
    """Scatter households on ``ax`` with one marker style per ownership class.

    Parameters
    ----------
    ax : matplotlib Axes (or any object with the same methods)
        Receives the scatter series, the axis labels and the point annotations.
    data : pandas.DataFrame
        Must provide the columns ``Income``, ``Lot_Size``, ``Ownership`` and
        ``Number``.
    showLabel : bool, default True
        When False, both scatter series are drawn with ``label=None`` so they
        do not add entries to the legend.
    **kwargs
        Forwarded unchanged to both ``ax.scatter`` calls.
    """
    # The data spells the second class 'Nonowner', so an exact comparison with
    # 'NonOwner' selects nothing and those households are never drawn.
    # Matching case-insensitively accepts either spelling.
    ownership = data['Ownership'].str.lower()
    series_specs = (
        # (normalized class, legend label, marker, color)
        ('owner', 'Owner', 'o', 'C1'),
        ('nonowner', 'NonOwner', 'D', 'C0'),
    )
    for wanted, legend_label, marker, color in series_specs:
        subset = data.loc[ownership == wanted]
        ax.scatter(
            subset.Income,
            subset.Lot_Size,
            marker=marker,
            label=legend_label if showLabel else None,
            color=color,
            **kwargs,
        )

    # Label the Axes that was passed in rather than pyplot's "current" axes,
    # which is not guaranteed to be ``ax``.
    ax.set_xlabel('Income')
    ax.set_ylabel('Lot_Size')

    # Tag every point with its household number, nudged 2 income units to the
    # right so the text does not sit on top of the marker.
    for number, income, lot_size in zip(data['Number'], data['Income'], data['Lot_Size']):
        ax.annotate(str(number), (income + 2, lot_size))

# One figure holding both partitions: training households carry the legend
# labels, validation households are passed facecolors='none' and no labels.
fig, ax = plt.subplots()

plotDataset(ax, trainData)
plotDataset(ax, valiData, showLabel=False, facecolors='none')

# Overlay the household to be classified as a large black star.
ax.scatter(
    newhousehold.Income,
    newhousehold.Lot_Size,
    marker='*',
    s=150,
    color='black',
    label='New Household',
)

ax.set_xlabel('Income')
ax.set_ylabel('Lot_Size')
ax.set_xlim(40, 120)

# Build the legend from whatever labelled artists ended up on the axes.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, loc=4)

plt.show()

## Run Modeling

In [26]:
# Standardize the two predictors. The scaler is fitted on the training
# partition only and then applied to every row and to the new household.
predictors = ['Income', 'Lot_Size']
z_columns = ['zIncome', 'zLot_Size']

scaler = preprocessing.StandardScaler()
scaler.fit(trainData[predictors])

# Give the scaled frame mower_df's own index: concat(axis=1) aligns on index
# labels, so a fresh 0..n-1 index would misalign the rows (and introduce NaNs)
# if mower_df were ever indexed differently.
mowernorm = pd.concat(
    [
        pd.DataFrame(
            scaler.transform(mower_df[predictors]),
            columns=z_columns,
            index=mower_df.index,
        ),
        mower_df[['Ownership', 'Number']],
    ],
    axis=1,
)

# trainData.index / valiData.index hold index *labels* carried over from
# mower_df, so select by label (.loc) instead of by position (.iloc); the two
# only coincide while the index is the default 0..n-1 range.
trainnorm = mowernorm.loc[trainData.index]
validnorm = mowernorm.loc[valiData.index]

# Select the predictors explicitly so the column order always matches the
# order the scaler was fitted with.
newhouseholdnorm = pd.DataFrame(
    scaler.transform(newhousehold[predictors]),
    columns=z_columns,
)

In [25]:
# Look up the three training households nearest to the new household in the
# standardized (zIncome, zLot_Size) space.
neighbor_features = trainnorm[['zIncome', 'zLot_Size']]
knn = NearestNeighbors(n_neighbors=3).fit(neighbor_features)
distance, indice = knn.kneighbors(newhouseholdnorm)

# The returned indices are positions within the fitted data, so the matching
# rows are fetched from trainnorm positionally.
print(trainnorm.iloc[indice[0]])

     zIncome  zLot_Size Ownership  Number
8  -0.069107   0.437269     Owner       9
19 -0.205375  -0.174908  Nonowner      20
12  0.203428   0.284225  Nonowner      13


In [30]:
# Split each normalized partition into predictors (X) and class label (y).
train_X, train_y = trainnorm[['zIncome', 'zLot_Size']], trainnorm['Ownership']
valid_X, valid_y = validnorm[['zIncome', 'zLot_Size']], validnorm['Ownership']

# Fit one classifier per k = 1..14 on the training partition and record its
# accuracy on the validation partition.
results = []
for k in range(1, 15):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_X, train_y)
    validation_accuracy = accuracy_score(valid_y, knn.predict(valid_X))
    results.append(dict(k=k, accuracy=validation_accuracy))

results = pd.DataFrame(results)
print(results)

     k  accuracy
0    1       0.6
1    2       0.7
2    3       0.8
3    4       0.9
4    5       0.7
5    6       0.9
6    7       0.9
7    8       0.9
8    9       0.9
9   10       0.8
10  11       0.8
11  12       0.9
12  13       0.4
13  14       0.4


In [34]:
# Refit on every household (training + validation) with k = 4, then classify
# the new household and show which neighbors drove the prediction.
mower_x = mowernorm[['zIncome', 'zLot_Size']]
mower_y = mowernorm['Ownership']

knn = KNeighborsClassifier(n_neighbors=4)
knn.fit(mower_x, mower_y)
distance, indice = knn.kneighbors(newhouseholdnorm)

print(knn.predict(newhouseholdnorm))
print("Distance", distance)
print("Indices", indice)
# indice holds positions within mower_x, which shares its row order with mowernorm.
print(mowernorm.iloc[indice[0]])

['Owner']
Distance [[0.31358009 0.40880312 0.44793643 0.61217726]]
Indices [[ 3  8 13  0]]
     zIncome  zLot_Size Ownership  Number
3  -0.409776   0.743358     Owner       4
8  -0.069107   0.437269     Owner       9
13 -0.804953   0.743358  Nonowner      14
0  -0.477910  -0.174908     Owner       1
