## Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import sklearn
import sklearn.model_selection
from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model, preprocessing
import matplotlib.pyplot as plt
from matplotlib import style
import pickle

In [2]:
df = pd.read_csv('car.data')
df.head()

Unnamed: 0,buying,maint,door,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [3]:
le = preprocessing.LabelEncoder()

In [4]:
df.buying = le.fit_transform(df['buying'])
df.maint = le.fit_transform(df['maint'])
df.lug_boot = le.fit_transform(df['lug_boot'])
df.safety = le.fit_transform(df['safety'])
df.door = le.fit_transform(df['door'])
df.persons = le.fit_transform(df['persons'])
df['class'] = le.fit_transform(df['class'])
df

Unnamed: 0,buying,maint,door,persons,lug_boot,safety,class
0,3,3,0,0,2,1,2
1,3,3,0,0,2,2,2
2,3,3,0,0,2,0,2
3,3,3,0,0,1,1,2
4,3,3,0,0,1,2,2
...,...,...,...,...,...,...,...
1723,1,1,3,2,1,2,1
1724,1,1,3,2,1,0,3
1725,1,1,3,2,0,1,2
1726,1,1,3,2,0,2,1


In [5]:
X = df.drop(['class'], 1)
y = df['class']

## Splitting and training our data

In [6]:
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size = 0.2)

In [7]:
knn = KNeighborsClassifier(n_neighbors=5)

In [8]:
knn.fit(x_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [9]:
predicted = knn.predict(x_test)
names = ["unacc", "acc", "good", "vgood"]

for x in range(len(predicted)):
    print("Predicted: ", names[predicted[x]], "Data: ", np.array(x_test)[x], "Actual: ", names[np.array(y_test)[x]])
    n = knn.kneighbors([ np.array(x_test)[x]], 5, True)
    print("N: ", n)

Predicted:  good Data:  [2 3 3 1 0 1] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[  70,  532, 1201,  667,  692]], dtype=int64))
Predicted:  unacc Data:  [0 0 0 1 0 0] Actual:  unacc
N:  (array([[1., 1., 1., 1., 1.]]), array([[  16, 1341, 1205,  980, 1136]], dtype=int64))
Predicted:  unacc Data:  [2 2 1 2 2 0] Actual:  unacc
N:  (array([[1., 1., 1., 1., 1.]]), array([[1051, 1209, 1058, 1042, 1161]], dtype=int64))
Predicted:  good Data:  [0 2 0 1 2 1] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[1380,  728,  700,  152,  877]], dtype=int64))
Predicted:  vgood Data:  [1 1 2 2 0 0] Actual:  vgood
N:  (array([[1., 1., 1., 1., 1.]]), array([[ 821,  492,  108, 1115,  503]], dtype=int64))
Predicted:  good Data:  [0 3 0 1 1 2] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[1152,   84,  429,  858, 1274]], dtype=int64))
Predicted:  good Data:  [1 0 0 0 2 0] Actual:  good
N:  (array([[1.        , 1.        , 1.41421356, 1.41421356, 1.41421356]]), array([[ 836, 

N:  (array([[1., 1., 1., 1., 1.]]), array([[ 516, 1009,  658,  401,  507]], dtype=int64))
Predicted:  good Data:  [3 0 0 1 2 2] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[ 948,  675, 1221, 1105, 1364]], dtype=int64))
Predicted:  good Data:  [0 3 2 1 2 0] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[ 453, 1025,  275,  760, 1019]], dtype=int64))
Predicted:  good Data:  [0 2 2 0 2 2] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[ 730, 1070,  430,  352, 1350]], dtype=int64))
Predicted:  good Data:  [0 2 1 0 0 0] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[1004,  488,  708,  517,  292]], dtype=int64))
Predicted:  good Data:  [1 2 0 1 2 1] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[1348,  308,  342,   69, 1368]], dtype=int64))
Predicted:  good Data:  [2 1 0 0 0 2] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[173, 499, 118, 129, 406]], dtype=int64))
Predicted:  good Data:  [3 1 2 2 2 1] Actual:  good
N:  (array(

N:  (array([[1., 1., 1., 1., 1.]]), array([[ 931,   84,  582, 1071, 1368]], dtype=int64))
Predicted:  good Data:  [3 3 1 0 0 2] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[546, 679,  13, 626,  34]], dtype=int64))
Predicted:  good Data:  [0 2 3 0 0 0] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[ 791,  383,  489, 1246,   12]], dtype=int64))
Predicted:  good Data:  [0 1 0 0 0 2] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[ 407,  681,  211,  964, 1087]], dtype=int64))
Predicted:  good Data:  [0 0 3 1 2 1] Actual:  good
N:  (array([[1., 1., 1., 1., 1.]]), array([[ 413,  604,  148, 1346, 1168]], dtype=int64))
Predicted:  unacc Data:  [1 0 3 2 0 2] Actual:  unacc
N:  (array([[1., 1., 1., 1., 1.]]), array([[ 92, 985, 132, 131, 556]], dtype=int64))
Predicted:  unacc Data:  [2 2 3 1 1 2] Actual:  unacc
N:  (array([[1., 1., 1., 1., 1.]]), array([[1204,  219, 1185,  886,  253]], dtype=int64))
Predicted:  good Data:  [0 3 3 2 1 0] Actual:  good
N:  (array([

In [10]:
acc = knn.score(x_test, y_test)
print(acc)

0.9161849710982659
