In [1]:
import sklearn
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
from sklearn import linear_model, preprocessing

data = pd.read_csv("car.data")

#label_encoder
le = preprocessing.LabelEncoder()

#The method fit_transform() takes a list (each of our columns) and will return to us an array containing our new values.
buying = le.fit_transform(list(data["buying"]))
maint = le.fit_transform(list(data["maint"]))
door = le.fit_transform(list(data["door"]))
persons = le.fit_transform(list(data["persons"]))
lug_boot = le.fit_transform(list(data["lug_boot"]))
safety = le.fit_transform(list(data["safety"]))
cls = le.fit_transform(list(data["class"]))

#Now we need to recombine our data into a feature list and a label list. We can use the zip() function to makes things easier.
x = list(zip(buying, maint, door, persons, lug_boot, safety)) #features
y = list(cls) #labels

#Finally we will split our data into training and testing data using the same process seen previously.
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size = 0.1)

#define model and k value
model = KNeighborsClassifier(n_neighbors=9)

#To train our model we follow precisely the same steps as outlined earlier.
model.fit(x_train, y_train)

#And once again to score our model we will do the following.
acc = model.score(x_test, y_test)
#print(acc)

#If we'd like to see how our model is performing on the unique elements of our test data we can do the following.
predicted = model.predict(x_test)
names = ["unacc", "acc", "good", "vgood"]

for i in range(len(predicted)):
    print("Predicted: ", names[predicted[i]], "| Data: ", x_test[i], "| Actual: ", names[y_test[i]])
    
# This will display the predicted class, our data and the actual class
# We create a names list so that we can convert our integer predictions into their string representation 

    # Now we will we see the neighbors of each point in our testing data
    n = model.kneighbors([x_test[i]], 9, True)
    print("N: ", n)

Predicted:  good | Data:  (3, 3, 2, 2, 0, 2) | Actual:  good
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.41421356, 1.41421356, 1.41421356]]), array([[1316,  908, 1091,   91,  516, 1472, 1048,  777,  666]],
      dtype=int64))
Predicted:  good | Data:  (1, 2, 0, 2, 1, 2) | Actual:  unacc
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.41421356]]), array([[ 405,  206,  911,  354,  188, 1383,  505,  685,  478]],
      dtype=int64))
Predicted:  good | Data:  (3, 3, 1, 0, 0, 0) | Actual:  good
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.41421356, 1.41421356]]), array([[1446,  145,   96, 1538,  826,  379,  597,  996, 1410]],
      dtype=int64))
Predicted:  good | Data:  (1, 0, 1, 1, 1, 1) | Actual:  good
N:  (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 189,  859,  590,  508,  461, 1037, 1481, 1422,

N:  (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 461, 1519, 1037,  437,  573,  688, 1323,  665, 1554]],
      dtype=int64))
Predicted:  good | Data:  (1, 1, 2, 1, 1, 1) | Actual:  good
N:  (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1321, 1161, 1503,  595,   78, 1090, 1548, 1213,  859]],
      dtype=int64))
Predicted:  good | Data:  (0, 3, 1, 0, 0, 0) | Actual:  good
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.41421356, 1.41421356]]), array([[ 722, 1424, 1284,  255,  509,  308,  771, 1257,  986]],
      dtype=int64))
Predicted:  good | Data:  (2, 3, 0, 2, 2, 0) | Actual:  good
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.41421356, 1.41421356]]), array([[  28, 1365, 1016,  154,  460,  575,  410,  477,  321]],
      dtype=int64))
Predicted:  good | Data:  (0, 2, 0, 0, 0, 0) | Actual:  good
N:  (array([[1.        , 1.        , 1.        , 1. 

Predicted:  good | Data:  (0, 0, 0, 1, 0, 1) | Actual:  good
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.41421356]]), array([[ 295, 1179, 1135, 1254, 1535, 1415, 1157,  953, 1143]],
      dtype=int64))
Predicted:  good | Data:  (0, 0, 1, 0, 2, 2) | Actual:  good
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.41421356, 1.41421356, 1.41421356]]), array([[1341, 1107, 1315,  488, 1200,  567,  981,  256,  872]],
      dtype=int64))
Predicted:  good | Data:  (1, 0, 3, 1, 2, 2) | Actual:  unacc
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.41421356, 1.41421356, 1.41421356]]), array([[882, 291, 204, 924, 697, 584, 430, 671, 691]], dtype=int64))
Predicted:  good | Data:  (3, 1, 1, 0, 0, 2) | Actual:  good
N:  (array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.41421356, 1.41421356, 1.414