In [166]:
# K-nearest neighbours (KNN) classifications
from sklearn import neighbors
from sklearn import metrics
from sklearn.model_selection import train_test_split
from category_encoders import OrdinalEncoder
import pandas as pd
import numpy as np

In [167]:
# https://archive.ics.uci.edu/ml/datasets/car+evaluation
data = pd.read_csv('car.data')

In [168]:
data.head(5)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [169]:
"""
Using engineering judgment to presume only buying price, maintenance price, and the estimated safety of the car features determine the car acceptability class.
"""
X = data[['buying', 'maint', 'safety']]
y = data[['class']]

In [170]:
# print(X)
# y.head(10)

In [171]:
# Need to encode string feature values to numeric values
ordinal_mappings = [
    {
        "col": "buying",
        "mapping": {
            "vhigh":3, "high":2, "med":1, "low":0
        }
    },
    {
        "col": "maint",
        "mapping": {
            "vhigh":3, "high":2, "med":1, "low":0
        }
    },
    {
        "col": "safety",
        "mapping": {
            "high":2, "med":1, "low":0
        }
    },
]

In [172]:
# https://towardsdatascience.com/a-common-mistake-to-avoid-when-encoding-ordinal-features-79e402796ab4

encoder = OrdinalEncoder(mapping = ordinal_mappings, return_df=True)
X = encoder.fit_transform(X)

In [173]:
# le = LabelEncoder()
# X_copy = X.copy(deep=True)
# for col in X.columns:
#     X_copy[col] = le.fit_transform(X[col])
# X = X_copy.copy(deep=True)

In [174]:
X.head(5)

Unnamed: 0,buying,maint,safety
0,3,3,0
1,3,3,1
2,3,3,2
3,3,3,0
4,3,3,1


In [175]:
# Standardize y
label_mapping = [{
    "col":"class",
    "mapping":{
    'unacc':0,
    'acc':1,
    'good':2,
    'vgood':3 }
}]

encoder = OrdinalEncoder(mapping=label_mapping, return_df=True)
y = encoder.fit_transform(y)

In [176]:
print(y)

      class
0         0
1         0
2         0
3         0
4         0
...     ...
1723      2
1724      3
1725      0
1726      2
1727      3

[1728 rows x 1 columns]


In [177]:
# Create model
knn = neighbors.KNeighborsClassifier(n_neighbors=25, weights='uniform')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
knn.fit(X_train, y_train)
prediction = knn.predict(X_test)

  return self._fit(X, y)


In [178]:
# accuracy
accuracy = metrics.accuracy_score(y_test, prediction)
print("prediction accuracy: ", round(accuracy, 2))

prediction accuracy:  0.74


In [179]:
print("actual value: ", y.loc[20].values)
print("predicted value: ", knn.predict(X)[20])

actual value:  [0]
predicted value:  0
