In [1]:
# Import libraries
from sklearn import neighbors, model_selection
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

In [2]:
# Load data
# Column labels are supplied here; header=None states explicitly that the
# first line of the file is a data row rather than a header.
column_names = [
    'buying',
    'maint',
    'doors',
    'persons',
    'lug_boot',
    'safety',
    'class',
]
car_data = pd.read_csv('car.data', header=None, names=column_names)
car_data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [5]:
# Convert categorical data to numeric
categorical_columns = ['buying', 'maint', 'lug_boot', 'safety']

# Each distinct label is replaced by an integer code assigned in order of first
# appearance in the column (first label seen -> 0, next new label -> 1, ...).
# The codes therefore depend on the row order of the loaded file.
for column in categorical_columns:
    label_to_code = {
        label: code for code, label in enumerate(car_data[column].unique())
    }
    car_data[column] = car_data[column].map(label_to_code)
car_data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,0,0,2,2,0,0,unacc
1,0,0,2,2,0,1,unacc
2,0,0,2,2,0,2,unacc
3,0,0,2,2,1,0,unacc
4,0,0,2,2,1,1,unacc


In [6]:
# Clean doors and persons columns
# Both columns mix digit strings with one open-ended label ('5more' / 'more').
# The label is replaced by '5' and the column is then cast to int, so the
# features are genuinely numeric instead of relying on the estimator to coerce
# strings such as '2' to numbers later on.
# NOTE(review): using 5 as the stand-in for 'more' persons is kept from the
# original code; confirm it is the intended numeric value.
car_data['doors'] = car_data['doors'].replace('5more', '5').astype(int)
car_data['persons'] = car_data['persons'].replace('more', '5').astype(int)

In [7]:
# Split into train and test datasets
# Fixed seed so the split, and every score computed from it, is identical on
# each Restart & Run All.
RANDOM_STATE = 42

arr = car_data.values
X = arr[:, 0:6]  # first six columns: the features
y = arr[:, 6]    # seventh column: the 'class' label

# Hold out 20% of the rows for testing.
train_data, test_data, train_labels, test_labels = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

In [8]:
# Make prediction on test data
# Fit a 3-nearest-neighbours classifier on the training split and predict the
# label of every held-out row.
model = neighbors.KNeighborsClassifier(n_neighbors=3)
model.fit(train_data, train_labels)
prediction = model.predict(test_data)

# accuracy_score takes (y_true, y_pred): ground truth first, predictions second.
# Multiplied by 100 to display the accuracy as a percentage.
accuracy_score(test_labels, prediction) * 100

90.17341040462428

In [10]:
# Show sample prediction
# Index -> label lookup for the 'class' column. It is not used by the loop
# below; it is kept under its original name in case other cells refer to it.
safety_map = dict(enumerate(car_data['class'].unique()))

# The model predicts the 'class' column, so the output is labelled "class"
# ('safety' is one of the input features). Slicing rather than indexing with
# range(10) avoids an IndexError when there are fewer than 10 test rows.
for predicted, actual in zip(prediction[:10], test_labels[:10]):
    print("Predicted class: {}, Actual class: {}".format(predicted, actual))

Predicted safety: unacc, Actual safety: unacc
Predicted safety: unacc, Actual safety: unacc
Predicted safety: unacc, Actual safety: unacc
Predicted safety: unacc, Actual safety: unacc
Predicted safety: acc, Actual safety: acc
Predicted safety: unacc, Actual safety: unacc
Predicted safety: unacc, Actual safety: unacc
Predicted safety: unacc, Actual safety: unacc
Predicted safety: unacc, Actual safety: unacc
Predicted safety: unacc, Actual safety: unacc
