# Car evaluation

**By:** Juan Bernardo Benavides Rubio

---



In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [18]:
names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']

data = pd.read_csv("data/car.data", names=names)
data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [19]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   buying    1728 non-null   object
 1   maint     1728 non-null   object
 2   doors     1728 non-null   object
 3   persons   1728 non-null   object
 4   lug_boot  1728 non-null   object
 5   safety    1728 non-null   object
 6   class     1728 non-null   object
dtypes: object(7)
memory usage: 94.6+ KB


In [20]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

In [21]:
buying_fit = le.fit(data['buying'])
buying_fit.classes_

array(['high', 'low', 'med', 'vhigh'], dtype=object)

In [22]:
le.transform(['vhigh', 'vhigh', 'low', 'high', 'med', 'high', 'med'])

array([3, 3, 1, 0, 2, 0, 2])

In [23]:
buying = le.transform(data['buying'])
buying

array([3, 3, 3, ..., 1, 1, 1])

In [24]:
data['doors']

0           2
1           2
2           2
3           2
4           2
        ...  
1723    5more
1724    5more
1725    5more
1726    5more
1727    5more
Name: doors, Length: 1728, dtype: object

In [25]:
doors = le.fit_transform(data['doors'])
doors

array([0, 0, 0, ..., 3, 3, 3])

In [26]:
buying = le.fit_transform(data['buying'])
maint = le.fit_transform(data['maint'])
doors = le.fit_transform(data['doors'])
persons = le.fit_transform(data['persons'])
lug_boot = le.fit_transform(data['lug_boot'])
safety = le.fit_transform(data['safety'])


In [27]:
target = "class"
le.fit(data['class'])
cls = le.transform(data['class'])

In [28]:
X = np.array(list(zip(buying, maint, doors, persons, lug_boot, safety)))
y = cls
y.shape

(1728,)

In [29]:
from sklearn.model_selection import train_test_split

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

print("X_train shape: {}".format(X_train.shape))
print("y_train shape: {}".format(y_train.shape))

X_train shape: (1555, 6)
y_train shape: (1555,)


In [31]:
from sklearn.neighbors import KNeighborsClassifier

In [32]:
knn = KNeighborsClassifier(n_neighbors=9)

knn.fit(X_train, y_train)
knn.score(X_test, y_test)

0.9248554913294798

In [33]:
parameters = {'n_neighbors': np.arange(1, 51)}
parameters

{'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])}

In [34]:
from sklearn.model_selection import GridSearchCV

In [38]:
grid_search = GridSearchCV(knn, parameters, cv = 5)
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(n_neighbors=9),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])})

In [39]:
grid_search.score(X_test, y_test)

0.9132947976878613

In [40]:
grid_search.best_params_, grid_search.best_score_

({'n_neighbors': 7}, 0.92475884244373)