# Car Evaluation

The datasets  evaluates cars according to the following six input attributes: buying, maint, doors, persons, lug_boot, safety

In [7]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [8]:
#Seven columns (without column column headers)
col = ["buying", "maint", "doors", "persons","lug_boot","safety", "Car_Evaluation"]

In [9]:
cars = pd.read_csv("car.data", names= col)

In [20]:
cars.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Car_Evaluation
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [21]:
cars.isnull().sum()

buying            0
maint             0
doors             0
persons           0
lug_boot          0
safety            0
Car_Evaluation    0
dtype: int64

### Convert features into categorical

In [22]:
class MultiColumnLabelEncoder:
    def __init__(self,columns = None):
        self.columns = columns

    def fit(self,X,y=None):
        return self

    def transform(self,X):
        '''
        Transforms columns of X specified in self.columns using
        LabelEncoder(). If no columns specified, transforms all
        columns in X.
        '''
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                output[col] = LabelEncoder().fit_transform(output[col])
        else:
            for colname,col in output.iteritems():
                output[colname] = LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self,X,y=None):
        return self.fit(X,y).transform(X)

In [23]:
car = MultiColumnLabelEncoder(columns = ["buying", "maint", "doors", "persons","lug_boot",
                                   "safety", "Car_Evaluation"]).fit_transform(cars)

In [24]:
car.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Car_Evaluation
0,3,3,0,0,2,1,2
1,3,3,0,0,2,2,2
2,3,3,0,0,2,0,2
3,3,3,0,0,1,1,2
4,3,3,0,0,1,2,2


### Split dataset into features and target (X, y)

In [25]:
X = car.drop('Car_Evaluation', axis =1)
y = car['Car_Evaluation']

In [26]:
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import classification_report,accuracy_score

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,random_state = 10)

### Define the Model / Classifier (corss validation settings)

In [28]:
kf = KFold(n_splits= 8, random_state= 10)

In [29]:
params = {}

In [30]:
nb = GaussianNB()

In [31]:
classifier = GridSearchCV(nb, cv=kf, param_grid=params, return_train_score=True)

In [32]:
classifier.fit(X_train, y_train)

GridSearchCV(cv=KFold(n_splits=8, random_state=10, shuffle=False),
       error_score='raise-deprecating',
       estimator=GaussianNB(priors=None, var_smoothing=1e-09),
       fit_params=None, iid='warn', n_jobs=None, param_grid={},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [33]:
classifier.cv_results_

{'mean_fit_time': array([0.00300041]),
 'std_fit_time': array([0.001175]),
 'mean_score_time': array([0.00119939]),
 'std_score_time': array([0.00037502]),
 'params': [{}],
 'split0_test_score': array([0.68874172]),
 'split1_test_score': array([0.63576159]),
 'split2_test_score': array([0.71523179]),
 'split3_test_score': array([0.61589404]),
 'split4_test_score': array([0.56291391]),
 'split5_test_score': array([0.60264901]),
 'split6_test_score': array([0.54]),
 'split7_test_score': array([0.55333333]),
 'mean_test_score': array([0.61442786]),
 'std_test_score': array([0.05936224]),
 'rank_test_score': array([1], dtype=int32),
 'split0_train_score': array([0.60094787]),
 'split1_train_score': array([0.61516588]),
 'split2_train_score': array([0.61800948]),
 'split3_train_score': array([0.61421801]),
 'split4_train_score': array([0.62180095]),
 'split5_train_score': array([0.61611374]),
 'split6_train_score': array([0.62026515]),
 'split7_train_score': array([0.61742424]),
 'mean_trai

In [34]:
pred = classifier.predict(X_test)

In [35]:
print(classification_report(y_test,pred))  
print('Accuracy of the classifier on test set: {:.2f}'.format(accuracy_score(y_test, pred)))

              precision    recall  f1-score   support

           0       0.60      0.15      0.24        98
           1       0.00      0.00      0.00        19
           2       0.89      0.81      0.85       378
           3       0.15      1.00      0.27        23

   micro avg       0.66      0.66      0.66       518
   macro avg       0.41      0.49      0.34       518
weighted avg       0.77      0.66      0.68       518

Accuracy of the classifier on test set: 0.66


  'precision', 'predicted', average, warn_for)


#### What is the class attribute(evaluation) for the following cases?

In [46]:
Case1=     ["low","low","5more","more","small","high"]
Case2=     ["low","low","5more","more","med","low"]
Case3=     ["low","low","5more","more","med","high"]
Case4=     ["low","low","5more","more","med","high"]

### Predict the requested Cases

In [47]:
cols = ["buying", "maint", "doors", "persons","lug_boot","safety"]

In [48]:
Cases = [cols, Case1, Case2, Case3, Case4]

In [49]:
headers = Cases.pop(0)

In [50]:
df = pd.DataFrame(Cases, columns=headers)

In [51]:
df

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,low,low,5more,more,small,high
1,low,low,5more,more,med,low
2,low,low,5more,more,med,high
3,low,low,5more,more,med,high


In [52]:
df = MultiColumnLabelEncoder(columns = ["buying", "maint", "doors", "persons","lug_boot",
                                   "safety"]).fit_transform(df)

In [53]:
df

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,0,0,0,0,1,0
1,0,0,0,0,0,1
2,0,0,0,0,0,0
3,0,0,0,0,0,0


In [54]:
prediction = classifier.predict(df)

In [55]:
prediction

array([3, 2, 3, 3])