# Car Evaluation Dataset

In [335]:
import pandas as pd

Importing dataset from https://archive.ics.uci.edu/ml/datasets/Car+Evaluation

## Data Preprocessing

### Importing the dataset

In [336]:
dataset = pd.read_csv("/carevaluation.csv")

In [337]:
dataset.head()

Unnamed: 0,no,buying,maint,doors,persons,lug_boot,safety,class
0,0,vhigh,vhigh,2,2,small,low,unacc
1,1,vhigh,vhigh,2,2,small,med,unacc
2,2,vhigh,vhigh,2,2,small,high,unacc
3,3,vhigh,vhigh,2,2,med,low,unacc
4,4,vhigh,vhigh,2,2,med,med,unacc


In [338]:
dataset.shape

(1728, 8)

In [339]:
dataset.describe(include= 'all')

Unnamed: 0,no,buying,maint,doors,persons,lug_boot,safety,class
count,1728.0,1728,1728,1728.0,1728.0,1728,1728,1728
unique,,4,4,,,3,3,4
top,,vhigh,vhigh,,,small,low,unacc
freq,,432,432,,,576,576,1210
mean,863.5,,,3.5,3.666667,,,
std,498.974949,,,1.118358,1.24758,,,
min,0.0,,,2.0,2.0,,,
25%,431.75,,,2.75,2.0,,,
50%,863.5,,,3.5,4.0,,,
75%,1295.25,,,4.25,5.0,,,


### Encoding categorical data

In [340]:
from sklearn.preprocessing import LabelEncoder

In [341]:
class MultiColumnLabelEncoder:
    def __init__(self,columns = None):
        self.columns = columns # array of column names to encode

    def fit(self,X,y=None):
        return self # not relevant here

    def transform(self,X):
        '''
        Transforms columns of X specified in self.columns using
        LabelEncoder(). If no columns specified, transforms all
        columns in X.
        '''
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                output[col] = LabelEncoder().fit_transform(output[col])
        else:
            for colname,col in output.iteritems():
                output[colname] = LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self,X,y=None):
        return self.fit(X,y).transform(X)

In [342]:
dataset = MultiColumnLabelEncoder(columns = ['buying','maint','lug_boot','safety','class']).fit_transform(dataset)

### Split feature construction and class

In [343]:
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

In [344]:
print(X)

[[3 3 2 2 2 1]
 [3 3 2 2 2 2]
 [3 3 2 2 2 0]
 ...
 [1 1 5 5 0 1]
 [1 1 5 5 0 2]
 [1 1 5 5 0 0]]


In [345]:
print(y)

[2 2 2 ... 2 1 3]


### Splitting the dataset into the Training set and Test set

In [346]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

# Train the model

In [347]:
from sklearn.neural_network import MLPClassifier

In [348]:
mlp = MLPClassifier(max_iter=500, activation = 'relu', momentum=0.2, power_t=0.3)
mlp

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.2, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.3, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [349]:
mlp.fit(X_train,y_train)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.2, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.3, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

# Predict

In [350]:
y_pred = mlp.predict(X_test)

In [351]:
from sklearn.metrics import classification_report,confusion_matrix, accuracy_score

confusion_matrix(y_test,y_pred)

array([[ 96,   0,  14,   1],
       [ 10,  17,   0,   1],
       [ 10,   1, 347,   0],
       [  1,   0,   0,  21]])

In [352]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.82      0.86      0.84       111
           1       0.94      0.61      0.74        28
           2       0.96      0.97      0.97       358
           3       0.91      0.95      0.93        22

    accuracy                           0.93       519
   macro avg       0.91      0.85      0.87       519
weighted avg       0.93      0.93      0.93       519



In [353]:
accuracy_score(y_test, y_pred)

0.9267822736030829