# Car Classification using MLP NN

# 1. Import libraries

In [11]:
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix,classification_report

# 2. Load data

In [12]:
column_names = ['buying','maint','doors','persons','lug_boot','safety','Class']
car_data = pd.read_csv('car.data.csv', names = column_names)
car_data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


# 3. Preprocessing

Encode categorical variables using label encoding

In [13]:
encoder = preprocessing.LabelEncoder()
car_data.iloc[:, 0:6] = car_data.iloc[:, 0:6].apply(encoder.fit_transform)
car_data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Class
0,3,3,0,0,2,1,unacc
1,3,3,0,0,2,2,unacc
2,3,3,0,0,2,0,unacc
3,3,3,0,0,1,1,unacc
4,3,3,0,0,1,2,unacc
...,...,...,...,...,...,...,...
1723,1,1,3,2,1,2,good
1724,1,1,3,2,1,0,vgood
1725,1,1,3,2,0,1,unacc
1726,1,1,3,2,0,2,good


# 4. Define inputs and outputs

The first 6 columns are extracted for input

In [14]:
inputs = car_data.iloc[:, :6]
inputs.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,3,3,0,0,2,1
1,3,3,0,0,2,2
2,3,3,0,0,2,0
3,3,3,0,0,1,1
4,3,3,0,0,1,2


The last column is extracted for output

In [15]:
outputs = car_data.iloc[:, [6]]
outputs.head()

Unnamed: 0,Class
0,unacc
1,unacc
2,unacc
3,unacc
4,unacc


# 5. Split data into training and testing

Training data is 80% and Testing data is 20%

In [16]:
inputs_train, inputs_test, outputs_train, outputs_test = train_test_split(inputs, outputs, test_size = 0.20)

# 6. Apply the transformations to the data

Fit the input data

In [17]:
scaler = StandardScaler()
scaler.fit(inputs_train)

StandardScaler()

Tranform the input data

In [18]:
inputs_train = scaler.transform(inputs_train)
inputs_test = scaler.transform(inputs_test)

# 7. Create the Multilayer Percepron Model

Use the rectifed linear unit function as the activation function

Use the stochastic gradient descent solver

Use the adaptive learning rate

Shuffle samples with each iteration

In [19]:
mlp = MLPClassifier(hidden_layer_sizes = (60, 60, 60), max_iter = 1500, activation = 'relu', solver = 'sgd', learning_rate = 'adaptive', shuffle = True, random_state = 1)

Fit the model

In [20]:
mlp.fit(inputs_train, outputs_train.values.ravel())

MLPClassifier(hidden_layer_sizes=(60, 60, 60), learning_rate='adaptive',
              max_iter=1500, random_state=1, solver='sgd')

# 8. Measure the performance

Measure accuracy

In [21]:
predictions = mlp.predict(inputs_test)
print(confusion_matrix(outputs_test, predictions, labels = ['unacc','acc','good','vgood']))

[[227   0   0   0]
 [  9  78   1   0]
 [  0   1  11   0]
 [  0   2   1  16]]


Calculate the score the prediction

In [22]:
mlp.score(inputs_test, outputs_test)

0.9595375722543352