## Car Evaluation Data

---

### Import Libraries

In [1]:
import pandas as pd

In [2]:
# Column names for the raw file: six car attributes followed by the class label.
cols = [
    'price',
    'maintenance',
    'doors',
    'persons',
    'lug_boot',
    'safety',
    'decision',
]

In [3]:
# Read the raw data; the first line of the file is treated as data and the
# column names are supplied from `cols`.
car = pd.read_csv('Data/car_evaluation.csv', header=None, names=cols)
car.head(5)

Unnamed: 0,price,maintenance,doors,persons,lug_boot,safety,decision
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [4]:
# Distinct levels of the `price` attribute
car['price'].unique()

array(['vhigh', 'high', 'med', 'low'], dtype=object)

In [5]:
# Distinct levels of the `maintenance` attribute
car['maintenance'].unique()

array(['vhigh', 'high', 'med', 'low'], dtype=object)

In [6]:
# Distinct levels of the `doors` attribute
car['doors'].unique()

array(['2', '3', '4', '5more'], dtype=object)

In [7]:
# Distinct levels of the `persons` attribute
car['persons'].unique()

array(['2', '4', 'more'], dtype=object)

In [8]:
# Distinct levels of the `lug_boot` attribute
car['lug_boot'].unique()

array(['small', 'med', 'big'], dtype=object)

In [9]:
# Distinct levels of the `safety` attribute
car['safety'].unique()

array(['low', 'med', 'high'], dtype=object)

In [10]:
# Distinct class labels in the `decision` (target) column
car['decision'].unique()

array(['unacc', 'acc', 'vgood', 'good'], dtype=object)

In [11]:
# Summary statistics per column (count, number of unique values, most frequent
# value and its frequency, as shown in the output below).
car.describe()

Unnamed: 0,price,maintenance,doors,persons,lug_boot,safety,decision
count,1728,1728,1728,1728,1728,1728,1728
unique,4,4,4,3,3,3,4
top,low,low,2,more,small,low,unacc
freq,432,432,432,576,576,576,1210


---

### Preprocessing the Data

> The dataset consists of 6 attributes and 1 class column with 4 class values {unacc, acc, good, vgood}. 


> Because we are building a neural network, we need to feed it numeric values it can read, encoded so that no particular value of an attribute is implicitly weighted above another. 


> Therefore we convert all the nominal/categorical data into numeric form using the **pandas.get_dummies** function. 

> This function creates one additional indicator column for each distinct value of each attribute, which increases the total number of columns.

In [12]:
# Take an independent (deep) copy of the raw frame under the name `dfcar`.
dfcar = car.copy(deep=True)

In [13]:
#Implement Get Dummies
# One-hot encode every column listed in `cols` (the six attributes and the
# target). dtype=int keeps the indicators as 0/1 integers on every pandas
# version; pandas 2.0+ would otherwise return True/False columns, which would
# not match the 0/1 values shown below.
dfcar = pd.get_dummies(car, columns=cols, dtype=int)

In [14]:
# Preview the first five rows of the one-hot encoded frame
dfcar.head(5)

Unnamed: 0,price_high,price_low,price_med,price_vhigh,maintenance_high,maintenance_low,maintenance_med,maintenance_vhigh,doors_2,doors_3,...,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med,decision_acc,decision_good,decision_unacc,decision_vgood
0,0,0,0,1,0,0,0,1,1,0,...,0,0,1,0,1,0,0,0,1,0
1,0,0,0,1,0,0,0,1,1,0,...,0,0,1,0,0,1,0,0,1,0
2,0,0,0,1,0,0,0,1,1,0,...,0,0,1,1,0,0,0,0,1,0
3,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,1,0,0,0,1,0
4,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,0,1,0,0,1,0


In [15]:
# (rows, columns) of the encoded frame
dfcar.shape

(1728, 25)

In [16]:
# Collapse neighbouring levels into a single indicator by OR-ing the dummies:
#   price:       vhigh -> high
#   maintenance: vhigh -> high
#   persons:     4     -> more
level_merges = [
    ('price_high', 'price_vhigh'),
    ('maintenance_high', 'maintenance_vhigh'),
    ('persons_more', 'persons_4'),
]
for kept_col, absorbed_col in level_merges:
    dfcar[kept_col] = dfcar[absorbed_col] | dfcar[kept_col]

# Re-attach the original string class label next to the encoded columns.
dfcar['decision'] = car['decision']

# Show the first 20 rows transposed so that each column becomes a row.
dfcar.head(20).transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
price_high,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
price_low,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
price_med,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
price_vhigh,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
maintenance_high,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
maintenance_low,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
maintenance_med,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
maintenance_vhigh,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
doors_2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
doors_3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [17]:
# Columns carried forward: the price, maintenance and persons indicators plus
# the string class label. The doors, lug_boot and safety indicators are not
# selected here.
col1 = [
    'price_high', 'price_low', 'price_med',
    'maintenance_high', 'maintenance_low', 'maintenance_med',
    'persons_2', 'persons_more',
    'decision',
]

df = dfcar.loc[:, col1]
df.head(5)

Unnamed: 0,price_high,price_low,price_med,maintenance_high,maintenance_low,maintenance_med,persons_2,persons_more,decision
0,1,0,0,1,0,0,1,0,unacc
1,1,0,0,1,0,0,1,0,unacc
2,1,0,0,1,0,0,1,0,unacc
3,1,0,0,1,0,0,1,0,unacc
4,1,0,0,1,0,0,1,0,unacc


In [18]:
# Persist the reduced, preprocessed frame to disk.
# Note: with the default settings the row index is written as the first column.
preprocessed_path = 'Data/preprocess_car_evaluation.csv'
df.to_csv(preprocessed_path)

In [19]:
# Separate the target column from the feature matrix

y = df['decision']
X = df.drop(columns='decision')
print(X.shape, y.shape)

(1728, 8) (1728,)


In [31]:
# Number of input features = number of columns in X
_, n_cols = X.shape
n_cols

8

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

Using TensorFlow backend.


In [21]:
# Hold out a test set (library-default split size). Stratifying on y keeps the
# class proportions the same in both parts; random_state fixes the shuffle.

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    stratify=y,
    random_state=1,
)

In [22]:
# Step 1: map the string class labels to integer codes.
# The encoder is fitted on the training labels only and then applied to both sets.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [23]:
#Step 2: convert encoded labels to one-hot-encoding
# Pin the one-hot width to the number of classes the encoder learned. Without
# num_classes, to_categorical infers the width from the largest label present
# in each array, so the train and test matrices could end up with different
# numbers of columns if a split were missing the highest-coded class.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)

---

### Create a Deep Learning Model

In [24]:
#Import Keras Deep Learning Library to create a model
from keras.models import Sequential
from keras.layers import Dense

#### Create Sequential Model

In [26]:
# Instantiate an empty Keras Sequential model (no layers yet).
model = Sequential()

#### Build Layers

In [32]:
# Building our layers
#   Input:  n_cols features (the 8 columns of X)
#   Hidden: two fully connected ReLU layers of 5 units each
#   Output: 4 softmax units, one per decision class
# The model is (re)created in this cell so that re-running it cannot stack
# duplicate layers onto an already-built model.
model = Sequential([
    Dense(5, activation='relu', input_shape=(n_cols,)),
    Dense(5, activation='relu'),
    Dense(4, activation='softmax'),
])