# Car test-time prediction

## Loading the Mercedes-Benz (MB) dataset

In [41]:
import pandas as pd

# Read the raw CSV into a DataFrame bound to `data`
dataset_path = 'mercedes_test.csv'
data = pd.read_csv(dataset_path)

## Data pre-processing

In [42]:
# Pick the string-typed (object) columns and mark them as "categorical"
cf = data.select_dtypes(include=['object']).columns
data[cf] = data[cf].astype('category')
# One-hot encode the categorical columns
data = pd.get_dummies(data)
# Features: every column except 'ID' and the target 'y'
X_df = data.drop(['ID', 'y'], axis=1)
# Target column
y_df = data['y']

# Convert y_df into binary labels: 1.0 where y is strictly below the median, else 0.0
import numpy as np
TF_vector = (y_df < np.median(y_df))
y_df = TF_vector.astype(float)

# Convert the data frames into numpy arrays.
# pandas >= 2.0 returns boolean dummy columns from get_dummies; next to other
# numeric columns `.values` can then come back as dtype=object, so request a
# single float dtype explicitly for the feature matrix.
X, y = X_df.to_numpy(dtype=np.float32), y_df.values

# Split into train and test datasets.
# A fixed random_state makes the stratified split identical on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(3788, 563)
(421, 563)
(3788,)
(421,)


## Use of TensorBoard for visualization

In [43]:
# Load the TensorBoard notebook extension (registers the %tensorboard magic used below)
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [44]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense 
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler

import os
from tensorflow.keras.callbacks import TensorBoard

In [45]:
# Construction of a DNN model
def create_model(n_layer=2, lambda_=0, n_units=10):
    """Build an uncompiled, fully connected binary classifier.

    Parameters
    ----------
    n_layer : int
        Total number of Dense layers, counting the sigmoid output layer.
        ``n_layer - 1`` hidden ReLU layers are stacked in front of it, so
        ``n_layer=1`` yields the output layer only.
    lambda_ : float
        L2 penalty coefficient applied to the kernel and the bias of every
        layer (0 disables the penalty).
    n_units : int
        Number of units in each hidden layer. Defaults to 10.

    Returns
    -------
    A ``Sequential`` model; the caller is responsible for compiling it.

    Raises
    ------
    ValueError
        If ``n_layer`` or ``n_units`` is smaller than 1.
    """
    # Reject impossible architectures up front instead of silently
    # falling back to a single-layer model.
    if n_layer < 1:
        raise ValueError('n_layer must be >= 1, got {}'.format(n_layer))
    if n_units < 1:
        raise ValueError('n_units must be >= 1, got {}'.format(n_units))

    model = Sequential()
    # Hidden layers: all but the last layer
    for _ in range(n_layer - 1):
        model.add(Dense(n_units, activation='relu',
                        kernel_regularizer=l2(lambda_),
                        bias_regularizer=l2(lambda_)))

    # Output layer: a single sigmoid unit for the binary label
    model.add(Dense(1, activation='sigmoid',
                    kernel_regularizer=l2(lambda_),
                    bias_regularizer=l2(lambda_)))
    return model

## [1] 2-layer DNN, w/o regularization

In [46]:
# Baseline network: 2 layers, L2 penalty switched off (lambda_=0)
model = create_model(n_layer=2, lambda_=0)
# Adam with a 1e-3 learning rate; binary cross-entropy loss, accuracy reported as 'acc'
opt = Adam(learning_rate=1e-3)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['acc'])

In [47]:
# Directory where TensorBoard will store the logs of this run
logdir = os.path.join('logs', '[1]no_regularization')
tb_callback = TensorBoard(logdir)
# Stop training once 'val_acc' has gone 20 epochs without improving
es_callback = EarlyStopping(monitor='val_acc', patience=20)
fit_callbacks = [tb_callback, es_callback]
# Hold out 1/9 of the training data for validation; train silently for at most 100 epochs
hist = model.fit(
    X_train, y_train,
    epochs=100,
    validation_split=1/9,
    verbose=0,
    callbacks=fit_callbacks,
)
# Last expression of the cell, so the test-set result is displayed
model.evaluate(X_test, y_test)



[0.32937920093536377, 0.8836104273796082]

In [48]:
# Open TensorBoard inline on the top-level logs/ directory
%tensorboard --logdir=logs/

Reusing TensorBoard on port 6006 (pid 7064), started 7:12:32 ago. (Use '!kill 7064' to kill it.)

## [2] Number of layers (w/o regularization) 

In [None]:
# Network depths to compare (no regularization)
n_layer_list = [1,2,3,4,5]
# Test-set result of model.evaluate for each depth, kept so the runs can be
# compared after the loop instead of being thrown away.
test_scores_by_n_layer = {}

for n_layer in n_layer_list:
    # Fresh model and optimizer for every depth
    model = create_model(n_layer=n_layer,lambda_=0)
    opt = Adam(learning_rate=1e-3)
    model.compile(optimizer=opt,
                  loss='binary_crossentropy',
                  metrics=['acc'])
    # One TensorBoard sub-directory per depth
    logdir = os.path.join('logs','[2]n_layer_wo_regularization','{}_layer'.format(n_layer))
    tb_callback = TensorBoard(logdir)
    es_callback = EarlyStopping(monitor='val_acc', patience=20)
    hist = model.fit(X_train, y_train,
                  validation_split=1/9, epochs = 100,
                  verbose=0,callbacks=[tb_callback, es_callback])
    # Record and report the test result for this configuration
    test_scores_by_n_layer[n_layer] = model.evaluate(X_test, y_test, verbose=0)
    print('n_layer={}: test [loss, acc] = {}'.format(
        n_layer, test_scores_by_n_layer[n_layer]))

In [None]:
# Open TensorBoard inline on the log directory of the layer-count experiment
%tensorboard --logdir=logs/[2]n_layer_wo_regularization

## [3] With regularization (2-layer DNN) 

In [None]:
# L2 penalty coefficients to compare on the 2-layer network
lambda_list = [1e-3, 1e-2, 1e-1, 1, 10]
# Test-set result of model.evaluate for each coefficient, kept so the runs
# can be compared after the loop instead of being thrown away.
test_scores_by_lambda = {}

for lambda_ in lambda_list:
    # Fresh model and optimizer for every coefficient
    model = create_model(n_layer=2, lambda_=lambda_)
    opt = Adam(learning_rate=1e-3)
    model.compile(optimizer=opt,
                  loss='binary_crossentropy',
                  metrics=['acc'])
    # One TensorBoard sub-directory per coefficient
    logdir = os.path.join('logs','[3]2_layer_w_regularization','lambda_{}'.format(lambda_))
    tb_callback = TensorBoard(logdir)
    es_callback = EarlyStopping(monitor='val_acc',patience=20)
    hist = model.fit(X_train, y_train,
                  validation_split=1/9, epochs = 100,
                  verbose=0, callbacks=[tb_callback, es_callback] )
    # Record and report the test result for this configuration
    test_scores_by_lambda[lambda_] = model.evaluate(X_test, y_test, verbose=0)
    print('lambda_={}: test [loss, acc] = {}'.format(
        lambda_, test_scores_by_lambda[lambda_]))

In [None]:
# Open TensorBoard inline on the log directory of the regularization experiment
%tensorboard --logdir=logs/[3]2_layer_w_regularization

## [4] Learning rate (2-layer DNN)

In [None]:
# Adam learning rates to compare on the 2-layer network (no regularization)
lr_list = [1e-1, 1e-2, 1e-3, 1e-4]
# Test-set result of model.evaluate for each learning rate, kept so the runs
# can be compared after the loop instead of being thrown away.
test_scores_by_lr = {}

for lr in lr_list:
    # Fresh model and optimizer for every learning rate
    model = create_model(n_layer=2, lambda_=0)
    opt = Adam(learning_rate=lr)
    model.compile(optimizer=opt,
                  loss='binary_crossentropy',
                  metrics=['acc'])
    # One TensorBoard sub-directory per learning rate
    logdir = os.path.join('logs','[4]2_layer_lr','lr_{}'.format(lr))
    tb_callback = TensorBoard(logdir)
    es_callback = EarlyStopping(monitor='val_acc',patience=20)
    hist = model.fit(X_train, y_train,
                  validation_split=1/9, epochs = 100,
                  verbose=0,callbacks=[tb_callback,es_callback])
    # Record and report the test result for this configuration
    test_scores_by_lr[lr] = model.evaluate(X_test, y_test, verbose=0)
    print('lr={}: test [loss, acc] = {}'.format(lr, test_scores_by_lr[lr]))

In [None]:
# Open TensorBoard inline on the log directory of the learning-rate experiment
%tensorboard --logdir=logs/[4]2_layer_lr

## [5] Effect of learning rate decay

In [None]:
# w/o learning rate decay
model = create_model(n_layer=2, lambda_=0)
opt = Adam(learning_rate=1e-3)
model.compile(optimizer=opt,
              loss='binary_crossentropy',
              metrics=['acc'])
logdir = os.path.join('logs','[5]lr_decay','none')
tb_callback = TensorBoard(logdir)
es_callback = EarlyStopping(monitor='val_acc',patience=20)
hist = model.fit(X_train, y_train, 
                validation_split=1/9, epochs = 100, 
                verbose=0, callbacks=[tb_callback, es_callback] )
model.evaluate(X_test,y_test)

In [None]:
# w/ learning rate decay
model = create_model(n_layer=2, lambda_=0)
opt = Adam(learning_rate=1e-3)
model.compile(optimizer=opt,
              loss='binary_crossentropy',
              metrics=['acc'])
logdir = os.path.join('logs','[5]lr_decay','step_decay')
tb_callback = TensorBoard(logdir)
es_callback = EarlyStopping(monitor='val_acc',patience=20)
def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The incoming rate ``lr`` is multiplied by 0.1 when ``epoch`` is 30, 60
    or 90; for every other epoch it is returned unchanged.
    """
    decay_epochs = (30, 60, 90)
    return lr * 0.1 if epoch in decay_epochs else lr
# Wrap the schedule function in a Keras callback
lrs_callback = LearningRateScheduler(scheduler)
fit_callbacks = [tb_callback, es_callback, lrs_callback]
hist = model.fit(
    X_train, y_train,
    epochs=100,
    validation_split=1/9,
    verbose=0,
    callbacks=fit_callbacks,
)
# Last expression of the cell, so the test-set result is displayed
model.evaluate(X_test, y_test)

In [None]:
# Open TensorBoard inline on the log directory of the learning-rate-decay experiment
%tensorboard --logdir=logs/[5]lr_decay