# Car test-time prediction

## Loading the Mercedes-Benz (MB) dataset

In [1]:
import pandas as pd
# Read the raw table; the path is resolved relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='mercedes_test.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'mercedes_test.csv'

## Data pre-processing

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Choose categorical data columns (stored as object dtype)
cf = data.select_dtypes(include=['object']).columns
# To change it into "categorical" data type
data[cf] = data[cf].astype('category')
# One hot encoding
data = pd.get_dummies(data)
# Obtain X from data (excluding 'ID' and 'y')
X_df = data.drop(['ID', 'y'], axis=1)
# Obtain y from data
y_df = data['y']

# Convert y_df into binary labels: 1.0 when y is below the median, else 0.0
TF_vector = (y_df < np.median(y_df))
y_df = TF_vector.astype(float)

# Convert data frame into numpy array.
# The explicit float dtype matters: pandas >= 2.0 produces boolean dummy
# columns, and a frame mixing bool with integer columns would otherwise come
# out as an object array, which Keras cannot convert to a tensor.
X, y = X_df.to_numpy(dtype=float), y_df.to_numpy()

# Split into train and test datasets (90% / 10%, stratified on the label).
# random_state is fixed so that a fresh "Restart & Run All" yields the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

## DNN: Hyperparameter search via cross-validation

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

In [None]:
pip install scikeras

In [None]:
# The scikit-learn wrapper bundled with TensorFlow was removed in TF 2.12.
# Prefer it when present (original behaviour) and fall back to the SciKeras
# wrapper otherwise.
# NOTE(review): SciKeras routes build-function arguments differently and keeps
# the fitted network in `model_`; verify the search and save cells under it.
try:
    from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
except ImportError:
    from scikeras.wrappers import KerasClassifier

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
def build_model(n_layer=2,lambda_=0,lr=1e-3,n_units=20):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    n_layer : int
        Total number of Dense layers including the sigmoid output layer, so
        the network gets ``n_layer - 1`` hidden ReLU layers.
    lambda_ : float
        L2 penalty applied to the kernel and the bias of every layer.
    lr : float
        Learning rate handed to the Adam optimizer.
    n_units : int
        Width of each hidden layer. Defaults to 20, the value that used to be
        hard-coded, so existing callers are unaffected.

    Returns
    -------
    A compiled ``Sequential`` model using binary cross-entropy loss and
    reporting accuracy under the metric name ``'acc'``.
    """
    model = Sequential()

    # Hidden stack: n_layer - 1 identical regularized ReLU layers.
    for _ in range(n_layer - 1):
        model.add(Dense(n_units, activation='relu',
                        kernel_regularizer=l2(lambda_),
                        bias_regularizer=l2(lambda_)))

    # Single sigmoid unit for the binary label.
    model.add(Dense(1, activation='sigmoid',
                    kernel_regularizer=l2(lambda_),
                    bias_regularizer=l2(lambda_)))

    optimizer = Adam(learning_rate=lr)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['acc'])
    return model

In [None]:
# Wrap the Keras builder in a scikit-learn-like estimator.
# The tuned arguments are declared on the constructor (with build_model's own
# defaults) so the search can set them by name; this form is accepted by both
# the legacy TensorFlow wrapper and SciKeras.
model = KerasClassifier(build_model, n_layer=2, lambda_=0)

# Candidate values for each hyperparameter
n_layer = [2,5,10]
lambda_ = [1e-3,1e-2,1e-1,1,10]
grid = {'n_layer':n_layer,'lambda_':lambda_}

# n_iter equals the number of combinations (3 * 5 = 15), so every candidate is
# evaluated once; random_state only fixes the order in which they are drawn.
cv = RandomizedSearchCV(model, grid,
                        n_iter=len(n_layer) * len(lambda_),
                        cv=5,
                        random_state=42)

In [None]:
# Run the cross-validated search; the extra keyword arguments are forwarded
# to the estimator's fit (10 epochs per candidate, no per-epoch logging).
cv.fit(
    X_train,
    y_train,
    epochs=10,
    verbose=0,
)

In [None]:
# Display the search log: per-candidate fit/score timings, sampled parameters and test scores
cv.cv_results_

{'mean_fit_time': array([3.06601644, 3.82905293, 6.10004716, 2.88781676, 4.27630439,
        6.03118043, 2.79550567, 3.62947946, 5.44901824, 2.92013431,
        3.9192627 , 5.30468607, 2.76934686, 3.89925404, 5.59904408]),
 'std_fit_time': array([0.23373162, 0.17218339, 0.31065451, 0.27062337, 0.35877938,
        0.28334342, 0.14102986, 0.12940562, 0.21499381, 0.38372727,
        0.19477592, 0.10804774, 0.16849958, 0.28835562, 0.16325863]),
 'mean_score_time': array([0.1961771 , 0.30822968, 0.35282798, 0.19484248, 0.29811249,
        0.38234458, 0.1984549 , 0.26301684, 0.32303238, 0.20584655,
        0.24384947, 0.38469467, 0.20960279, 0.29249825, 0.3631844 ]),
 'std_score_time': array([0.0133205 , 0.07441279, 0.07247743, 0.00705605, 0.05835765,
        0.05312108, 0.00787894, 0.01601813, 0.01323504, 0.01020617,
        0.01294958, 0.09252542, 0.01193084, 0.08635409, 0.04688477]),
 'param_n_layer': masked_array(data=[2, 5, 10, 2, 5, 10, 2, 5, 10, 2, 5, 10, 2, 5, 10],
              mask

## Store logs into csv file

In [None]:
# Persist a trimmed version of the search log as CSV
import pandas as pd

# Only these columns of the log are kept
columns = ['params','mean_test_score','std_test_score','rank_test_score']

# Build the frame straight from the results dict (one column per key) and
# narrow it down in the same expression
df_DNN = pd.DataFrame(cv.cv_results_)[columns]
df_DNN.to_csv("logs_DNN.csv")

## Save the best model

In [None]:
# Estimator refit by the search with the best-scoring parameters
best_model_DNN = cv.best_estimator_

# Locate the trained Keras network inside the wrapper. The legacy TensorFlow
# wrapper exposes it as `.model`; SciKeras stores it in `.model_` (there,
# `.model` is the build function and has no `save`).
if hasattr(best_model_DNN, 'model_'):
    fitted_net = best_model_DNN.model_
else:
    fitted_net = best_model_DNN.model
fitted_net.save('best_model_DNN')

INFO:tensorflow:Assets written to: best_model_DNN\assets


## Load the best model

In [None]:
from tensorflow.keras.models import load_model

# Restore the network saved under 'best_model_DNN'
loaded_model = load_model(filepath='best_model_DNN')

# Score it on the held-out split; the cell displays the values returned by evaluate
loaded_model.evaluate(x=X_test, y=y_test)



[0.41655808687210083, 0.8479809761047363]