In [1]:
###############################################################################
######################   UsedCarPrices_CarGurus  ##############################
######################  Regression - Nonlinear   ##############################
######################        MLP Methods        ##############################
###############################################################################

In [2]:
# Install and import packages
!pip install keras-tuner
from tensorflow import keras
import keras_tuner
from keras_tuner import BayesianOptimization
import os
import random
import pandas as pd
import numpy as np
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import datetime
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib as plt
from matplotlib import pyplot
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import seaborn as sns

# Report the runtime environment (library versions, execution mode, visible GPUs)
print('\n')
print(f"TensorFlow version: {tf.__version__}")
print(f"Eager execution is: {tf.executing_eagerly()}")
print(f"Keras version: {tf.keras.__version__}")
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Collecting keras-tuner
  Downloading keras_tuner-1.1.2-py3-none-any.whl (133 kB)
[?25l[K     |██▌                             | 10 kB 26.3 MB/s eta 0:00:01[K     |█████                           | 20 kB 24.6 MB/s eta 0:00:01[K     |███████▍                        | 30 kB 16.5 MB/s eta 0:00:01[K     |█████████▉                      | 40 kB 14.3 MB/s eta 0:00:01[K     |████████████▎                   | 51 kB 7.0 MB/s eta 0:00:01[K     |██████████████▊                 | 61 kB 8.2 MB/s eta 0:00:01[K     |█████████████████▏              | 71 kB 9.1 MB/s eta 0:00:01[K     |███████████████████▋            | 81 kB 8.7 MB/s eta 0:00:01[K     |██████████████████████          | 92 kB 9.6 MB/s eta 0:00:01[K     |████████████████████████▌       | 102 kB 8.2 MB/s eta 0:00:01[K     |███████████████████████████     | 112 kB 8.2 MB/s eta 0:00:01[K     |█████████████████████████████▍  | 122 kB 8.2 MB/s eta 0:00:01[K     |███████████████████████████████▉| 133 kB 8.2 MB/s eta 0:

In [None]:
# Reproducibility in TensorFlow (also seeds NumPy and Python's `random`)
def init_seeds(seed=42):
    """Seed the Python, NumPy and TensorFlow RNGs and install a single-threaded session.

    Args:
        seed: Integer seed applied to `numpy.random`, `random` and
            `tf.random`. Defaults to 42.

    Returns:
        The `tf.compat.v1.Session` that was created and registered as the
        Keras backend session.

    Side effects:
        Sets the `TF_CUDNN_DETERMINISTIC` and `TF_DETERMINISTIC_OPS`
        environment variables to 'true' and replaces the Keras backend session.
    """
    np.random.seed(seed)
    random.seed(seed)
    # Restrict TensorFlow to one intra-op and one inter-op thread.
    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                            inter_op_parallelism_threads=1)
    # Environment flags requesting deterministic TensorFlow/cuDNN kernels.
    os.environ['TF_CUDNN_DETERMINISTIC'] = 'true'
    os.environ['TF_DETERMINISTIC_OPS'] = 'true'
    tf.random.set_seed(seed)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=session_conf)
    tf.compat.v1.keras.backend.set_session(sess)
    return sess

In [None]:
# Seed every RNG before any data splitting or model building happens.
# The call is the last expression in the cell, so the returned Session object
# is echoed as the cell output.
init_seeds(seed=42)

<tensorflow.python.client.session.Session at 0x7ff5bf614190>

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset and result folders used below are reachable.
from google.colab import drive 
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Change the working directory to the data folder so the CSV can be read by
# its bare filename.
%cd /content/drive/MyDrive/UsedCarsCarGurus/Data/

/content/drive/MyDrive/UsedCarsCarGurus/Data


In [None]:
# Load the used-car dataset; low_memory=False makes pandas read the file in a
# single pass instead of in chunks.
df = pd.read_csv('usedCars_final.csv', low_memory=False)
print(f'Number of rows and columns: {df.shape}')

Number of rows and columns: (327912, 28)


In [None]:
# Separate the target ('price') from the predictor columns
y = df['price']
X = df.drop(columns=['price'])

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train: Create dummy variables for categorical variables
X_train = pd.get_dummies(X_train, drop_first=True)

# Test: Create dummy variables, then align them to the training columns.
# Encoding the two splits independently can yield different column sets or
# orders (a category present in only one split), and drop_first could drop a
# different baseline level in each split. The test split is therefore encoded
# in full and reindexed to the training layout: columns unknown to the training
# set are discarded and training columns absent from the test set are filled
# with 0.
X_test = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

In [None]:
# MinMax Scaling
# Fit the scaler on the training split only and reuse the learned min/max for
# the test split. Re-fitting on the test data would scale the two splits with
# different parameters and leak test-set statistics into the evaluation.
# transform() requires X_test to have the same columns as X_train.
mn = MinMaxScaler()
X_train = pd.DataFrame(mn.fit_transform(X_train))
X_test = pd.DataFrame(mn.transform(X_test))

In [None]:
# Switch to the HPO results directory so that everything written below through
# relative paths (TensorBoard logs, checkpoint file, tuner project folder)
# lands there.
%cd /content/drive/MyDrive/UsedCarsCarGurus/ML/MLP/Results/HPO/

In [None]:
# Timestamped TensorBoard log directory, e.g. logs/fit/20220101-093000
log_folder = f"logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"

# Load the TensorBoard notebook extension (enables the %tensorboard magic).
%load_ext tensorboard

In [None]:
# Set up callbacks
# NOTE(review): the filename says "weights_only", but save_weights_only is not
# set on the checkpoint below — confirm which format is intended.
filepath = 'MLP_weights_only_b1_HPO.h5'

# Directory part of `filepath`; empty here because the path has no folder.
checkpoint_dir = os.path.dirname(filepath)

# Write TensorBoard logs (with per-epoch weight histograms) to `log_folder`.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_folder,
                                                      histogram_freq=1)

# The checkpoint is selected on validation loss, the same quantity early
# stopping watches, rather than on the training-set MSE, so the saved model is
# the one that generalizes best instead of the one that fits the training data
# best.
callbacks_list = [EarlyStopping(monitor='val_loss', patience = 5),
                  ModelCheckpoint(filepath, monitor='val_loss',
                                  save_best_only = True, mode='min'),
                  tensorboard_callback]


In [None]:
# Define model for HPO
def build_model(hp):
    """Build and compile an MLP regressor from a set of tuner hyperparameters.

    Search space registered on `hp`:
        num_layers     -- number of hidden Dense layers, 3 to 7.
        layer_size<i>  -- units of hidden layer i, 20 to 70 in steps of 5.
        learning_rate  -- Adam learning rate, one of 1e-2, 1e-3, 1e-4.

    The network is the ReLU hidden layers, a single Dropout(0.3) after the last
    hidden layer, and a one-unit linear output. It is compiled with MAE loss
    and MSE as an extra metric.

    Args:
        hp: Hyperparameter container supplied by the tuner. Inside this
            function it shadows the module-level `hp` import.

    Returns:
        The compiled Keras model.
    """
    net = keras.Sequential()
    n_hidden = hp.Int("num_layers", 3, 7)
    for idx in range(n_hidden):
        width = hp.Int(f"layer_size{idx}", min_value=20, max_value=70, step=5)
        net.add(Dense(units=width, activation="relu",
                      kernel_initializer='normal'))
    net.add(Dropout(0.3))
    net.add(Dense(1))

    lr = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
    net.compile(loss="mae", metrics=["mse"],
                optimizer=keras.optimizers.Adam(lr))
    return net

In [None]:
# Define the search conditions
# Bayesian optimization over build_model's search space, minimizing validation
# loss. Up to 20 hyperparameter sets are tried and each one is trained twice
# (executions_per_trial=2). A fixed seed makes the sequence of proposed trials
# repeatable, in line with the seeding done elsewhere in this notebook.
# overwrite=True starts the search afresh rather than resuming from results
# already stored in the project directory.
tuner = BayesianOptimization(
    build_model,
    objective="val_loss",
    max_trials=20,
    executions_per_trial=2,
    seed=42,
    overwrite=True,
    directory="MLP_b1_HPO",
    project_name="MLP_b1_HPO"
)

In [None]:
# Print a summary of the search space (each registered hyperparameter with its
# range/choices) before launching the search.
tuner.search_space_summary()

Search space summary
Default search space size: 5
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 3, 'max_value': 7, 'step': 1, 'sampling': None}
layer_size0 (Int)
{'default': None, 'conditions': [], 'min_value': 20, 'max_value': 70, 'step': 5, 'sampling': None}
layer_size1 (Int)
{'default': None, 'conditions': [], 'min_value': 20, 'max_value': 70, 'step': 5, 'sampling': None}
layer_size2 (Int)
{'default': None, 'conditions': [], 'min_value': 20, 'max_value': 70, 'step': 5, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [None]:
# Begin the search for the best hyperparameters.
# Each trial trains for 2 epochs with 20% of the training rows held out for
# validation (the source of the 'val_loss' objective).
# NOTE(review): batch_size=1 performs one weight update per sample; the
# recorded run took roughly 10h 42m in total — confirm this batch size is
# intentional.
# NOTE(review): with epochs=2, the EarlyStopping callback (patience=5) can
# never trigger — confirm the epoch budget.
tuner.search(X_train, y_train, epochs=2, validation_split=0.2, batch_size=1,
             callbacks=callbacks_list)

Trial 20 Complete [00h 31m 58s]
val_loss: 3142.6368408203125

Best val_loss So Far: 3032.3583984375
Total elapsed time: 10h 42m 08s
INFO:tensorflow:Oracle triggered exit


In [None]:
# Retrieve the best model(s): ask the tuner for the three best models found by
# the search (returned as a list; presumably ordered best first — confirm
# against the Keras Tuner documentation).
models = tuner.get_best_models(num_models=3)

In [None]:
# Print a summary of the results from the trials: the hyperparameters and
# objective score of the best trials.
tuner.results_summary()

Results summary
Results in MLP_b1_HPO/MLP_b1_HPO
Showing 10 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
num_layers: 3
layer_size0: 65
layer_size1: 20
layer_size2: 70
learning_rate: 0.01
layer_size3: 50
layer_size4: 55
layer_size5: 50
layer_size6: 70
Score: 3032.3583984375
Trial summary
Hyperparameters:
num_layers: 3
layer_size0: 70
layer_size1: 20
layer_size2: 70
learning_rate: 0.01
layer_size3: 25
layer_size4: 20
layer_size5: 65
layer_size6: 70
Score: 3097.086669921875
Trial summary
Hyperparameters:
num_layers: 3
layer_size0: 70
layer_size1: 20
layer_size2: 70
learning_rate: 0.01
layer_size3: 25
layer_size4: 70
layer_size5: 65
Score: 3099.398681640625
Trial summary
Hyperparameters:
num_layers: 3
layer_size0: 70
layer_size1: 20
layer_size2: 70
learning_rate: 0.01
layer_size3: 20
layer_size4: 60
layer_size5: 20
layer_size6: 65
Score: 3118.8546142578125
Trial summary
Hyperparameters:
num_layers: 3
layer_size0: 65
layer_size1: 20
layer_size2: 70


In [None]:
# Fit best model from HPO 