# 31009 - Final Project - CNN Tuning OOD
### Ada, Rohit, Dylan

In [3]:
import numpy as np  
import pandas as pd 
import re   
import nltk  
from nltk.corpus import stopwords           
from nltk.stem.porter import PorterStemmer
from collections import Counter  
import seaborn as sns 
import matplotlib.pyplot as plt     
from IPython.core.display import display, HTML  
import string
import tensorflow as tf
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.preprocessing.text import Tokenizer, text_to_word_sequence
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences   
from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, GRU, Convolution1D, Flatten, LeakyReLU
from tensorflow.keras.layers import Bidirectional, GlobalMaxPool1D, GlobalAveragePooling1D, MaxPooling1D, GlobalMaxPooling1D
from tensorflow.keras.layers import Concatenate
from tqdm import tqdm  
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers
from tensorflow.keras import callbacks
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from keras.models import load_model

from tensorflow.keras.datasets import imdb

from tensorflow.keras.utils import model_to_dot

In [4]:
## Load Data
# Read the pre-cleaned training set; the `target` column holds the labels.
train = pd.read_csv("Cleaned_Train.csv")
train_y = train["target"]


In [5]:
# Fit a Keras tokenizer on the cleaned text to build the word -> integer id vocabulary
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train["text"])

word_index = tokenizer.word_index
# One extra slot on top of the vocabulary size; this is the row count later
# used for the embedding matrix and the Embedding layer's input dimension.
num_words = len(word_index) + 1
print('Number of unique words:', len(word_index))

Number of unique words: 17440


In [6]:
# Turn every cleaned text into its list of integer word ids
training_sequences = tokenizer.texts_to_sequences(train["text"])

# Adding padding at the front of each sequence (and truncating from the front)
# so that every sequence ends up with length 50
training_padded = pad_sequences(
    training_sequences,
    maxlen=50,
    padding='pre',
    truncating='pre',
)

# Hold out 25% of the padded data for evaluation; fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    training_padded,
    train_y,
    test_size=0.25,
    random_state=0,
)

In [7]:
# Matching with Glove embedding 6B.300D
embedding_dim = 300

# Parse the GloVe text file into {token: vector}. Each line is split on
# whitespace: the first field is the token, the rest are its float components.
# (The original cell created `mbedding_dict` here but then wrote to
# `embedding_dict`, which raised NameError on a fresh kernel.)
embedding_dict = {}
with open('glove.6B.300d.txt', 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        vectors = np.asarray(values[1:], 'float32')
        embedding_dict[word] = vectors
# No explicit f.close(): the `with` block has already closed the file.

# One row per tokenizer index; rows for words without a GloVe vector stay all-zero.
embedding_matrix = np.zeros((num_words, embedding_dim))

for word, i in tqdm(word_index.items()):
    if i < num_words:
        embedding_vector = embedding_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

embedding_matrix.shape

100%|██████████| 17440/17440 [00:00<00:00, 499717.60it/s]


(17441, 300)

In [16]:
# Start Tuning Process

from kerastuner.tuners import RandomSearch
from kerastuner import HyperModel, RandomSearch, Hyperband, BayesianOptimization

from tensorflow import keras

# Model Hyperparameters
# Width of each word vector; same value the GloVe embedding matrix was built with.
embedding_dim = 300

# Token count per input sequence; passed as input_length to the Embedding layer.
# Same value as the maxlen=50 used when padding the training sequences.
sequence_length = 50

# NOTE(review): not referenced by any code visible in this notebook — confirm
# whether it is still needed.
NUM_CLASSES = 1


In [26]:
# Building CNN HyperModel Class with Search Space defined
class CNNHyperModel(HyperModel):
    """Keras Tuner hypermodel for a small 1-D CNN binary text classifier.

    The network is: frozen pre-trained embedding -> Conv1D -> global max
    pooling -> Dense -> Dropout -> Dense(1) -> Dense(1, sigmoid).

    Search space registered on ``hp``:
        num_filters       Choice {5, 10}
        kernel_size       Choice {2, 3, 5}
        units             Int 2..10, step 2
        dense_activation  Choice {"relu", "tanh", "sigmoid"} (hidden Dense layer)
        dropout_1         Float 0.0..0.4, step 0.05
        learning_rate     Float 1e-4..1e-2, log sampling

    Relies on the module-level ``num_words``, ``embedding_dim``,
    ``embedding_matrix`` and ``sequence_length`` defined in earlier cells.
    """

    def build(self, hp):
        """Build and compile one candidate model from the hyperparameters in ``hp``.

        Args:
            hp: Keras Tuner ``HyperParameters`` object supplied by the tuner.

        Returns:
            A compiled ``Sequential`` model using Adam, binary cross-entropy
            loss and the accuracy metric.
        """
        model = Sequential()

        # Embedding initialised from the pre-computed matrix and kept frozen.
        model.add(Embedding(num_words,
                            embedding_dim,
                            weights=[embedding_matrix],
                            input_length=sequence_length,
                            trainable=False))

        # Convolution1D layer with Search Space Filter: {5, 10}
        # Convolution1D layer with Search Space Kernel Size: {2, 3, 5}
        model.add(Convolution1D(filters=hp.Choice("num_filters", values=[5, 10], default=5),
                                kernel_size=hp.Choice("kernel_size", values=[2, 3, 5], default=3),
                                padding="valid",
                                activation="relu",
                                strides=1))

        model.add(GlobalMaxPooling1D())

        # Hidden Dense layer with Search Space Units: {2 ~ 10}
        # Hidden Dense layer with Search Space activation Function: {"relu", "tanh", "sigmoid"}
        model.add(
            Dense(
                units=hp.Int(
                    "units", min_value=2, max_value=10, step=2, default=2
                ),
                activation=hp.Choice(
                    "dense_activation",
                    values=["relu", "tanh", "sigmoid"],
                    default="relu",
                ),
            )
        )

        # Dropout layer with Search Space Dropout Rate: {0 ~ 0.4}
        model.add(
            Dropout(
                rate=hp.Float(
                    "dropout_1", min_value=0.0, max_value=0.4, default=0.2, step=0.05,
                )
            )
        )

        # NOTE(review): this linear single-unit layer directly precedes the
        # single-unit output layer and looks redundant; kept so the layer
        # stack is otherwise unchanged — confirm whether it can be dropped.
        model.add(Dense(1))

        # Output layer. The activation is fixed to sigmoid: the model is
        # compiled with binary_crossentropy, which needs an output in [0, 1].
        # Previously this layer re-used the "dense_activation" hyperparameter,
        # so trials that sampled "relu" or "tanh" fed non-probability values
        # into the loss. The hyperparameter itself is still registered by the
        # hidden Dense layer above, so the search space is unchanged.
        model.add(Dense(units=1, activation="sigmoid"))

        # Adam optimizer with Search Space for learning rate (log-sampled)
        model.compile(
            optimizer=keras.optimizers.Adam(
                hp.Float(
                    "learning_rate",
                    min_value=1e-4,
                    max_value=1e-2,
                    sampling="LOG",
                    default=1e-3,
                )
            ),
            loss="binary_crossentropy",
            metrics=["accuracy"])
        return model

In [44]:
import time

from kerastuner.tuners import (
    BayesianOptimization,
    Hyperband,
    RandomSearch,
)
from loguru import logger
from pathlib import Path


# Initial Hyperparameter tuning parameter
# Passed as `seed` to every tuner built in define_tuners.
SEED = 0

# `epochs` argument given to tuner.search() in tuner_evaluation.
N_EPOCH_SEARCH = 40
# `max_epochs` argument of the Hyperband tuner.
HYPERBAND_MAX_EPOCHS = 40
# `max_trials` argument of the random-search and Bayesian tuners.
MAX_TRIALS = 100
# `executions_per_trial` argument of the random-search and Hyperband tuners.
EXECUTION_PER_TRIAL = 2
# `num_initial_points` argument of the Bayesian tuner.
BAYESIAN_NUM_INITIAL_POINTS = 1


# Helper Functions
def run_hyperparameter_tuning():
    """Run every configured tuner on the CNN hypermodel and log its metrics.

    Builds a ``CNNHyperModel``, obtains the tuners from ``define_tuners``
    (their working directories are derived from ``./tuning/``), and for each
    tuner calls ``tuner_evaluation`` on the module-level
    ``X_train``/``X_test``/``Y_train``/``Y_test`` split. The elapsed time,
    accuracy and loss of each tuner are logged as they finish, followed by
    the full list of ``[elapsed_time, loss, accuracy]`` rows. Returns nothing.
    """
    tuners = define_tuners(
        CNNHyperModel(),
        directory=Path("./tuning/"),
        project_name="simple_cnn_tuning",
    )

    # One [elapsed_time, loss, accuracy] row per tuning method
    metrics_per_tuner = []
    for tuner in tuners:
        elapsed_time, loss, accuracy = tuner_evaluation(
            tuner, X_train, X_test, Y_train, Y_test
        )
        logger.info(
            f"Elapsed time = {elapsed_time:10.4f} s, accuracy = {accuracy}, loss = {loss}"
        )
        metrics_per_tuner.append([elapsed_time, loss, accuracy])

    # Log the collected results of all tuners in one line
    logger.info(metrics_per_tuner)


def tuner_evaluation(tuner, X_train, X_test, Y_train, Y_test):
    """Run one tuner's search and score its best model on the held-out data.

    Args:
        tuner: Tuner object exposing ``search_space_summary``, ``search``,
            ``results_summary`` and ``get_best_models``.
        X_train, Y_train: Data handed to ``tuner.search``; 10% of it is used
            for validation via ``validation_split=0.1``.
        X_test, Y_test: Data the best model is evaluated on.

    Returns:
        Tuple ``(elapsed_time, loss, accuracy)`` where ``elapsed_time`` is the
        wall-clock duration of ``tuner.search`` in seconds and the other two
        values come from ``evaluate`` on the best model.
    """
    # Print the search space before starting
    tuner.search_space_summary()

    # Time only the search itself
    logger.info("Start hyperparameter tuning")
    started_at = time.time()
    tuner.search(X_train, Y_train, epochs=N_EPOCH_SEARCH, validation_split=0.1)
    elapsed_time = time.time() - started_at

    # Print a summary of the completed trials
    tuner.results_summary()

    # Score the single best model found by the search
    top_model = tuner.get_best_models(num_models=1)[0]
    loss, accuracy = top_model.evaluate(X_test, Y_test)
    return elapsed_time, loss, accuracy


def define_tuners(hypermodel, directory, project_name):
    """Create the random-search, Hyperband and Bayesian tuners for ``hypermodel``.

    All three optimise ``val_accuracy`` with the module-level ``SEED`` and
    share ``project_name``; each writes to its own directory, formed by
    appending a suffix to ``directory``.

    Notes carried over from the original authors: when everything is run
    together the final result is replaced by the Bayesian tuner's, so check
    the log file for each method's output, or run the methods one at a time
    by returning a single-element list (``[random_tuner]``,
    ``[hyperband_tuner]`` or ``[bayesian_tuner]``) instead.

    Args:
        hypermodel: Hypermodel instance passed to every tuner.
        directory: Base path used as the prefix of each tuner's directory.
        project_name: Project name passed to every tuner.

    Returns:
        ``[random_tuner, hyperband_tuner, bayesian_tuner]``.
    """
    # Arguments common to all three tuners
    shared = dict(objective="val_accuracy", seed=SEED, project_name=project_name)

    # Random search tuner
    random_tuner = RandomSearch(
        hypermodel,
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{directory}_random_search",
        **shared,
    )

    # Hyperband tuner
    hyperband_tuner = Hyperband(
        hypermodel,
        max_epochs=HYPERBAND_MAX_EPOCHS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{directory}_hyperband",
        **shared,
    )

    # Bayesian tuner. Unlike the two above, no executions_per_trial is passed
    # here, so the tuner's own default applies.
    bayesian_tuner = BayesianOptimization(
        hypermodel,
        num_initial_points=BAYESIAN_NUM_INITIAL_POINTS,
        max_trials=MAX_TRIALS,
        directory=f"{directory}_bayesian",
        **shared,
    )

    return [random_tuner, hyperband_tuner, bayesian_tuner]


In [45]:
# Run the search with every tuner from define_tuners; per-tuner timing, loss
# and accuracy are written through the loguru logger rather than returned.
run_hyperparameter_tuning()

Trial 90 Complete [00h 00m 56s]
val_accuracy: 0.7950963079929352

Best val_accuracy So Far: 0.8196147084236145
Total elapsed time: 00h 24m 44s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in cifar10_hyperband\simple_cnn_tuning
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
num_filters: 10
kernel_size: 3
units: 4
dense_activation: sigmoid
dropout_1: 0.25
learning_rate: 0.0008436373753133344
tuner/epochs: 14
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.8196147084236145
Trial summary
Hyperparameters:
num_filters: 5
kernel_size: 5
units: 4
dense_activation: sigmoid
dropout_1: 0.30000000000000004
learning_rate: 0.00040536211023129484
tuner/epochs: 40
tuner/initial_epoch: 14
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 23b96fa765ae590466c2b4eab6362856
Score: 0.8196147084236145
Trial summary
Hyperparameters:
num_filters: 5
kernel_size: 5
units: 8
dense_activation: sigmoid
dropout_1: 0.350000000000000

2020-12-05 21:53:35.959 | INFO     | __main__:run_hyperparameter_tuning:36 - Elapsed time =  1484.1293 s, accuracy = 0.8009454011917114, loss = 0.46071144938468933
2020-12-05 21:53:35.960 | INFO     | __main__:run_hyperparameter_tuning:39 - [[1484.129281282425, 0.46071144938468933, 0.8009454011917114]]
