In [1]:
# We will use the LIAR dataset from this paper: https://aclanthology.org/P17-2067/
# Link https://www.cs.ucsb.edu/~william/data/liar_dataset.zip

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import pandas as pd
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [4]:
# Load and see training data
liar_data_train = pd.read_csv(os.path.join(r'/content/drive/MyDrive/HW2/liar_dataset',
                                           'train.tsv'), sep='\t', header=None)
liar_data_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,2635.json,false,Says the Annies List political group supports ...,abortion,dwayne-bohac,State representative,Texas,republican,0.0,1.0,0.0,0.0,0.0,a mailer
1,10540.json,half-true,When did the decline of coal start? It started...,"energy,history,job-accomplishments",scott-surovell,State delegate,Virginia,democrat,0.0,0.0,1.0,1.0,0.0,a floor speech.
2,324.json,mostly-true,"Hillary Clinton agrees with John McCain ""by vo...",foreign-policy,barack-obama,President,Illinois,democrat,70.0,71.0,160.0,163.0,9.0,Denver
3,1123.json,false,Health care reform legislation is likely to ma...,health-care,blog-posting,,,none,7.0,19.0,3.0,5.0,44.0,a news release
4,9028.json,half-true,The economic turnaround started at the end of ...,"economy,jobs",charlie-crist,,Florida,democrat,15.0,9.0,20.0,19.0,2.0,an interview on CNN


In [5]:
print(set(liar_data_train[1].to_list()))

{'barely-true', 'half-true', 'false', 'pants-fire', 'mostly-true', 'true'}


In [6]:
# Categories
cat_dict = {'pants-fire': 0,
            'false': 1,
            'barely-true': 2,
            'half-true': 3,
            'mostly-true': 4,
            'true': 5}

In [7]:
# Load all splits of data
# Write an API to load training, validation, and test sets
def load_dataset(split_type='train'):
    '''
    split_type is either 'train', 'valid', or 'test'
    Returns X, y
    '''
    liar_data = pd.read_csv(os.path.join(r'/content/drive/MyDrive/HW2/liar_dataset',
                                         '{}.tsv'.format(split_type)),
                            sep='\t',
                            header=None)
    cat_dict = {'pants-fire': 0,
            'false': 1,
            'barely-true': 2,
            'half-true': 3,
            'mostly-true': 4,
            'true': 5}

    X = liar_data[2].to_list()
    y = [cat_dict[i] for i in liar_data[1].to_list()]
    return X, y

X_train, y_train = load_dataset('train')
X_valid, y_valid = load_dataset('valid')
X_test, y_test = load_dataset('test')

In [8]:
# Preprocess text
# Create tokenizer
tokenizer = Tokenizer()
# fit the tokenizer on the documents
tokenizer.fit_on_texts(X_train)

# define vocabulary size (largest integer value)
vocab_size = len(tokenizer.word_index) + 1

# sequence encode
encoded_train = tokenizer.texts_to_sequences(X_train)

# pad sequences
# max_length will be a tunable hyperparameter
max_length = 20
X_train = pad_sequences(encoded_train, maxlen=max_length, padding='post')

# Preprocess validation set
# sequence encode
encoded_valid = tokenizer.texts_to_sequences(X_valid)

# pad sequences
X_valid = pad_sequences(encoded_valid, maxlen=max_length, padding='post')


# Preprocess test set
# sequence encode
encoded_test = tokenizer.texts_to_sequences(X_test)

# pad sequences
X_test = pad_sequences(encoded_test, maxlen=max_length, padding='post')

In [9]:
# Convert categories to one-hot encoding
y_train = to_categorical(y_train)
y_valid = to_categorical(y_valid)
y_test = to_categorical(y_test)

# Start your solutions below

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout, Dense, Flatten, MaxPooling1D, Embedding, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


import string
from collections import Counter

In [11]:
print(len(X_train))

10240


In [12]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras
!pip install -q -U keras-tuner
import keras_tuner as kt

possible_activations = ["linear", "sigmoid", "relu", "tanh"]
possible_lr = [1e-2, 1e-3, 1e-4, 1e-5]
possible_dropout = [0.2, 0.3, 0.4, 0.5]

#CNN without pre-trained embeddings
def NLP_Model_A_Train(hp):
  filters_num = hp.Int('filters1', min_value=16, max_value=64, step=4)
  neurons_num2 = hp.Int('neurons_num2', min_value=10, max_value=100, step=10)
  hp_activation1 = hp.Choice('activation1', values=possible_activations)
  hp_activation2 = hp.Choice('activation2', values=possible_activations)
  hp_activation3 = hp.Choice('activation3', values=possible_activations)
  hp_learning_rate = hp.Choice('learning_rate', values=possible_lr)
  hp_dropout_rate1 = hp.Choice('dropout_rate1', values=possible_dropout)

  model = Sequential()
  model.add(Embedding(vocab_size, 100, input_length=max_length))
  model.add(Conv1D(filters=filters_num, kernel_size=8, activation=hp_activation1))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Dropout(hp_dropout_rate1))
  model.add(Flatten())
  model.add(Dense(neurons_num2, activation=hp_activation2))
  model.add(Dense(6, activation=hp_activation3))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model



[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m122.9/129.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.5/129.5 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/950.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m501.8/950.8 kB[0m [31m14.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m942.1/950.8 kB[0m [31m16.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hUsing TensorFlow backend


In [13]:
  tuner = kt.RandomSearch(NLP_Model_A_Train,
                      objective='val_accuracy',
                      directory='my_dir',
                      overwrite=True,
                      max_trials = 10)

In [14]:
# Early stopping
es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   verbose=0,
                   patience=20,
                   restore_best_weights=True)

tuner.search(X_train, y_train,
             epochs=100,
             validation_data=(X_valid, y_valid),
             callbacks=[es])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

Trial 1 Complete [00h 00m 55s]
val_accuracy: 0.090342678129673

Best val_accuracy So Far: 0.090342678129673
Total elapsed time: 00h 00m 55s

Search: Running Trial #2

Value             |Best Value So Far |Hyperparameter
52                |48                |filters1
20                |80                |neurons_num2
relu              |linear            |activation1
tanh              |linear            |activation2
linear            |tanh              |activation3
0.0001            |0.01              |learning_rate
0.5               |0.2               |dropout_rate1

Epoch 1/100
 18/320 [>.............................] - ETA: 28s - loss: 6.7735 - accuracy: 0.1493

KeyboardInterrupt: ignored

In [None]:
print(
    best_hps.get('filters1'),
    best_hps.get('neurons_num2')
)

print(
    best_hps.get('activation1'),
    best_hps.get('activation2'),
    best_hps.get('activation3'),

    )
print(
    best_hps.get('dropout_rate1'),
    )
print(
    best_hps.get('learning_rate')
    )

In [None]:
#CNN without pre-trained embeddings
def NLP_Model_A():
  filters_num = 24
  neurons_num2 = 50
  hp_activation1 = "linear"
  hp_activation2 = "tanh"
  hp_activation3 = "sigmoid"
  hp_learning_rate = 0.0001
  hp_dropout_rate1 = 0.4

  model = Sequential()
  model.add(Embedding(vocab_size, 100, input_length=max_length))
  model.add(Conv1D(filters=filters_num, kernel_size=8, activation=hp_activation1))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Dropout(hp_dropout_rate1))
  model.add(Flatten())
  model.add(Dense(neurons_num2, activation=hp_activation2))
  model.add(Dense(6, activation=hp_activation3))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model


In [None]:
NLP_A = NLP_Model_A()

In [None]:
# fit network
history = NLP_A.fit(
    X_train,
    y_train,
    epochs=50,
    verbose=2,
    batch_size=64,
    validation_data=(X_test, y_test))

print(history)


In [None]:
# plot diagnostic learning curves
def summarize_diagnostics(history):
  fig, ax = plt.subplots(1,2, figsize=(20, 10))
  # plot loss
  ax[0].set_title('Loss Curves', fontsize=20)
  ax[0].plot(history.history['loss'], label='train')
  ax[0].plot(history.history['val_loss'], label='test')
  ax[0].set_xlabel('Epochs', fontsize=15)
  ax[0].set_ylabel('Loss', fontsize=15)
  ax[0].legend(fontsize=15)
  # plot accuracy
  ax[1].set_title('Classification Accuracy', fontsize=20)
  ax[1].plot(history.history['accuracy'], label='train')
  ax[1].plot(history.history['val_accuracy'], label='test')
  ax[1].set_xlabel('Epochs', fontsize=15)
  ax[1].set_ylabel('Accuracy', fontsize=15)
  ax[1].legend(fontsize=15)

summarize_diagnostics(history)

In [None]:
# Bidirectional LSTM without pre-trained embeddings
def NLP_Model_B_Train(hp):
  filters_num = hp.Int('filters1', min_value=16, max_value=64, step=4)
  neurons_num2 = hp.Int('neurons_num2', min_value=10, max_value=100, step=10)
  hp_activation1 = hp.Choice('activation1', values=possible_activations)
  hp_activation2 = hp.Choice('activation2', values=possible_activations)
  hp_activation3 = hp.Choice('activation3', values=possible_activations)
  hp_learning_rate = hp.Choice('learning_rate', values=possible_lr)
  hp_dropout_rate1 = hp.Choice('dropout_rate1', values=possible_dropout)

  model = Sequential()
  model.add(Embedding(vocab_size, 100, input_length=max_length))
  #model.add(Conv1D(filters=filters_num, kernel_size=8, activation=hp_activation1))
  model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(100)))
  #model.add(MaxPooling1D(pool_size=3))
  model.add(GlobalMaxPooling1D())
  model.add(Dropout(hp_dropout_rate1))
  model.add(Flatten())
  model.add(Dense(neurons_num2, activation=hp_activation2))
  model.add(Dense(6, activation=hp_activation3))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model

In [None]:
  tuner = kt.RandomSearch(NLP_Model_B_Train,
                      objective='val_accuracy',
                      directory='my_dir',
                      overwrite=True,
                      max_trials = 10)

In [None]:
# Early stopping
es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   verbose=0,
                   patience=20,
                   restore_best_weights=True)

tuner.search(X_train, y_train,
             epochs=100,
             validation_data=(X_valid, y_valid),
             callbacks=[es])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
print(
    best_hps.get('filters1'),
    best_hps.get('neurons_num2')
)

print(
    best_hps.get('activation1'),
    best_hps.get('activation2'),
    best_hps.get('activation3'),

    )
print(
    best_hps.get('dropout_rate1'),
    )
print(
    best_hps.get('learning_rate')
    )

In [None]:
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, GlobalMaxPooling1D, Dense

# Bidirectional LSTM without pre-trained embeddings
def NLP_Model_B():
  filters_num = 20
  neurons_num2 = 50
  hp_activation1 = "linear"
  hp_activation2 = "linear"
  hp_activation3 = "sigmoid"
  hp_learning_rate = 1e-05
  hp_dropout_rate1 = 0.4

  model = Sequential()
  model.add(Embedding(vocab_size, 100, input_length=max_length))
  # model.add(Bidirectional(LSTM(100)))
  model.add(Bidirectional(LSTM(100, return_sequences=True)))
  model.add(GlobalMaxPooling1D())
  model.add(Dropout(hp_dropout_rate1))
  model.add(Flatten())
  model.add(Dense(neurons_num2, activation=hp_activation2))
  model.add(Dense(6, activation=hp_activation3))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model

In [None]:
NLP_B = NLP_Model_B()

In [None]:
# fit network

es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   verbose=1,
                   patience=10,
                   restore_best_weights=True)

history2 = NLP_B.fit(
    X_train,
    y_train,
    epochs=50,
    verbose=2,
    batch_size=64,
    callbacks=[es],
    validation_data=(X_test, y_test))

print(history2)

# model_one = Actitecture_One_CNN()

# es = EarlyStopping(monitor='val_loss',
#                    mode='min',
#                    verbose=1,
#                    patience=10,
#                    restore_best_weights=True)

# history = model_one.fit(train_ds,
#                     epochs=20,
#                     batch_size=batch_size,
#                     validation_data=val_ds,
#                     callbacks=[es],
#                     verbose=1)
summarize_diagnostics(history2)


In [None]:
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, GlobalMaxPooling1D, Dense
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip -q glove.6B.zip



In [None]:
!dir

In [None]:
# Create a dict mapping words to embeddings
# Various embedding sizes are available (50, 100, 200, 300), we will use 100
path_to_glove_file = "glove.6B.100d.txt"
embedding_dim = 100
embedding_index = {}
with open(path_to_glove_file) as f:
    for line in f:
        values = line.split()
        word = values[0]
        vector = np.asarray(values[1:], dtype='float32')
        embedding_index[word] = vector



In [None]:
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, index in tokenizer.word_index.items():
    if index < vocab_size:
        embedding_vector = embedding_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[index] = embedding_vector

In [None]:
# CNN with pre-trained embeddings
def NLP_Model_C_Train(hp):
  filters_num = hp.Int('filters1', min_value=16, max_value=64, step=4)
  neurons_num2 = hp.Int('neurons_num2', min_value=10, max_value=100, step=10)
  hp_activation1 = hp.Choice('activation1', values=possible_activations)
  hp_activation2 = hp.Choice('activation2', values=possible_activations)
  hp_activation3 = hp.Choice('activation3', values=possible_activations)
  hp_learning_rate = hp.Choice('learning_rate', values=possible_lr)
  hp_dropout_rate1 = hp.Choice('dropout_rate1', values=possible_dropout)

  model = Sequential()
  model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length, weights=[embedding_matrix], trainable=True))
  # model.add(Embedding(vocab_size, 100, input_length=max_length))
  # model.add(Bidirectional(LSTM(100)))
  # model.add(Bidirectional(LSTM(100, return_sequences=True)))
  # model.add(Conv1D(128, 5, activation=hp_activation1))
  model.add(Conv1D(filters=filters_num, kernel_size=8, activation=hp_activation1))
  model.add(GlobalMaxPooling1D())
  model.add(Dropout(hp_dropout_rate1))
  model.add(Flatten())
  model.add(Dense(neurons_num2, activation=hp_activation2))
  model.add(Dense(6, activation=hp_activation3))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model

In [None]:
  tuner = kt.RandomSearch(NLP_Model_C_Train,
                      objective='val_accuracy',
                      directory='my_dir',
                      overwrite=True,
                      max_trials = 10)

In [None]:
# Early stopping
es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   verbose=0,
                   patience=20,
                   restore_best_weights=True)

tuner.search(X_train, y_train,
             epochs=50,
             validation_data=(X_valid, y_valid),
             callbacks=[es])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
print(
    best_hps.get('filters1'),
    best_hps.get('neurons_num2')
)

print(
    best_hps.get('activation1'),
    best_hps.get('activation2'),
    best_hps.get('activation3'),

    )
print(
    best_hps.get('dropout_rate1'),
    )
print(
    best_hps.get('learning_rate')
    )

In [None]:
# CNN with pre-trained embeddings
def NLP_Model_C():
  filters_num = 64
  neurons_num2 = 30
  hp_activation1 = "tanh"
  hp_activation2 = "relu"
  hp_activation3 = "relu"
  hp_learning_rate = 0.01
  hp_dropout_rate1 = 0.2

  model = Sequential()
  model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length, weights=[embedding_matrix], trainable=True))
  # model.add(Embedding(vocab_size, 100, input_length=max_length))
  # model.add(Bidirectional(LSTM(100)))
  # model.add(Bidirectional(LSTM(100, return_sequences=True)))
  # model.add(Conv1D(128, activation=hp_activation1))
  model.add(Conv1D(filters=filters_num, kernel_size=8, activation=hp_activation1))
  model.add(GlobalMaxPooling1D())
  model.add(Dropout(hp_dropout_rate1))
  model.add(Flatten())
  model.add(Dense(neurons_num2, activation=hp_activation2))
  model.add(Dense(6, activation=hp_activation3))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model

In [None]:
NLP_C = NLP_Model_C()

In [None]:
es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   verbose=1,
                   patience=10,
                   restore_best_weights=True)

history3 = NLP_C.fit(
    X_train,
    y_train,
    epochs=50,
    verbose=2,
    batch_size=64,
    callbacks=[es],
    validation_data=(X_test, y_test))

print(history3)

In [None]:
summarize_diagnostics(history3)

In [None]:
# Bidirectional LSTM with pre-trained embeddings
def NLP_Model_D_Train(hp):
  filters_num = hp.Int('filters1', min_value=16, max_value=64, step=4)
  neurons_num2 = hp.Int('neurons_num2', min_value=10, max_value=100, step=10)
  hp_activation1 = hp.Choice('activation1', values=possible_activations)
  hp_activation2 = hp.Choice('activation2', values=possible_activations)
  hp_activation3 = hp.Choice('activation3', values=possible_activations)
  hp_learning_rate = hp.Choice('learning_rate', values=possible_lr)
  hp_dropout_rate1 = hp.Choice('dropout_rate1', values=possible_dropout)

  model = Sequential()
  model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length, weights=[embedding_matrix], trainable=False))
  # model.add(Embedding(vocab_size, 100, input_length=max_length))
  model.add(Bidirectional(LSTM(100, return_sequences=True)))
  model.add(GlobalMaxPooling1D())
  model.add(Dropout(hp_dropout_rate1))
  model.add(Flatten())
  model.add(Dense(neurons_num2, activation=hp_activation2))
  model.add(Dense(6, activation=hp_activation3))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model

In [None]:
  tuner = kt.RandomSearch(NLP_Model_D_Train,
                      objective='val_accuracy',
                      directory='my_dir',
                      overwrite=True,
                      max_trials = 10)

In [None]:
# Early stopping
es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   verbose=0,
                   patience=20,
                   restore_best_weights=True)

tuner.search(X_train, y_train,
             epochs=50,
             validation_data=(X_valid, y_valid),
             callbacks=[es])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
print(
    best_hps.get('filters1'),
    best_hps.get('neurons_num2')
)

print(
    best_hps.get('activation1'),
    best_hps.get('activation2'),
    best_hps.get('activation3'),

    )
print(
    best_hps.get('dropout_rate1'),
    )
print(
    best_hps.get('learning_rate')
    )

In [None]:
# Bidirectional LSTM with pre-trained embeddings
def NLP_Model_D():
  filters_num = 44
  neurons_num2 = 80
  hp_activation1 = "relu"
  hp_activation2 = "linear"
  hp_activation3 = "sigmoid"
  hp_learning_rate = 0.4
  hp_dropout_rate1 = 0.0001

  model = Sequential()
  model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length, weights=[embedding_matrix], trainable=False))
  # model.add(Embedding(vocab_size, 100, input_length=max_length))
  model.add(Bidirectional(LSTM(100, return_sequences=True)))
  model.add(GlobalMaxPooling1D())
  model.add(Dropout(hp_dropout_rate1))
  model.add(Flatten())
  model.add(Dense(neurons_num2, activation=hp_activation2))
  model.add(Dense(6, activation=hp_activation3))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model

In [None]:
NLP_D = NLP_Model_D()

In [None]:
es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   verbose=1,
                   patience=10,
                   restore_best_weights=True)

history4 = NLP_D.fit(
    X_train,
    y_train,
    epochs=50,
    verbose=2,
    batch_size=64,
    callbacks=[es],
    validation_data=(X_test, y_test))

print(history4)

In [None]:
summarize_diagnostics(history4)

# Part 2 - Analysis

In [None]:
def summarize_diagnostics_full(history, metric_names=['loss', 'auc', 'accuracy']):
    fig, ax = plt.subplots(1,3, figsize=(30, 10))
    # plot loss
    ax[0].set_title('Loss Curves', fontsize=20)
    ax[0].plot(history.history[metric_names[0]], label='train')
    ax[0].plot(history.history['val_' + metric_names[0]], label='val')
    ax[0].set_xlabel('Epochs', fontsize=15)
    ax[0].set_ylabel('Loss', fontsize=15)
    ax[0].legend(fontsize=15)
    # plot AUC
    ax[1].set_title('Classification AUC', fontsize=20)
    ax[1].plot(history.history[metric_names[1]], label='train')
    ax[1].plot(history.history['val_' + metric_names[1]], label='val')
    ax[1].set_xlabel('Epochs', fontsize=15)
    ax[1].set_ylabel('AUROC', fontsize=15)
    ax[1].legend(fontsize=15)
    # plot accuracy
    ax[2].set_title('Classification accuracy', fontsize=20)
    ax[2].plot(history.history[metric_names[2]], label='train')
    ax[2].plot(history.history['val_' + metric_names[2]], label='val')
    ax[2].set_xlabel('Epochs', fontsize=15)
    ax[2].set_ylabel('Accuracy', fontsize=15)
    ax[2].legend(fontsize=15)

In [None]:
summarize_diagnostics(history)
summarize_diagnostics(history2)
summarize_diagnostics(history3)
summarize_diagnostics(history4)

We can see the best model both for time training but also for accuracy was the NLP model with LSTM and pre-trained weights. This is likely just do to getting luckier with the random choices from hypertuning, however it may also be due to LSTM being better at working with data within memory, which was an issue for earlier models.

I did not acomplish the 29.7% accuracy but the next cells will be my attempts at the bonus.

# Bonus

All the best accuracy was within the first 5 epochs, so to ensure I get the parameters, I am lowering the epochs and cranking the trials up.

In [None]:
# Bidirectional LSTM with pre-trained embeddings
def NLP_Model_Bonus_Train(hp):
  filters_num1 = hp.Int('filters1', min_value=16, max_value=256, step=4)
  filters_num2 = hp.Int('filters2', min_value=16, max_value=256, step=4)
  neurons_num1 = hp.Int('neurons_num1', min_value=10, max_value=500, step=10)
  neurons_num2 = hp.Int('neurons_num2', min_value=10, max_value=500, step=10)

  hp_activation1 = hp.Choice('activation1', values=possible_activations)
  hp_activation2 = hp.Choice('activation2', values=possible_activations)
  hp_activation3 = hp.Choice('activation3', values=possible_activations)
  hp_activation4 = hp.Choice('activation4', values=possible_activations)
  hp_activation5 = hp.Choice('activation5', values=possible_activations)

  hp_learning_rate = hp.Choice('learning_rate', values=possible_lr)

  hp_dropout_rate1 = hp.Choice('dropout_rate1', values=possible_dropout)
  hp_dropout_rate2 = hp.Choice('dropout_rate2', values=possible_dropout)
  hp_dropout_rate3 = hp.Choice('dropout_rate3', values=possible_dropout)

  model = Sequential()
  model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length, weights=[embedding_matrix], trainable=False))

  model.add(Bidirectional(LSTM(100, return_sequences=True)))

  model.add(Conv1D(filters=filters_num1, kernel_size=3, activation=hp_activation1))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Dropout(hp_dropout_rate2))

  model.add(Conv1D(filters=filters_num2, kernel_size=3, activation=hp_activation2))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Dropout(hp_dropout_rate2))


  model.add(GlobalMaxPooling1D())
  model.add(Dropout(hp_dropout_rate3))

  model.add(Flatten())
  model.add(Dense(neurons_num1, activation=hp_activation3))
  model.add(Dense(neurons_num2, activation=hp_activation4))
  model.add(Dense(6, activation=hp_activation5))
  opt = Adam(learning_rate=hp_learning_rate)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

  return model

In [None]:
  tuner = kt.RandomSearch(NLP_Model_Bonus_Train,
                      objective='val_accuracy',
                      directory='my_dir',
                      overwrite=True,
                      max_trials = 200)

In [None]:
# Early stopping
es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   verbose=0,
                   patience=20,
                   restore_best_weights=True)

tuner.search(X_train, y_train,
             epochs=5,
             validation_data=(X_valid, y_valid),
             callbacks=[es])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]