# 2. Shallow and Dense Deep Learning Models

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import Input, layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TextVectorization, Embedding

# Seed NumPy and TensorFlow so that a Restart & Run All is as repeatable as
# possible; the same value is used for the pandas sampling below.
SEED = 100
np.random.seed(SEED)
tf.random.set_seed(SEED)


def _decoded_frame(dataset):
    """Convert a TFDS dataset to a DataFrame whose 'text' column is decoded from UTF-8 bytes.

    Args:
        dataset: the object returned by `tfds.load` for one split.

    Returns:
        The DataFrame produced by `tfds.as_dataframe`, with `text` decoded to `str`.
    """
    frame = tfds.as_dataframe(dataset)
    frame['text'] = frame['text'].str.decode('utf-8')
    return frame


# Train split: keep the full frame and draw a 20% sample to keep training fast.
imdb_data = tfds.load(name="imdb_reviews", split="train")
imdb_df = _decoded_frame(imdb_data)
imdb_sample = imdb_df.sample(frac=0.2, random_state=SEED)

# Test split: same treatment as the train split.
imdb_test = tfds.load(name="imdb_reviews", split="test")
imdb_test_df = _decoded_frame(imdb_test)
imdb_test_sample = imdb_test_df.sample(frac=0.2, random_state=SEED)

X_train = imdb_sample['text']
y_train = imdb_sample['label']
X_test = imdb_test_sample['text']
y_test = imdb_test_sample['label']

# Text-processing hyperparameters shared by the vectorizer and the embedding.
max_tokens = 7500
output_sequence_length = 128
output_dim = 128

# Maps raw strings to fixed-length integer sequences; the vocabulary is learned
# from the training sample only (the test text is never passed to adapt()).
vectorizer_layer = TextVectorization(max_tokens=max_tokens,
                                     output_mode='int',
                                     standardize='lower_and_strip_punctuation',
                                     ngrams=(1, 2),
                                     output_sequence_length=output_sequence_length)
vectorizer_layer.adapt(X_train)

# NOTE: this is a single layer instance. Every model that adds it shares (and
# keeps training) the same embedding weights.
embedding_layer = Embedding(input_dim=max_tokens,
                            output_dim=output_dim,
                            input_length=output_sequence_length)

In [None]:
# Baseline: a stack of Dense layers applied directly to the embedding output.
# The hidden Dense layers have no activation argument and there is no pooling
# layer in this model.
# NOTE(review): presumably a deliberately naive baseline -- confirm before changing.
model_simple_dense = Sequential([
    Input(shape=(1,), dtype=tf.string),
    vectorizer_layer,
    # A fresh Embedding (same configuration as `embedding_layer`) so the weights
    # trained here are not carried into any other model that adds the shared instance.
    Embedding(input_dim=max_tokens,
              output_dim=output_dim,
              input_length=output_sequence_length),
    layers.Dense(64),
    layers.Dense(64),
    layers.Dense(64),
    layers.Dense(64),
    layers.Dense(64),
    layers.Dense(1, activation='sigmoid'),
])

opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model_simple_dense.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Train on the training sample, then report accuracy on the held-out test sample.
model_simple_dense.fit(X_train, y_train, epochs=10)
test_loss, test_acc = model_simple_dense.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test set accuracy: 0.5292984247207642


# 3. Initializing a GlobalAveragePooling1D Layer

In [None]:
# Averages the embedding output over the sequence axis with GlobalAveragePooling1D
# before a funnel of relu Dense layers (128 -> 64 -> 32 -> 16 -> 8) and a sigmoid output.
model_glavpool = Sequential([
    Input(shape=(1,), dtype=tf.string),
    vectorizer_layer,
    # A fresh Embedding (same configuration as `embedding_layer`) so this model
    # starts from untrained embeddings instead of weights trained by another model.
    Embedding(input_dim=max_tokens,
              output_dim=output_dim,
              input_length=output_sequence_length),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model_glavpool.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Train on the training sample, then report accuracy on the held-out test sample.
model_glavpool.fit(X_train, y_train, epochs=10)
test_loss, test_acc = model_glavpool.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test set accuracy: 0.7666000127792358


# 5. Tuning the Model Learning Rate and Activation Function

In [None]:
# Learning-rate experiment (higher): pooled relu architecture trained with
# Adam at learning_rate=0.1.
model_inc_learning_rate = Sequential([
    Input(shape=(1,), dtype=tf.string),
    vectorizer_layer,
    # A fresh Embedding (same configuration as `embedding_layer`): the high
    # learning rate must not overwrite embedding weights used by other models,
    # and this run must not start from weights another model already trained.
    Embedding(input_dim=max_tokens,
              output_dim=output_dim,
              input_length=output_sequence_length),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

opt = tf.keras.optimizers.Adam(learning_rate=0.1)
model_inc_learning_rate.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Train on the training sample, then report accuracy on the held-out test sample.
model_inc_learning_rate.fit(X_train, y_train, epochs=10)
test_loss, test_acc = model_inc_learning_rate.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test set accuracy: 0.5012000203132629


In [None]:
# Learning-rate experiment (lower): pooled relu architecture trained with
# Adam at learning_rate=0.001.
model_dec_learning_rate = Sequential([
    Input(shape=(1,), dtype=tf.string),
    vectorizer_layer,
    # A fresh Embedding (same configuration as `embedding_layer`) so the result
    # reflects this learning rate only, not embedding weights left behind by
    # a previously trained model.
    Embedding(input_dim=max_tokens,
              output_dim=output_dim,
              input_length=output_sequence_length),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model_dec_learning_rate.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Train on the training sample, then report accuracy on the held-out test sample.
model_dec_learning_rate.fit(X_train, y_train, epochs=10)
test_loss, test_acc = model_dec_learning_rate.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test set accuracy: 0.774399995803833


Increasing the learning rate from 0.01 to 0.1 made the model perform poorly: in the run recorded above, test accuracy dropped to about 0.50.

Decreasing the learning rate from 0.01 to 0.001 made little difference to the test accuracy (about 0.77 in both runs recorded above) but seems to have helped reduce the loss a little more.

In [None]:
# Activation experiment: pooled architecture with elu on every hidden Dense
# layer, trained with Adam at learning_rate=0.001.
model_activation_function = Sequential([
    Input(shape=(1,), dtype=tf.string),
    vectorizer_layer,
    # A fresh Embedding (same configuration as `embedding_layer`) so the
    # activation function is the only thing that differs from a relu run,
    # rather than also inheriting already-trained embedding weights.
    Embedding(input_dim=max_tokens,
              output_dim=output_dim,
              input_length=output_sequence_length),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, activation='elu'),
    layers.Dense(64, activation='elu'),
    layers.Dense(32, activation='elu'),
    layers.Dense(16, activation='elu'),
    layers.Dense(8, activation='elu'),
    layers.Dense(1, activation='sigmoid'),
])

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model_activation_function.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Train on the training sample, then report accuracy on the held-out test sample.
model_activation_function.fit(X_train, y_train, epochs=10)
test_loss, test_acc = model_activation_function.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test set accuracy: 0.769599974155426


There seems to be a slight improvement using `elu` over `relu` for the activation function on the hidden layers, although the test accuracy is essentially unchanged (about 0.77 in both runs recorded above).

# 6. Tuning the Model Optimizer and Adding Dropout Layer

In [None]:
# Optimizer/dropout experiment: elu architecture with Dropout after the two
# widest Dense layers, trained with RMSprop instead of Adam.
model_optimizer_dropout = Sequential([
    Input(shape=(1,), dtype=tf.string),
    vectorizer_layer,
    # A fresh Embedding (same configuration as `embedding_layer`) so this model
    # does not start from embedding weights trained by another model.
    Embedding(input_dim=max_tokens,
              output_dim=output_dim,
              input_length=output_sequence_length),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, activation='elu'),
    layers.Dropout(0.6),
    layers.Dense(64, activation='elu'),
    layers.Dropout(0.5),
    layers.Dense(32, activation='elu'),
    layers.Dense(16, activation='elu'),
    layers.Dense(8, activation='elu'),
    layers.Dense(1, activation='sigmoid'),
])

opt = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.8, momentum=0.9)
model_optimizer_dropout.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Train on the training sample, then report accuracy on the held-out test sample.
model_optimizer_dropout.fit(X_train, y_train, epochs=10)
test_loss, test_acc = model_optimizer_dropout.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test set accuracy: 0.7717999815940857


# 7. Regularization

In [None]:
from tensorflow.keras.regularizers import L1, L2, L1L2

# Load both splits in full (no sampling) for the final, regularized model.
imdb_train = tfds.load(name="imdb_reviews", split="train")
imdb_train_df = tfds.as_dataframe(imdb_train)
imdb_train_df['text'] = imdb_train_df['text'].str.decode('utf-8')

imdb_test = tfds.load(name="imdb_reviews", split="test")
imdb_test_df = tfds.as_dataframe(imdb_test)
imdb_test_df['text'] = imdb_test_df['text'].str.decode('utf-8')

# NOTE: these names are rebound to the full splits from here on, so any cell
# that reads X_train / y_train / X_test / y_test after this one sees the full data.
X_train = imdb_train_df['text']
y_train = imdb_train_df['label']
X_test = imdb_test_df['text']
y_test = imdb_test_df['label']

# NOTE(review): `vectorizer_layer` is not re-adapted in this cell, so its
# vocabulary is still whatever its last adapt() call learned -- confirm that is
# intended now that training uses the full training split.
model_regularized = Sequential([
    Input(shape=(1,), dtype=tf.string),
    vectorizer_layer,
    # A fresh Embedding (same configuration as `embedding_layer`) so this model
    # does not start from embedding weights trained by another model.
    Embedding(input_dim=max_tokens,
              output_dim=output_dim,
              input_length=output_sequence_length),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, activation='elu', kernel_regularizer=L1(0.00001)),
    layers.Dropout(0.6),
    # L1L2's first positional argument is `l1` only and `l2` defaults to 0.0,
    # so both factors are passed by keyword to make the L2 term take effect.
    layers.Dense(64, activation='elu',
                 kernel_regularizer=L1L2(l1=0.00001, l2=0.00001)),
    layers.Dropout(0.5),
    layers.Dense(32, activation='elu', kernel_regularizer=L2(0.0005)),
    layers.Dense(16, activation='elu', kernel_regularizer=L2(0.0005)),
    layers.Dense(8, activation='elu', kernel_regularizer=L2(0.0005)),
    layers.Dense(1, activation='sigmoid'),
])

opt = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.8, momentum=0.9)
model_regularized.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Train on the full training split, then report accuracy on the full test split.
model_regularized.fit(X_train, y_train, epochs=10)
test_loss, test_acc = model_regularized.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test set accuracy: 0.8219199776649475
