In [1]:
import pandas as pd
import pickle
import keras
from keras.models import Sequential
from keras.layers import Input, Embedding, GRU, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import keras_tuner as kt
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

In [2]:
# Training configuration used by the fit() calls below.
EPOCHS = 200      # upper bound on epochs; the EarlyStopping callback may end training sooner
BATCH_SIZE = 32   # passed as batch_size to fit()
PATIENCE = 5      # passed as patience to EarlyStopping (epochs without val_loss improvement)

# Load Embeddings & Inputs

In [3]:
# Read the pickled bundle holding the embedding matrix, padded splits and labels.
# NOTE(review): pickle.load can execute arbitrary code while deserialising, so this
# should only ever be pointed at artifact files produced by this project.
with open('artifacts/embeddings_inputs.pkl', 'rb') as pkl_file:
    loaded_input_items = pickle.load(pkl_file)

In [4]:
# Pull the individual arrays out of the loaded bundle, grouped by role.
embedding_matrix = loaded_input_items['embedding_matrix']

# Padded input sequences for the train / validation / test splits.
X_train_pad, X_val_pad, X_test_pad = (
    loaded_input_items[key] for key in ('X_train_pad', 'X_val_pad', 'X_test_pad')
)

# Matching labels for the same three splits.
y_train, y_val, y_test = (
    loaded_input_items[key] for key in ('y_train', 'y_val', 'y_test')
)

In [5]:
# Derive the model dimensions from the loaded arrays instead of hard-coding them.
VOCAB_SIZE, EMBEDDING_DIM = embedding_matrix.shape[:2]  # rows -> vocab size, columns -> embedding dim
MAX_LEN = len(X_train_pad[0])  # length of the first padded training sequence
print(
    f"Embedding dimension: {EMBEDDING_DIM}\nVocab size: {VOCAB_SIZE}\nMaximum input length: {MAX_LEN}"
)

Embedding dimension: 500
Vocab size: 35756
Maximum input length: 588


# GRU

* While an LSTM has 3 gates and a separate cell-state update, a GRU has only 2 gates (reset and update).
* So there are fewer multiplications, fewer parameters, and faster training with less memory usage.
* Despite being simpler, GRUs often perform just as well as LSTMs for text classification.

## Trainable = False

In [None]:
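# With the Keras GRU defaults (use_bias=True, reset_after=True) each of the 3 gate blocks
# has an input bias and a separate recurrent bias, hence the 2*128 term below.
# number of params for GRU (input dim 500, 128 units) = 3*((500+128)*128 + 2*128) = 241920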

In [7]:
# GRU classifier on top of the pre-trained embedding matrix. The embedding layer is
# frozen (trainable=False), so training only updates the GRU and dense layers.
model = Sequential([
    Input(shape=(MAX_LEN,)),
    Embedding(
        input_dim=VOCAB_SIZE,
        output_dim=EMBEDDING_DIM,
        weights=[embedding_matrix],
        trainable=False,
    ),
    # defaults: activation='tanh', recurrent_activation='sigmoid'
    GRU(128, return_sequences=False),  # emit only the final output, not one per timestep
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),  # single sigmoid output unit
])
model.summary()

In [8]:
# Compile with Adam and binary cross-entropy, then train with early stopping:
# training halts once val_loss has failed to improve by at least 1e-5 for PATIENCE
# epochs, and the weights from the best epoch are restored.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
estop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=1e-5,
    patience=PATIENCE,
    restore_best_weights=True,
    verbose=1,
)
model.fit(
    x=X_train_pad,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val_pad, y_val),
    callbacks=[estop],
    verbose=1,
)

Epoch 1/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 246ms/step - accuracy: 0.5299 - loss: 0.6775 - val_accuracy: 0.5356 - val_loss: 0.6615
Epoch 2/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 248ms/step - accuracy: 0.5640 - loss: 0.6380 - val_accuracy: 0.5362 - val_loss: 0.7871
Epoch 3/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 237ms/step - accuracy: 0.8367 - loss: 0.3985 - val_accuracy: 0.9200 - val_loss: 0.2023
Epoch 4/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 242ms/step - accuracy: 0.9396 - loss: 0.1597 - val_accuracy: 0.9462 - val_loss: 0.1435
Epoch 5/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 250ms/step - accuracy: 0.9601 - loss: 0.1091 - val_accuracy: 0.9583 - val_loss: 0.1139
Epoch 6/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 257ms/step - accuracy: 0.9700 - loss: 0.0857 - val_accuracy: 0.9599 - val_loss: 0.0964

<keras.src.callbacks.history.History at 0x1ba81dcfec0>

In [9]:
# Score the frozen-embedding model on the test split. evaluate() yields the loss
# followed by the compiled metric (accuracy), which are unpacked in that order.
gru_loss, gru_accuracy = model.evaluate(
    x=X_test_pad,
    y=y_test,
    verbose=0,
)
print("GRU Test accuracy:", gru_accuracy)
print("GRU Test loss:", gru_loss)

GRU Test accuracy: 0.9630170464515686
GRU Test loss: 0.0966251939535141


In [10]:
# Persist the frozen-embedding GRU to a .keras file under artifacts/.
model.save(filepath='artifacts/simple_gru.keras')

In [None]:
# def build_model(hp):
#     model = Sequential()
#     model.add(Input(shape=(MAX_LEN,)))
#     model.add(Embedding(input_dim=VOCAB_SIZE,
#                         output_dim=EMBEDDING_DIM,
#                         weights=[embedding_matrix],
#                         trainable=False))
#     model.add(GRU(units=hp.Choice('gru_units', values=[64, 128, 256]), return_sequences=False))
#     model.add(Dropout(hp.Float('gru_dropout', min_value=0.2, max_value=0.5, step=0.1)))
#     model.add(Dense(units=hp.Choice('dense_units', values=[32, 64, 128, 256]), activation='relu'))
#     model.add(Dropout(hp.Float('dense_dropout', min_value=0.2, max_value=0.5, step=0.1)))
#     model.add(Dense(1, activation='sigmoid'))
#     model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-4, 1e-3, 1e-2])),
#                                  loss='binary_crossentropy', metrics=['accuracy'])
#     return model

In [None]:
# tuner = kt.RandomSearch(
#     build_model,
#     objective='val_accuracy',
#     max_trials=20,
#     executions_per_trial=1,
#     directory='tuner_results',
#     project_name='gru_randomsearch'
# )

In [None]:
# estop = EarlyStopping(monitor='val_loss', mode='min', 
#                       min_delta=1e-5, patience=PATIENCE,
#                       restore_best_weights=True, verbose=1)
# tuner.search(X_train_pad, y_train,
#             validation_data=(X_val_pad, y_val),
#             epochs=EPOCHS, batch_size=BATCH_SIZE,
#             callbacks=[estop], verbose=1)

In [None]:
# best_model1 = tuner.get_best_models(num_models=1)[0]
# best_hps1 = tuner.get_best_hyperparameters(num_trials=1)[0]
# print(best_hps1.values)

In [None]:
# best_loss1, best_accuracy1 = best_model1.evaluate(X_test_pad, y_test, verbose=0)
# print("Tuned GRU (trainable=False) test accuracy:",best_accuracy1)
# print("Tuned GRU (trainable=False) test loss:", best_loss1)

## Trainable = True

In [18]:
# Same architecture as the frozen-embedding model, but the embedding layer is
# initialised from the pre-trained matrix and then fine-tuned (trainable=True).
# Note: this rebinds `model`, replacing the model object built earlier in the notebook.
model = Sequential([
    Input(shape=(MAX_LEN,)),
    Embedding(
        input_dim=VOCAB_SIZE,
        output_dim=EMBEDDING_DIM,
        weights=[embedding_matrix],
        trainable=True,
    ),
    # defaults: activation='tanh', recurrent_activation='sigmoid'
    GRU(128, return_sequences=False),  # emit only the final output, not one per timestep
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),  # single sigmoid output unit
])
model.summary()

In [19]:
# Compile and train the trainable-embedding model with the same settings as before:
# Adam, binary cross-entropy, and early stopping on val_loss (min_delta 1e-5,
# PATIENCE epochs, best weights restored).
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
estop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=1e-5,
    patience=PATIENCE,
    restore_best_weights=True,
    verbose=1,
)
model.fit(
    x=X_train_pad,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val_pad, y_val),
    callbacks=[estop],
    verbose=1,
)

Epoch 1/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 525ms/step - accuracy: 0.5309 - loss: 0.6731 - val_accuracy: 0.5471 - val_loss: 0.6369
Epoch 2/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m214s[0m 522ms/step - accuracy: 0.5456 - loss: 0.6388 - val_accuracy: 0.5468 - val_loss: 0.6398
Epoch 3/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 478ms/step - accuracy: 0.6990 - loss: 0.4822 - val_accuracy: 0.9352 - val_loss: 0.1668
Epoch 4/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 412ms/step - accuracy: 0.9662 - loss: 0.1040 - val_accuracy: 0.9629 - val_loss: 0.1089
Epoch 5/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 371ms/step - accuracy: 0.9929 - loss: 0.0278 - val_accuracy: 0.9605 - val_loss: 0.1161
Epoch 6/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 365ms/step - accuracy: 0.9976 - loss: 0.0089 - val_accuracy: 0.9583 - val_loss: 0.171

<keras.src.callbacks.history.History at 0x1bb23de9790>

In [20]:
# Score the trainable-embedding model on the test split. evaluate() yields the loss
# followed by the compiled metric (accuracy). Note that gru_loss / gru_accuracy are
# reassigned here, overwriting the values from the frozen-embedding run.
gru_loss, gru_accuracy = model.evaluate(
    x=X_test_pad,
    y=y_test,
    verbose=0,
)
print("GRU Test accuracy:", gru_accuracy)
print("GRU Test loss:", gru_loss)

GRU Test accuracy: 0.9596107006072998
GRU Test loss: 0.11320266127586365


In [21]:
# Persist the trainable-embedding GRU to its own .keras file under artifacts/.
model.save(filepath='artifacts/simple_gru_trainable.keras')