In [1]:
import pandas as pd
import pickle
import keras
from keras.models import Sequential
from keras.layers import Input, Embedding, GRU, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import keras_tuner as kt
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

In [2]:
# Training configuration shared by every model in this notebook.
EPOCHS = 200      # upper bound on epochs passed to fit(); EarlyStopping may end training sooner
BATCH_SIZE = 32   # batch size passed to fit()
PATIENCE = 5      # epochs without val_loss improvement tolerated by EarlyStopping
SEED = 42         # fixed seed so weight init, shuffling and dropout repeat across runs

# Seeds Python's `random`, NumPy and the Keras backend in a single call, so a
# Restart & Run All reproduces the same training trajectory.
keras.utils.set_random_seed(SEED)

# Load Embeddings & Inputs

In [3]:
# Read the preprocessed bundle (embedding matrix plus padded splits and labels).
# NOTE: unpickling can execute arbitrary code embedded in the file, so only load
# artifacts produced by this project — never a pickle from an untrusted source.
with open('artifacts/pretrained_embeddings_inputs.pkl', mode='rb') as pkl_file:
    loaded_input_items = pickle.load(pkl_file)

In [4]:
# Pull the individual arrays out of the loaded bundle.
embedding_matrix = loaded_input_items['embedding_matrix']

# Padded input sequences for the train / validation / test splits.
X_train_pad, X_val_pad, X_test_pad = (
    loaded_input_items[key] for key in ('X_train_pad', 'X_val_pad', 'X_test_pad')
)

# Matching labels for the same three splits.
y_train, y_val, y_test = (
    loaded_input_items[key] for key in ('y_train', 'y_val', 'y_test')
)

In [5]:
# Derive the layer dimensions from the loaded arrays rather than hard-coding them.
VOCAB_SIZE, EMBEDDING_DIM = embedding_matrix.shape[:2]  # rows, columns of the matrix
MAX_LEN = len(X_train_pad[0])  # length of the first padded training sequence
print(f"Embedding dimension: {EMBEDDING_DIM}\nVocab size: {VOCAB_SIZE}\nMaximum input length: {MAX_LEN}")

Embedding dimension: 100
Vocab size: 35756
Maximum input length: 588


# GRU

* While an LSTM has three gates and a separate cell state, a GRU has only two gates (`reset` and `update`) and no separate cell state.
* As a result, a GRU needs fewer multiplications and fewer parameters, allowing faster training with lower memory usage.
* Despite being simpler, GRUs often perform just as well as LSTMs for text classification.

## Trainable = True

In [6]:
# GRU classifier whose embedding layer starts from the pretrained matrix and is
# updated during training (trainable=True).
model = Sequential([
    Input(shape=(MAX_LEN,)),
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=EMBEDDING_DIM,
              weights=[embedding_matrix],
              trainable=True),
    # GRU defaults: activation='tanh', recurrent_activation='sigmoid'.
    # return_sequences=False emits only the output of the last timestep.
    GRU(128, return_sequences=False),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    # Single sigmoid unit producing the binary-class probability.
    Dense(1, activation='sigmoid'),
])
model.summary()

In [7]:
# Binary cross-entropy pairs with the model's single sigmoid output unit.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stop once val_loss has failed to improve by at least min_delta for PATIENCE
# consecutive epochs, then roll back to the best weights seen.
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=PATIENCE,
                      restore_best_weights=True, verbose=1)

# Keep the returned History object: it holds the per-epoch metrics, and
# assigning it stops the bare `<History ...>` repr from being echoed as the
# cell's output.
gru_history = model.fit(X_train_pad, y_train,
                        validation_data=(X_val_pad, y_val),
                        epochs=EPOCHS, batch_size=BATCH_SIZE,
                        callbacks=[estop], verbose=1)

Epoch 1/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 201ms/step - accuracy: 0.5429 - loss: 0.6761 - val_accuracy: 0.5155 - val_loss: 0.7022
Epoch 2/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 203ms/step - accuracy: 0.6424 - loss: 0.6092 - val_accuracy: 0.8546 - val_loss: 0.3445
Epoch 3/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 205ms/step - accuracy: 0.9185 - loss: 0.2172 - val_accuracy: 0.9416 - val_loss: 0.1574
Epoch 4/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 204ms/step - accuracy: 0.9693 - loss: 0.0936 - val_accuracy: 0.9516 - val_loss: 0.1522
Epoch 5/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 205ms/step - accuracy: 0.9775 - loss: 0.0638 - val_accuracy: 0.9474 - val_loss: 0.1845
Epoch 6/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 204ms/step - accuracy: 0.9906 - loss: 0.0321 - val_accuracy: 0.9541 - val_loss: 0.1782
Epoc

<keras.src.callbacks.history.History at 0x2609721f2f0>

In [8]:
# Score the trainable-embedding GRU on the held-out test split.
# evaluate() returns the loss first, then the compiled metrics (accuracy here).
gru_loss, gru_accuracy = model.evaluate(
    x=X_test_pad,
    y=y_test,
    verbose=0,  # silence the progress bar; the summary is printed below
)
print("GRU Test accuracy:", gru_accuracy)
print("GRU Test loss:", gru_loss)

GRU Test accuracy: 0.9479318857192993
GRU Test loss: 0.14533820748329163


In [9]:
# Persist the trainable-embedding GRU to the artifacts directory (.keras format).
model.save(filepath='artifacts/pretrained_gru.keras')

## Trainable = False

In [10]:
# Same GRU architecture, but the embedding layer keeps the pretrained matrix
# fixed (trainable=False), so only the layers after it are updated.
model = Sequential([
    Input(shape=(MAX_LEN,)),
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=EMBEDDING_DIM,
              weights=[embedding_matrix],
              trainable=False),
    # GRU defaults: activation='tanh', recurrent_activation='sigmoid'.
    # return_sequences=False emits only the output of the last timestep.
    GRU(128, return_sequences=False),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    # Single sigmoid unit producing the binary-class probability.
    Dense(1, activation='sigmoid'),
])
model.summary()

In [11]:
# Binary cross-entropy pairs with the model's single sigmoid output unit.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stop once val_loss has failed to improve by at least min_delta for PATIENCE
# consecutive epochs, then roll back to the best weights seen.
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=PATIENCE,
                      restore_best_weights=True, verbose=1)

# Keep the returned History object: it holds the per-epoch metrics, and
# assigning it stops the bare `<History ...>` repr from being echoed as the
# cell's output.
gru_frozen_history = model.fit(X_train_pad, y_train,
                               validation_data=(X_val_pad, y_val),
                               epochs=EPOCHS, batch_size=BATCH_SIZE,
                               callbacks=[estop], verbose=1)

Epoch 1/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 203ms/step - accuracy: 0.5250 - loss: 0.6778 - val_accuracy: 0.5380 - val_loss: 0.6599
Epoch 2/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 202ms/step - accuracy: 0.5650 - loss: 0.6447 - val_accuracy: 0.7527 - val_loss: 0.5312
Epoch 3/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 192ms/step - accuracy: 0.7651 - loss: 0.5094 - val_accuracy: 0.8212 - val_loss: 0.3969
Epoch 4/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 195ms/step - accuracy: 0.8747 - loss: 0.3065 - val_accuracy: 0.9124 - val_loss: 0.2216
Epoch 5/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 191ms/step - accuracy: 0.9297 - loss: 0.1810 - val_accuracy: 0.9267 - val_loss: 0.1708
Epoch 6/200
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 190ms/step - accuracy: 0.9484 - loss: 0.1398 - val_accuracy: 0.9380 - val_loss: 0.1454
Epoc

<keras.src.callbacks.history.History at 0x26099644350>

In [12]:
# Score the frozen-embedding GRU on the held-out test split.
# NOTE: this rebinds gru_loss / gru_accuracy, replacing any values they held before.
gru_loss, gru_accuracy = model.evaluate(
    x=X_test_pad,
    y=y_test,
    verbose=0,  # silence the progress bar; the summary is printed below
)
print("GRU Test accuracy:", gru_accuracy)
print("GRU Test loss:", gru_loss)

GRU Test accuracy: 0.9430657029151917
GRU Test loss: 0.14449326694011688


In [13]:
# Persist the frozen-embedding GRU to the artifacts directory (.keras format).
model.save(filepath='artifacts/pretrained_gru_frozen.keras')