## Transformers-LSTM-GRU

### Import Libraries

In [None]:
import pandas as pd
import numpy as np
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten, Concatenate, Dropout, Conv1D, MaxPooling1D, LSTM, GRU
from tensorflow.keras.models import Model
import optuna
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

import nltk
nltk.download('stopwords')
nltk.download('wordnet')

### Preprocessing module

In [None]:
# Load the data
# Both CSV files are read as header-less, so the three column names are
# supplied directly at read time instead of being assigned afterwards.
train_url = 'https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv'
test_url = 'https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv'

train_df = pd.read_csv(
    train_url,
    header=None,
    names=['Class Index', 'Title', 'Description'],
)
test_df = pd.read_csv(
    test_url,
    header=None,
    names=['Class Index', 'Title', 'Description'],
)

In [None]:
# Define stop words and lemmatizer
# Built once at module level so preprocess_text does not rebuild them per call.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize a raw news string into space-separated, lemmatized tokens.

    Steps, in order: lowercase, map "u.s." / "us" to "usa", strip HTML tags,
    strip named HTML entities (``&name;``), drop every character that is not
    a lowercase ASCII letter or whitespace, remove English stop words, and
    lemmatize each remaining word.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned tokens joined by single spaces (may be an empty string).
    """
    text = text.lower()
    # "u.s." ends in a dot, so a trailing \b can only match when a word
    # character follows; use a negative lookahead instead so "u.s." is also
    # recognized before whitespace, punctuation, or end of string (and
    # "u.s.a." is left alone rather than becoming "usaa").
    # NOTE: because the text is already lowercased, the pronoun "us" is
    # mapped to "usa" as well.
    text = re.sub(r'\b(?:u\.s\.(?!\w)|us\b)', 'usa', text)
    text = re.sub(r'<[^>]+>', '', text)      # HTML tags
    text = re.sub(r'&\w+;', '', text)        # named HTML entities
    text = re.sub(r'[^a-z\s]', '', text)     # digits, punctuation, non-ASCII
    words = [word for word in text.split() if word not in stop_words]
    return ' '.join(lemmatizer.lemmatize(word) for word in words)
    
# Combine 'Title' and 'Description' into 'clean_text' and preprocess
train_df['clean_text'] = (train_df['Title'] + ' ' + train_df['Description']).apply(preprocess_text)
test_df['clean_text'] = (test_df['Title'] + ' ' + test_df['Description']).apply(preprocess_text)

# Clean each field on its own as well. The tokenizer vocabulary is fitted on
# cleaned text, so the sequences fed to the model must be built from text that
# went through the same preprocessing (not from the raw columns).
train_title_clean = train_df['Title'].apply(preprocess_text)
test_title_clean = test_df['Title'].apply(preprocess_text)
train_description_clean = train_df['Description'].apply(preprocess_text)
test_description_clean = test_df['Description'].apply(preprocess_text)

# Tokenization (vocabulary is fitted on the training split only)
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_df['clean_text'])

# Convert text to sequences
X_train_title_seq = tokenizer.texts_to_sequences(train_title_clean)
X_test_title_seq = tokenizer.texts_to_sequences(test_title_clean)

X_train_description_seq = tokenizer.texts_to_sequences(train_description_clean)
X_test_description_seq = tokenizer.texts_to_sequences(test_description_clean)

# Determine max sequence length (from the training split) and pad sequences
max_length_titles = max(len(seq) for seq in X_train_title_seq)
max_length_descriptions = max(len(seq) for seq in X_train_description_seq)

# Test sequences use the training lengths so both splits share one input shape
X_train_title_pad = pad_sequences(X_train_title_seq, maxlen=max_length_titles)
X_test_title_pad = pad_sequences(X_test_title_seq, maxlen=max_length_titles)

X_train_description_pad = pad_sequences(X_train_description_seq, maxlen=max_length_descriptions)
X_test_description_pad = pad_sequences(X_test_description_seq, maxlen=max_length_descriptions)

# Shift the 1-based class indices to 0-based labels
y_train = train_df['Class Index'].values - 1
y_test = test_df['Class Index'].values - 1

# Download and load GloVe embeddings
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove.6B.zip

embedding_index = {}
with open('glove.6B.100d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embedding_index[word] = coefs

embedding_matrix = np.zeros((5000, 100))
for word, i in tokenizer.word_index.items():
    if i < 5000:
        embedding_vector = embedding_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

### Basic training module (Base Model)

In [None]:
# Title branch: frozen pre-trained embedding -> GRU (full sequence) -> LSTM (final state)
title_input = Input(shape=(max_length_titles,), name='title_input')
title_embedding = Embedding(
    input_dim=5000,
    output_dim=100,
    weights=[embedding_matrix],
    trainable=False,
)(title_input)
title_gru = GRU(128, return_sequences=True)(title_embedding)
title_lstm = LSTM(128, return_sequences=False)(title_gru)

# Description branch: same layout as the title branch, with its own layers
description_input = Input(shape=(max_length_descriptions,), name='description_input')
description_embedding = Embedding(
    input_dim=5000,
    output_dim=100,
    weights=[embedding_matrix],
    trainable=False,
)(description_input)
description_gru = GRU(128, return_sequences=True)(description_embedding)
description_lstm = LSTM(128, return_sequences=False)(description_gru)

# Join both branch outputs into a single feature vector
merged = Concatenate()([title_lstm, description_lstm])

# Classification head: two hidden dense layers, then a 4-way softmax
dense_1 = Dense(128, activation='relu')(merged)
dense_2 = Dense(64, activation='relu')(dense_1)
output = Dense(4, activation='softmax')(dense_2)

# Assemble the two-input model; labels are integer class ids, hence the
# sparse categorical cross-entropy loss
model = Model(inputs=[title_input, description_input], outputs=output)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when validation loss has not improved for 3 epochs and roll back to
# the best weights seen
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train, holding out 10% of the training data for validation
model.fit(
    [X_train_title_pad, X_train_description_pad],
    y_train,
    validation_split=0.1,
    epochs=5,
    batch_size=16,
    verbose=1,
    callbacks=[early_stopping],
)

### Testing module 1 (Base Model)

In [None]:
# Score the base model on the test split: take the most probable class per row
base_probabilities = model.predict([X_test_title_pad, X_test_description_pad])
y_pred_base = base_probabilities.argmax(axis=1)

print("Base Model Accuracy:", np.mean(y_pred_base == y_test))
print(classification_report(y_test, y_pred_base))
conf_matrix_base = confusion_matrix(y_test, y_pred_base)

# Plot the confusion matrix as an annotated heatmap (rows: true, columns: predicted)
base_class_labels = [0, 1, 2, 3]
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix_base,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=base_class_labels,
    yticklabels=base_class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix for Base Model')
plt.show()

### Hyperparameter selection module

In [None]:
def model_builder(trial):
    """Build and compile the two-branch GRU/LSTM classifier for one trial.

    The trial supplies four hyperparameters: ``dropout_rate``,
    ``learning_rate`` (log-scaled), ``gru_units`` and ``lstm_units``. The
    title and description branches share the same unit counts but have
    their own layers.

    Args:
        trial: Object exposing ``suggest_float`` / ``suggest_int``.

    Returns:
        A compiled Keras model taking ``[title, description]`` inputs and
        producing a 4-way softmax.
    """
    # Hyperparameters are requested in a fixed order
    drop = trial.suggest_float('dropout_rate', 0.0, 0.5)
    lr = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    n_gru = trial.suggest_int('gru_units', 64, 256)
    n_lstm = trial.suggest_int('lstm_units', 64, 256)

    def build_branch(seq_len, input_name):
        # Input -> frozen embedding -> GRU (sequence) -> LSTM (final state)
        branch_in = Input(shape=(seq_len,), name=input_name)
        x = Embedding(
            input_dim=5000,
            output_dim=100,
            weights=[embedding_matrix],
            trainable=False,
        )(branch_in)
        x = GRU(n_gru, return_sequences=True)(x)
        x = LSTM(n_lstm, return_sequences=False)(x)
        return branch_in, x

    title_in, title_vec = build_branch(max_length_titles, 'title_input')
    desc_in, desc_vec = build_branch(max_length_descriptions, 'description_input')

    # Classification head with dropout before the softmax layer
    features = Concatenate()([title_vec, desc_vec])
    hidden = Dense(128, activation='relu')(features)
    hidden = Dropout(drop)(hidden)
    probabilities = Dense(4, activation='softmax')(hidden)

    net = Model(inputs=[title_in, desc_in], outputs=probabilities)
    net.compile(
        optimizer=Adam(learning_rate=lr),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Optuna objective function
def objective(trial):
    """Train a trial model briefly and return its validation accuracy.

    The score comes from the 10% validation split held out by ``fit``, not
    from the test set: selecting hyperparameters on test data would leak it
    into model selection and make the later test metrics optimistic.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Validation accuracy after the final training epoch.
    """
    model = model_builder(trial)
    history = model.fit(
        [X_train_title_pad, X_train_description_pad], y_train,
        validation_split=0.1, epochs=3, batch_size=16, verbose=0)
    # Last entry corresponds to the weights the model ends up with
    return history.history['val_accuracy'][-1]

# Run Optuna
# Create a study that maximizes the value returned by `objective` and run
# 10 trials; each trial builds and trains a fresh model via `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)
# Report the trial with the highest objective value
print("Best trial:", study.best_trial)

### Testing module 2 (Optimized Model)

In [None]:
# Rebuild the model from the best trial found by the study
best_trial = study.best_trial
model_optimized = model_builder(best_trial)

# Train on the training split (10% held out for validation)
model_optimized.fit(
    [X_train_title_pad, X_train_description_pad],
    y_train,
    validation_split=0.1,
    epochs=5,
    batch_size=16,
    verbose=1,
)

# Predict on the test split: most probable class per row
optimized_probabilities = model_optimized.predict([X_test_title_pad, X_test_description_pad])
y_pred_optimized = optimized_probabilities.argmax(axis=1)

# Report accuracy, per-class metrics and the confusion matrix
print("Optimized Model Accuracy:", np.mean(y_pred_optimized == y_test))
print(classification_report(y_test, y_pred_optimized))
conf_matrix_optimized = confusion_matrix(y_test, y_pred_optimized)

# Plot the confusion matrix as an annotated heatmap (rows: true, columns: predicted)
optimized_class_labels = [0, 1, 2, 3]
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix_optimized,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=optimized_class_labels,
    yticklabels=optimized_class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix for Optimized Model')
plt.show()