# Deep Learning Model Development 


In [3]:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.layers import Input, Embedding, LSTM, Dropout, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

### LSTM

In [5]:
# Load your dataset
df = pd.read_csv("modified_dataset.csv")

# Prepare the labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['LABEL'])

# Prepare the text data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(df['REVIEW_TEXT'])
sequences = tokenizer.texts_to_sequences(df['REVIEW_TEXT'])
X = pad_sequences(sequences, maxlen=200)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Define the LSTM model
lstm_model = Sequential([
    Input(shape=(200,)),
    Embedding(input_dim=10000, output_dim=64),
    LSTM(50, return_sequences=True),  
    Dropout(0.3),
    LSTM(50),  
    Dropout(0.3),
    Dense(1, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(0.01))
])

# Compile the model with additional metrics
lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', Precision(), Recall()])

# Setup early stopping
early_stopping_lstm = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)  # Aggressive early stopping

# Train the model with early stopping
lstm_model_history = lstm_model.fit(
    X_train, y_train, epochs=20, validation_split=0.2, batch_size=64,  
    callbacks=[early_stopping_lstm]
)

# Evaluate the model on the test set to get the performance metrics
test_loss, test_accuracy, test_precision, test_recall = lstm_model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}, Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}")

# Calculate the F1-score
test_f1_score = 2 * (test_precision * test_recall) / (test_precision + test_recall)
print(f"Test F1 Score: {test_f1_score:.4f}")


Epoch 1/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 250ms/step - accuracy: 0.5353 - loss: 0.7089 - precision_1: 0.5314 - recall_1: 0.8242 - val_accuracy: 0.6161 - val_loss: 0.6762 - val_precision_1: 0.6279 - val_recall_1: 0.5875
Epoch 2/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 240ms/step - accuracy: 0.6869 - loss: 0.6218 - precision_1: 0.6991 - recall_1: 0.6613 - val_accuracy: 0.6276 - val_loss: 0.6627 - val_precision_1: 0.6250 - val_recall_1: 0.6601
Epoch 3/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 240ms/step - accuracy: 0.7801 - loss: 0.4948 - precision_1: 0.8032 - recall_1: 0.7421 - val_accuracy: 0.6367 - val_loss: 0.7154 - val_precision_1: 0.6166 - val_recall_1: 0.7459
Epoch 4/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 242ms/step - accuracy: 0.8483 - loss: 0.3830 - precision_1: 0.8478 - recall_1: 0.8478 - val_accuracy: 0.6247 - val_loss: 0.8094 - val_precision_1: 0.6337 - val_recal

### CNN-LSTM

In [6]:
# CNN-LSTM Model Setup
cnn_lstm_model = Sequential([
    Embedding(input_dim=10000, output_dim=50),
    Conv1D(filters=64, kernel_size=5, activation='relu'),
    MaxPooling1D(pool_size=4),
    LSTM(100),
    Dense(1, activation='sigmoid')
])

# Compiling the CNN-LSTM model
cnn_lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', Precision(), Recall()])

# Early stopping to prevent overfitting
early_stopping_cnn_lstm = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Training the CNN-LSTM model
cnn_lstm_model_history = cnn_lstm_model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test), callbacks=[early_stopping_cnn_lstm])

# Evaluating the model on the test set to get the test performance metrics
test_loss, test_accuracy, test_precision, test_recall = cnn_lstm_model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}, Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}")

# Calculate the F1-score
test_f1_score = 2 * (test_precision * test_recall) / (test_precision + test_recall)
print(f"Test F1 Score: {test_f1_score:.4f}")

Epoch 1/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 65ms/step - accuracy: 0.5443 - loss: 0.6861 - precision_2: 0.5384 - recall_2: 0.7706 - val_accuracy: 0.6429 - val_loss: 0.6330 - val_precision_2: 0.6951 - val_recall_2: 0.4856
Epoch 2/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 61ms/step - accuracy: 0.7317 - loss: 0.5458 - precision_2: 0.7442 - recall_2: 0.6953 - val_accuracy: 0.6520 - val_loss: 0.6240 - val_precision_2: 0.6137 - val_recall_2: 0.7747
Epoch 3/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 62ms/step - accuracy: 0.8265 - loss: 0.3999 - precision_2: 0.8276 - recall_2: 0.8232 - val_accuracy: 0.6539 - val_loss: 0.7069 - val_precision_2: 0.6135 - val_recall_2: 0.7870
Epoch 4/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 61ms/step - accuracy: 0.9101 - loss: 0.2464 - precision_2: 0.9088 - recall_2: 0.9145 - val_accuracy: 0.6467 - val_loss: 0.8611 - val_precision_2: 0.6102 - val_r

### LSTM-RNN

In [7]:
# LSTM-RNN Model Setup
lstm_rnn_model = Sequential([
    Input(shape=(200,)),  
    Embedding(input_dim=10000, output_dim=50),
    Dropout(0.3),
    LSTM(64, return_sequences=True),
    Dropout(0.3),
    SimpleRNN(32),
    Dense(1, activation='sigmoid')
])

# Compiling the LSTM-RNN model
lstm_rnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', Precision(), Recall()])

# Early stopping to prevent overfitting
early_stopping_lstm_rnn = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Training the LSTM-RNN model
lstm_rnn_model_history = lstm_rnn_model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test), callbacks=[early_stopping_lstm_rnn])

# Evaluate the model on the test set to get the performance metrics
test_loss, test_accuracy, test_precision, test_recall = lstm_rnn_model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}, Test Precision: {test_precision:.4f}, Test Recall: {test_recall:.4f}")

# Calculate the F1-score
test_f1_score = 2 * (test_precision * test_recall) / (test_precision + test_recall)
print(f"Test F1 Score: {test_f1_score:.4f}")


Epoch 1/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 164ms/step - accuracy: 0.5091 - loss: 0.6934 - precision_3: 0.5087 - recall_3: 0.5236 - val_accuracy: 0.5646 - val_loss: 0.6788 - val_precision_3: 0.5299 - val_recall_3: 0.9403
Epoch 2/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 160ms/step - accuracy: 0.6517 - loss: 0.6308 - precision_3: 0.6467 - recall_3: 0.7039 - val_accuracy: 0.6250 - val_loss: 0.6456 - val_precision_3: 0.6276 - val_recall_3: 0.5792
Epoch 3/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 160ms/step - accuracy: 0.7697 - loss: 0.5097 - precision_3: 0.7683 - recall_3: 0.7738 - val_accuracy: 0.6293 - val_loss: 0.6678 - val_precision_3: 0.5930 - val_recall_3: 0.7510
Epoch 4/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 160ms/step - accuracy: 0.8231 - loss: 0.4016 - precision_3: 0.8122 - recall_3: 0.8266 - val_accuracy: 0.6286 - val_loss: 0.7182 - val_precision_3: 0.6018 - v