# LSTM

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Baseline LSTM emotion classifier, end to end:
# load data -> tokenize/pad -> split -> build -> train -> evaluate on the test set.
TEXT_COLUMN = 'excerpt_value_cleaned'
LABEL_COLUMN = 'plutchik_emotion'

df = pd.read_csv('english_only.csv')
# Rows with a missing text or label can be neither tokenized nor encoded,
# so drop them up front instead of failing (or gaining a bogus class) later.
df = df.dropna(subset=[TEXT_COLUMN, LABEL_COLUMN])

TEST_SIZE = 0.15
# Applied to the 85% left after the test split: 0.1765 * 0.85 ~= 0.15,
# so the overall split is roughly 70/15/15 (train/validation/test).
VALIDATE_SIZE = 0.1765
RANDOM_STATE_INT = 14988828

max_len = 128
texts = df[TEXT_COLUMN].astype(str)
tokenizer = Tokenizer(num_words=None)  # num_words=None keeps the full vocabulary
# NOTE(review): the vocabulary is fitted on the whole corpus, i.e. before the
# train/validation/test split below, so test-set words are part of the vocabulary.
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(df[LABEL_COLUMN])
# Derive the class count from the data instead of hard-coding it, so the
# output layer and the confusion matrix always agree with the encoder.
num_classes = len(label_encoder.classes_)
class_indices = np.arange(num_classes)

# Two-stage split: carve out the test set first, then validation from the rest.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=TEST_SIZE, random_state=RANDOM_STATE_INT
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=VALIDATE_SIZE, random_state=RANDOM_STATE_INT
)

embedding_dim = 100
dropout_rate = 0.5
# word_index starts at 1, hence the +1 so every token index fits in the embedding.
vocab_size = len(tokenizer.word_index) + 1

model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
model.add(LSTM(128))
model.add(Dropout(dropout_rate))
model.add(Dense(64, activation='relu'))
model.add(Dropout(dropout_rate))
model.add(Dense(num_classes, activation='softmax'))

# Labels are integer-encoded (not one-hot), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): with patience=3 the callback needs at least 4 epochs before it
# can stop training, so with epochs=2 below it never triggers. Raise `epochs`
# if early stopping is meant to decide the training length.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

batch_size = 32
epochs = 2
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping],
)

# Class probabilities -> predicted class index per test sample.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# zero_division=0 yields the same scores as the default but without an
# UndefinedMetricWarning when a class is never predicted.
accuracy = accuracy_score(y_test, y_pred_classes)
f1_weighted = f1_score(y_test, y_pred_classes, average='weighted', zero_division=0)
recall_weighted = recall_score(y_test, y_pred_classes, average='weighted', zero_division=0)
precision_weighted = precision_score(y_test, y_pred_classes, average='weighted', zero_division=0)
f1_macro = f1_score(y_test, y_pred_classes, average='macro', zero_division=0)
recall_macro = recall_score(y_test, y_pred_classes, average='macro', zero_division=0)
precision_macro = precision_score(y_test, y_pred_classes, average='macro', zero_division=0)

print("Accuracy:", accuracy)
print("F1 Score (Weighted):", f1_weighted)
print("Recall (Weighted):", recall_weighted)
print("Precision (Weighted):", precision_weighted)
print("F1 Score (Macro):", f1_macro)
print("Recall (Macro):", recall_macro)
print("Precision (Macro):", precision_macro)

# Pin the label set so the matrix always has one row/column per encoded class,
# even if a class is absent from both the test labels and the predictions;
# otherwise the tick labels below would not line up with the matrix.
cm = confusion_matrix(y_test, y_pred_classes, labels=class_indices)
print(cm)

emotion_categories = label_encoder.classes_

plt.figure(figsize=(10, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=emotion_categories,
    yticklabels=emotion_categories,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()


# Bi-LSTM

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Bidirectional-LSTM emotion classifier, end to end:
# load data -> tokenize/pad -> split -> build -> train -> evaluate on the test set.
TEXT_COLUMN = 'excerpt_value_cleaned'
LABEL_COLUMN = 'plutchik_emotion'

df = pd.read_csv('english_only.csv')
# Rows with a missing text or label can be neither tokenized nor encoded,
# so drop them up front instead of failing (or gaining a bogus class) later.
df = df.dropna(subset=[TEXT_COLUMN, LABEL_COLUMN])

TEST_SIZE = 0.15
# Applied to the 85% left after the test split: 0.1765 * 0.85 ~= 0.15,
# so the overall split is roughly 70/15/15 (train/validation/test).
VALIDATE_SIZE = 0.1765
RANDOM_STATE_INT = 14988828

max_len = 128
texts = df[TEXT_COLUMN].astype(str)
tokenizer = Tokenizer(num_words=None)  # Using all unique words
# NOTE(review): the vocabulary is fitted on the whole corpus, i.e. before the
# train/validation/test split below, so test-set words are part of the vocabulary.
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(df[LABEL_COLUMN])
# Derive the class count from the data instead of hard-coding it, so the
# output layer and the confusion matrix always agree with the encoder.
num_classes = len(label_encoder.classes_)
class_indices = np.arange(num_classes)

# Two-stage split: carve out the test set first, then validation from the rest.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=TEST_SIZE, random_state=RANDOM_STATE_INT
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=VALIDATE_SIZE, random_state=RANDOM_STATE_INT
)

embedding_dim = 100
dropout_rate = 0.5
# word_index starts at 1, hence the +1 so every token index fits in the embedding.
vocab_size = len(tokenizer.word_index) + 1

model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
# The LSTM is wrapped so the sequence is read in both directions.
model.add(Bidirectional(LSTM(128)))
model.add(Dropout(dropout_rate))
model.add(Dense(64, activation='relu'))
model.add(Dropout(dropout_rate))
model.add(Dense(num_classes, activation='softmax'))

# Labels are integer-encoded (not one-hot), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): with patience=3 the callback needs at least 4 epochs before it
# can stop training, so with epochs=2 below it never triggers. Raise `epochs`
# if early stopping is meant to decide the training length.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

batch_size = 32
epochs = 2
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping],
)

# Class probabilities -> predicted class index per test sample.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# zero_division=0 yields the same scores as the default but without an
# UndefinedMetricWarning when a class is never predicted.
accuracy = accuracy_score(y_test, y_pred_classes)
f1_weighted = f1_score(y_test, y_pred_classes, average='weighted', zero_division=0)
recall_weighted = recall_score(y_test, y_pred_classes, average='weighted', zero_division=0)
precision_weighted = precision_score(y_test, y_pred_classes, average='weighted', zero_division=0)
f1_macro = f1_score(y_test, y_pred_classes, average='macro', zero_division=0)
recall_macro = recall_score(y_test, y_pred_classes, average='macro', zero_division=0)
precision_macro = precision_score(y_test, y_pred_classes, average='macro', zero_division=0)

print("Accuracy:", accuracy)
print("F1 Score (Weighted):", f1_weighted)
print("Recall (Weighted):", recall_weighted)
print("Precision (Weighted):", precision_weighted)
print("F1 Score (Macro):", f1_macro)
print("Recall (Macro):", recall_macro)
print("Precision (Macro):", precision_macro)

# Pin the label set so the matrix always has one row/column per encoded class,
# even if a class is absent from both the test labels and the predictions;
# otherwise the tick labels below would not line up with the matrix.
cm = confusion_matrix(y_test, y_pred_classes, labels=class_indices)
print(cm)

emotion_categories = label_encoder.classes_

plt.figure(figsize=(10, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=emotion_categories,
    yticklabels=emotion_categories,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()


# Bi-LSTM with Attention

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dense, Dropout, Attention, GlobalAveragePooling1D
from tensorflow.keras.callbacks import EarlyStopping

# Bi-LSTM + self-attention emotion classifier, end to end:
# load data -> tokenize/pad -> split -> build -> train -> evaluate on the test set.
TEXT_COLUMN = 'excerpt_value_cleaned'
LABEL_COLUMN = 'plutchik_emotion'

df = pd.read_csv('english_only.csv')
# Rows with a missing text or label can be neither tokenized nor encoded,
# so drop them up front instead of failing (or gaining a bogus class) later.
df = df.dropna(subset=[TEXT_COLUMN, LABEL_COLUMN])

TEST_SIZE = 0.15
# Applied to the 85% left after the test split: 0.1765 * 0.85 ~= 0.15,
# so the overall split is roughly 70/15/15 (train/validation/test).
VALIDATE_SIZE = 0.1765
RANDOM_STATE_INT = 14988828
batch_size = 32
epochs = 3

max_len = 128
texts = df[TEXT_COLUMN].astype(str)
tokenizer = Tokenizer(num_words=None)  # num_words=None keeps the full vocabulary
# NOTE(review): the vocabulary is fitted on the whole corpus, i.e. before the
# train/validation/test split below, so test-set words are part of the vocabulary.
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(df[LABEL_COLUMN])
# Derive the class count from the data instead of hard-coding it, so the
# output layer and the confusion matrix always agree with the encoder.
num_classes = len(label_encoder.classes_)
class_indices = np.arange(num_classes)

# Two-stage split: carve out the test set first, then validation from the rest.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=TEST_SIZE, random_state=RANDOM_STATE_INT
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=VALIDATE_SIZE, random_state=RANDOM_STATE_INT
)

embedding_dim = 100
dropout_rate = 0.5
# word_index starts at 1, hence the +1 so every token index fits in the embedding.
vocab_size = len(tokenizer.word_index) + 1

input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(vocab_size, embedding_dim, input_length=max_len)(input_layer)
# return_sequences=True keeps one output per timestep for the attention layer.
bi_lstm = Bidirectional(LSTM(128, return_sequences=True))(embedding_layer)

# Self-attention: the Bi-LSTM outputs serve as both query and value.
attention = Attention()([bi_lstm, bi_lstm])
# Average the attended timesteps into a single fixed-size vector.
context_vector = GlobalAveragePooling1D()(attention)

dropout_layer = Dropout(dropout_rate)(context_vector)
dense_layer_1 = Dense(64, activation='relu')(dropout_layer)
dropout_layer_2 = Dropout(dropout_rate)(dense_layer_1)
# One output unit per encoded emotion class.
output_layer = Dense(num_classes, activation='softmax')(dropout_layer_2)

model = Model(inputs=input_layer, outputs=output_layer)

# Labels are integer-encoded (not one-hot), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): with patience=3 the callback needs at least 4 epochs before it
# can stop training, so with epochs=3 it never triggers. Raise `epochs`
# if early stopping is meant to decide the training length.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping],
)

# Class probabilities -> predicted class index per test sample.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# zero_division=0 yields the same scores as the default but without an
# UndefinedMetricWarning when a class is never predicted.
accuracy = accuracy_score(y_test, y_pred_classes)
f1_w = f1_score(y_test, y_pred_classes, average='weighted', zero_division=0)
recall_w = recall_score(y_test, y_pred_classes, average='weighted', zero_division=0)
precision_w = precision_score(y_test, y_pred_classes, average='weighted', zero_division=0)
f1_m = f1_score(y_test, y_pred_classes, average='macro', zero_division=0)
recall_m = recall_score(y_test, y_pred_classes, average='macro', zero_division=0)
precision_m = precision_score(y_test, y_pred_classes, average='macro', zero_division=0)

print("Accuracy:", accuracy)
print("F1 Score (Weighted):", f1_w)
print("F1 Score (Macro):", f1_m)
print("Recall (Weighted):", recall_w)
print("Recall (Macro):", recall_m)
print("Precision (Weighted):", precision_w)
print("Precision (Macro):", precision_m)

# Pin the label set so the matrix always has one row/column per encoded class,
# even if a class is absent from both the test labels and the predictions;
# otherwise the tick labels below would not line up with the matrix.
cm = confusion_matrix(y_test, y_pred_classes, labels=class_indices)
print(cm)

emotion_categories = label_encoder.classes_

plt.figure(figsize=(10, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=emotion_categories,
    yticklabels=emotion_categories,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()


# Bi-LSTM with Attention and CNN

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, MaxPooling1D, Bidirectional, LSTM, Attention, GlobalAveragePooling1D, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# CNN + Bi-LSTM + self-attention emotion classifier, end to end:
# load data -> tokenize/pad -> split -> build -> train -> evaluate on the test set.
TEXT_COLUMN = 'excerpt_value_cleaned'
LABEL_COLUMN = 'plutchik_emotion'

df = pd.read_csv('english_only.csv')
# Rows with a missing text or label can be neither tokenized nor encoded,
# so drop them up front instead of failing (or gaining a bogus class) later.
df = df.dropna(subset=[TEXT_COLUMN, LABEL_COLUMN])

TEST_SIZE = 0.15
# Applied to the 85% left after the test split: 0.1765 * 0.85 ~= 0.15,
# so the overall split is roughly 70/15/15 (train/validation/test).
VALIDATE_SIZE = 0.1765
RANDOM_STATE_INT = 14988828
batch_size = 32
epochs = 3

max_len = 128
texts = df[TEXT_COLUMN].astype(str)
tokenizer = Tokenizer(num_words=None)  # num_words=None keeps the full vocabulary
# NOTE(review): the vocabulary is fitted on the whole corpus, i.e. before the
# train/validation/test split below, so test-set words are part of the vocabulary.
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(df[LABEL_COLUMN])
# Derive the class count from the data instead of hard-coding it, so the
# output layer and the confusion matrix always agree with the encoder.
num_classes = len(label_encoder.classes_)
class_indices = np.arange(num_classes)

# Two-stage split: carve out the test set first, then validation from the rest.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=TEST_SIZE, random_state=RANDOM_STATE_INT
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=VALIDATE_SIZE, random_state=RANDOM_STATE_INT
)

embedding_dim = 100
dropout_rate = 0.5
# word_index starts at 1, hence the +1 so every token index fits in the embedding.
vocab_size = len(tokenizer.word_index) + 1

input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(vocab_size, embedding_dim, input_length=max_len)(input_layer)

# Convolution over 5-token windows followed by max-pooling, which shortens
# the sequence that the recurrent layer has to process.
conv_layer = Conv1D(filters=128, kernel_size=5, activation='relu')(embedding_layer)
pooling_layer = MaxPooling1D(pool_size=2)(conv_layer)

# return_sequences=True keeps one output per timestep for the attention layer.
bi_lstm = Bidirectional(LSTM(128, return_sequences=True))(pooling_layer)

# Self-attention: the Bi-LSTM outputs serve as both query and value.
attention = Attention()([bi_lstm, bi_lstm])
# Average the attended timesteps into a single fixed-size vector.
context_vector = GlobalAveragePooling1D()(attention)

dropout_layer = Dropout(dropout_rate)(context_vector)
dense_layer_1 = Dense(64, activation='relu')(dropout_layer)
dropout_layer_2 = Dropout(dropout_rate)(dense_layer_1)
# One output unit per encoded emotion class.
output_layer = Dense(num_classes, activation='softmax')(dropout_layer_2)

model = Model(inputs=input_layer, outputs=output_layer)

# Labels are integer-encoded (not one-hot), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): with patience=3 the callback needs at least 4 epochs before it
# can stop training, so with epochs=3 it never triggers. Raise `epochs`
# if early stopping is meant to decide the training length.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping],
)

# Class probabilities -> predicted class index per test sample.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# zero_division=0 yields the same scores as the default but without an
# UndefinedMetricWarning when a class is never predicted.
accuracy = accuracy_score(y_test, y_pred_classes)
f1_w = f1_score(y_test, y_pred_classes, average='weighted', zero_division=0)
recall_w = recall_score(y_test, y_pred_classes, average='weighted', zero_division=0)
precision_w = precision_score(y_test, y_pred_classes, average='weighted', zero_division=0)
f1_m = f1_score(y_test, y_pred_classes, average='macro', zero_division=0)
recall_m = recall_score(y_test, y_pred_classes, average='macro', zero_division=0)
precision_m = precision_score(y_test, y_pred_classes, average='macro', zero_division=0)

print("Accuracy:", accuracy)
print("F1 Score (Weighted):", f1_w)
print("F1 Score (Macro):", f1_m)
print("Recall (Weighted):", recall_w)
print("Recall (Macro):", recall_m)
print("Precision (Weighted):", precision_w)
print("Precision (Macro):", precision_m)

# Pin the label set so the matrix always has one row/column per encoded class,
# even if a class is absent from both the test labels and the predictions;
# otherwise the tick labels below would not line up with the matrix.
cm = confusion_matrix(y_test, y_pred_classes, labels=class_indices)
print(cm)

emotion_categories = label_encoder.classes_

plt.figure(figsize=(10, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=emotion_categories,
    yticklabels=emotion_categories,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()
