### Import Libraries

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

### Load data

In [None]:
# Load the emotions dataset from the CSV file and preview its first five rows
df = pd.read_csv(filepath_or_buffer='emotions.csv')
df.head(n=5)

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
# `info` is a method and must be called; referencing it without parentheses
# only displays the bound-method object instead of the summary.
df.info()

In [None]:
# Distinct values of the 'label' column (the target classes)
labels_name = pd.unique(df['label'])
labels_name

In [None]:
# How many distinct classes the classifier has to predict;
# reused later as the size of the model's output layer
num_of_classes = len(labels_name)
print(
    'Total count of the unique labels are ',
    num_of_classes,
)

### Preprocessing

In [None]:
# Tokenization: cap the usable vocabulary at `max_words` entries and map
# every word outside of it to the explicit "<OOV>" token
max_words = 3000
tokenizer = Tokenizer(
    num_words=max_words,
    oov_token="<OOV>",
)

# Build the word -> index vocabulary from the raw text column
texts = df['text']
tokenizer.fit_on_texts(texts)

# Convert each text into a sequence of integer word indices
X = tokenizer.texts_to_sequences(texts)
print(X[0])

In [None]:
# Bring every sequence to the same length `maxlen`;
# shorter sequences are filled with zeros at the end ('post' padding)
maxlen = 100
X = pad_sequences(X, maxlen=maxlen, padding='post')
print('Pad sequnce')
print(X)

# Number of distinct words seen by the tokenizer, plus one for the
# padding index (zero), which is never assigned to a word
vocab_size = 1 + len(tokenizer.word_index)
print('Vocabulary size', vocab_size)


In [None]:
# Peek at the first 20 (word, index) pairs of the fitted vocabulary
[*tokenizer.word_index.items()][:20]

In [None]:
# Label encoding
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['label'])
print(y)

#One-Hot Encode the labels
y = to_categorical(y)
print(y)

In [None]:
# Train / validation / test split
# Step 1: hold out 20% of all samples as the test set.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Step 2: carve 20% of the remaining training samples off as the validation
# set, giving roughly a 64% / 16% / 20% train / validation / test split.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

print(f'Shape of the training data: {x_train.shape},{y_train.shape}')
print(f'Shape of the test data: {x_test.shape},{y_test.shape}')
# This line reports the validation split (it was previously mislabelled as test data)
print(f'Shape of the validation data: {x_val.shape},{y_val.shape}')


### Create model

In [None]:
# Sequence classifier: embedding -> LSTM -> dropout -> softmax over the classes
embedding_dim = 100
model = Sequential([
    # input_dim matches the tokenizer's `num_words` cap used when encoding the texts
    Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=maxlen),
    # Keep the LSTM's default tanh activation: an unbounded activation such as
    # ReLU inside the recurrent cell is prone to exploding values / NaN loss on
    # 100-step sequences and also prevents Keras from using the cuDNN kernel.
    LSTM(64),
    Dropout(0.5),
    # One output unit per class; softmax yields a probability distribution
    Dense(num_of_classes, activation='softmax'),
])
# categorical_crossentropy pairs with the one-hot encoded targets
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])


### Train the model

In [None]:
# Fit the model for 10 epochs in mini-batches of 32, evaluating on the
# validation split after every epoch; the returned history feeds the plots below
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=32,
    epochs=10,
)


### Visualize the model performance

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
# Each row: (train key, validation key, train legend, validation legend, title, y-axis label)
panels = (
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Accuracy over Epochs', 'Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'Loss over Epochs', 'Loss'),
)

plt.figure(figsize=(12, 4))
for position, (train_key, val_key, train_legend, val_legend, title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.show()


### Evaluate on the test set

In [None]:
# Score the trained model on the held-out test split.
# With the compile settings used above, evaluate() returns [loss, accuracy].
test_metrics = model.evaluate(x_test, y_test, verbose=2)
test_loss, test_accuracy = test_metrics
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')

In [None]:
# Predict class probabilities for the test set, then reduce both the
# predictions and the one-hot targets to integer class ids
predictions = model.predict(x_test)
predicted_labels = predictions.argmax(axis=1)
true_labels = y_test.argmax(axis=1)

# Per-class precision / recall / F1 summary
print('\nClassification Report:')
report = classification_report(true_labels, predicted_labels)
print(report)

In [None]:
# Plot confusion matrix
# Integer class id i (as produced by the LabelEncoder) corresponds to
# label_encoder.classes_[i], so those names are the correct tick labels.
# (`num_of_classes` is an int: calling len() on it raised a TypeError.)
class_names = label_encoder.classes_
tick_positions = np.arange(len(class_names))

# Pass the label ids explicitly so the matrix always has one row/column per
# class, even if a class is absent from both the test targets and predictions.
cf_matrix = confusion_matrix(true_labels, predicted_labels, labels=tick_positions)
plt.figure(figsize=(8, 8))
plt.imshow(cf_matrix, cmap='Blues', interpolation='nearest')
plt.title('Confusion Matrix')
plt.xticks(tick_positions, class_names)
plt.yticks(tick_positions, class_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()