In [17]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import pickle
import re
import nltk


In [None]:
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK corpora that the stop-word list and the lemmatizer rely on.
nltk.download('wordnet')
nltk.download('stopwords')

# Module-level helpers shared by the text-cleaning function below.
stop_words = set(stopwords.words('english'))  # set => O(1) membership checks
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    """Normalise one piece of text before tokenisation.

    Lower-cases the input, strips every non-letter character from each
    whitespace-separated token, drops English stop words and lemmatises
    whatever is left.

    Args:
        text: Raw text. Non-string values (for example NaN coming from an
            empty CSV cell) are treated as empty text instead of raising.

    Returns:
        The surviving lemmas joined by single spaces, or "" if none survive.
    """
    if not isinstance(text, str):
        return ""

    lemmas = []
    for raw_token in text.lower().split():
        # Tokens contain no whitespace, so the character class needs no '\s'
        # (which was an invalid escape in the previous non-raw literal).
        word = re.sub(r'[^a-zA-Z]', '', raw_token)
        if not word:
            continue
        # Test both forms: the raw token catches contractions such as "don't",
        # the stripped one catches stop words glued to punctuation ("the,").
        if raw_token in stop_words or word in stop_words:
            continue
        lemmas.append(lemmatizer.lemmatize(word))
    return " ".join(lemmas)


# Read the training CSV and attach a cleaned copy of every text.
df = pd.read_csv('/content/sample_data/training.csv')
df = df.assign(cleaned_text=df['text'].apply(clean_text))

# Preview raw and cleaned text side by side.
df.loc[:, ['text', 'cleaned_text']].head()

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,text,cleaned_text
0,i didnt feel humiliated,didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,go feeling hopeless damned hopeful around some...
2,im grabbing a minute to post i feel greedy wrong,im grabbing minute post feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,ever feeling nostalgic fireplace know still pr...
4,i am feeling grouchy,feeling grouchy


In [None]:
# Every sequence is padded / truncated to this many tokens.
max_length = 150

# Build the vocabulary from the cleaned texts; unseen words map to '<OOV>'.
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(df['cleaned_text'])

# Integer-encode the texts, pad on the left (the default, spelled out here)
# and cut over-long sequences at the end.
sequences = tokenizer.texts_to_sequences(df['cleaned_text'])
X = pad_sequences(sequences, maxlen=max_length, padding='pre', truncating='post')

# One-hot encode the labels; columns follow the sorted label values.
y = pd.get_dummies(df['label']).to_numpy()


In [None]:
# Building the model
from tensorflow.keras.callbacks import ReduceLROnPlateau

model = tf.keras.Sequential([
    # Declare the input shape up front so the model is built (and summary()
    # shows real shapes) without the deprecated Embedding `input_length` argument.
    tf.keras.Input(shape=(max_length,)),
    # +1 because Tokenizer indices start at 1; index 0 is the padding value.
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=200),  # Embedding layer
    # return_sequences=True keeps the per-timestep outputs for the stacked LSTM below.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)),  # Bidirectional LSTM layer
    tf.keras.layers.LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    tf.keras.layers.Dense(128, activation='relu'),  # Fully connected layer with ReLU activation
    tf.keras.layers.Dropout(0.5),  # Dropout layer for regularization
    tf.keras.layers.Dense(6, activation='softmax')  # Output layer with softmax for multi-class classification
])

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Cut the learning rate by 10x when val_loss has not improved for 3 epochs.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6)
model.summary()




In [None]:
# Train for 5 epochs, holding out 20% of the samples for validation and
# letting the scheduler lower the learning rate when val_loss plateaus.
history = model.fit(
    X,
    y,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
    callbacks=[lr_scheduler],
)

Epoch 1/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 1s/step - accuracy: 0.3448 - loss: 1.5559 - val_accuracy: 0.7125 - val_loss: 0.8012 - learning_rate: 0.0010
Epoch 2/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 1s/step - accuracy: 0.7697 - loss: 0.6319 - val_accuracy: 0.8559 - val_loss: 0.4230 - learning_rate: 0.0010
Epoch 3/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 1s/step - accuracy: 0.9216 - loss: 0.2509 - val_accuracy: 0.8872 - val_loss: 0.3422 - learning_rate: 0.0010
Epoch 4/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 1s/step - accuracy: 0.9542 - loss: 0.1450 - val_accuracy: 0.8875 - val_loss: 0.4018 - learning_rate: 0.0010
Epoch 5/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 1s/step - accuracy: 0.9768 - loss: 0.0757 - val_accuracy: 0.9075 - val_loss: 0.3372 - learning_rate: 0.0010


In [None]:
# Persist the trained network to disk.
model.save('emotion_detection_model.h5')

# Persist the fitted tokenizer so inference reuses the training vocabulary.
with open('tokenizer.pickle', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)




In [None]:
import pickle
from tensorflow.keras.models import load_model

# Restore the trained network saved earlier.
model = load_model('emotion_detection_model.h5')

# Restore the fitted tokenizer. Only unpickle files you created yourself:
# pickle can execute arbitrary code while loading.
with open('tokenizer.pickle', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)



In [None]:
# Entry i names the emotion for class i of the model's 6-way softmax output.
# NOTE(review): 'surprise' was added so all 6 output classes have a name; the
# order assumes the integer encoding 0=sadness ... 5=surprise (as in the
# dair-ai/emotion dataset) - confirm against the values in df['label'].
emotion_labels = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

def predict_emotion(text):
    """Predict the emotion expressed by a single piece of text.

    The text goes through the same pipeline as the training data:
    `clean_text`, integer encoding with the fitted tokenizer, then padding /
    truncation to `max_length`.

    Args:
        text: Raw input sentence.

    Returns:
        The entry of `emotion_labels` for the highest-probability class.
    """
    # Clean exactly like the training texts; otherwise stop words, which the
    # tokenizer never saw, would all turn into '<OOV>' tokens.
    cleaned = clean_text(text)
    seq = tokenizer.texts_to_sequences([cleaned])
    # `max_length` is the name defined at training time (`max_len` was undefined).
    padded_seq = pad_sequences(seq, maxlen=max_length, truncating='post')
    prediction = model.predict(padded_seq, verbose=0)
    # One input row in, so take the argmax of the first (only) row.
    predicted_label = int(prediction.argmax(axis=-1)[0])
    predicted_emotion = emotion_labels[predicted_label]
    return predicted_emotion