In [1]:
# 1. Import Libraries
import pandas as pd
import re
import nltk
import numpy as np

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout



ImportError: Traceback (most recent call last):
  File "c:\Users\deepa\anaconda3\Lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 73, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: A dynamic link library (DLL) initialization routine failed.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.

In [3]:
# 2. NLTK Setup
# Make sure both corpora used by the text cleaner are available locally.
for corpus_name in ('stopwords', 'wordnet'):
    nltk.download(corpus_name)

# Shared helpers read by clean_text: a WordNet lemmatizer and the English stopword set.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\deepa\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\deepa\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
# 3. Load and Preprocess Data
# Read the raw dataset from the working directory. Later cells read the
# 'text' and 'label' columns from this frame.
df = pd.read_csv("emotions.csv")  
# Drop rows that contain any missing value, then rows that are exact duplicates.
# Both calls mutate `df` in place; the index is not reset afterwards.
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)
def clean_text(text):
    """Normalize one raw text string for tokenization.

    The text is lower-cased, every character other than a-z and whitespace
    is deleted (not replaced by a space), and the result is split on
    whitespace. Tokens found in the module-level ``stop_words`` set are
    dropped; the rest are passed through the module-level ``lemmatizer``.

    Returns the surviving lemmas joined by single spaces.
    """
    letters_only = re.sub(r"[^a-z\s]", "", text.lower())
    kept_lemmas = (
        lemmatizer.lemmatize(token)
        for token in letters_only.split()
        if token not in stop_words
    )
    return ' '.join(kept_lemmas)

# Run the cleaner over every row of the raw 'text' column.
df['cleaned_text'] = df['text'].map(clean_text)

In [5]:
# 4. Label Encoding
# Fit the encoder on the label column and keep the resulting integer class ids
# next to the original labels.
le = LabelEncoder()
encoded_labels = le.fit_transform(df['label'])
df['label_encoded'] = encoded_labels

In [None]:
# 5. Tokenization and Padding
max_words = 10000  # vocabulary cap passed to the Tokenizer as num_words
max_len = 100      # fixed sequence length passed to pad_sequences as maxlen

# Build the word index from the cleaned corpus.
cleaned_corpus = df['cleaned_text']
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(cleaned_corpus)

# Integer-encode each document, then bring every sequence to max_len.
sequences = tokenizer.texts_to_sequences(cleaned_corpus)
X = pad_sequences(sequences, maxlen=max_len)

# One-hot targets derived from the integer class ids.
y = to_categorical(df['label_encoded'])


In [7]:
# 6. Train-Test Split
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# 7. Build LSTM Model
# Embedding -> LSTM -> Dense(relu) -> Dropout -> Dense(softmax) classifier.
model = Sequential()
# `input_length` is intentionally not passed: Keras 3 deprecates the argument and
# only emits a warning for it. The sequence length is picked up from the data
# (padded to max_len) the first time the model is called.
model.add(Embedding(input_dim=max_words, output_dim=128))
# NOTE(review): a non-zero recurrent_dropout is understood to disable the cuDNN
# fast path for LSTM on GPU - confirm against the Keras LSTM docs if training
# speed matters.
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# One output unit per one-hot column of y, i.e. one per class.
model.add(Dense(y.shape[1], activation='softmax'))  # Multiclass

# Targets are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [9]:
# 8. Train Model
# Five passes over the training arrays in mini-batches of 64; Keras sets aside
# 10% of the training data for the per-epoch validation metrics.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.1,
)


Epoch 1/5
[1m4682/4682[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m664s[0m 141ms/step - accuracy: 0.8096 - loss: 0.4815 - val_accuracy: 0.9403 - val_loss: 0.0919
Epoch 2/5
[1m4682/4682[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m530s[0m 113ms/step - accuracy: 0.9389 - loss: 0.0976 - val_accuracy: 0.9415 - val_loss: 0.0885
Epoch 3/5
[1m4682/4682[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m539s[0m 115ms/step - accuracy: 0.9417 - loss: 0.0900 - val_accuracy: 0.9415 - val_loss: 0.0883
Epoch 4/5
[1m4682/4682[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m560s[0m 120ms/step - accuracy: 0.9425 - loss: 0.0871 - val_accuracy: 0.9424 - val_loss: 0.0898
Epoch 5/5
[1m4682/4682[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m568s[0m 121ms/step - accuracy: 0.9421 - loss: 0.0849 - val_accuracy: 0.9423 - val_loss: 0.0916


<keras.src.callbacks.history.History at 0x297da3bf790>

In [10]:
# 9. Evaluate Model
# Class probabilities for the held-out samples, one row per sample.
y_pred_prob = model.predict(X_test)

# Collapse the one-hot targets and the predicted probabilities to class indices.
y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(y_pred_prob, axis=1)

acc = accuracy_score(y_true, y_pred)
print("Accuracy:", acc)

# Per-class precision/recall/F1, with rows named after the encoder's classes.
class_names = le.classes_.astype(str)
report = classification_report(y_true, y_pred, target_names=class_names)
print("Classification Report:")
print(report)


[1m2601/2601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 28ms/step
Accuracy: 0.9395253829978972
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.97     24121
           1       0.99      0.92      0.95     28220
           2       0.77      1.00      0.87      6824
           3       0.90      0.99      0.95     11448
           4       0.94      0.86      0.90      9574
           5       0.74      0.99      0.85      3038

    accuracy                           0.94     83225
   macro avg       0.89      0.95      0.91     83225
weighted avg       0.95      0.94      0.94     83225



In [None]:
def predict_emotion(input_text):
    """Return the label the trained model predicts for one raw text string.

    The input goes through the same steps as the training data:
    ``clean_text``, the fitted ``tokenizer``, and padding to ``max_len``.
    The index of the highest model output is then mapped back to its
    original label value with the fitted encoder ``le``.
    """
    encoded = tokenizer.texts_to_sequences([clean_text(input_text)])
    model_input = pad_sequences(encoded, maxlen=max_len)

    scores = model.predict(model_input)
    # argmax over the flattened output; the batch holds a single sample.
    best_index = np.argmax(scores)

    return le.inverse_transform([best_index])[0]

# Example use
# NOTE(review): the classification report recorded above lists the classes as
# 0-5, so this presumably prints a numeric class id rather than a name such as
# 'happy' - confirm against the 'label' column of emotions.csv.
print(predict_emotion("I feel amazing and full of energy!"))  # → prints the label value returned by le.inverse_transform


In [2]:
# Persist the trained network to disk. This cell must run after the model has
# been built and trained: the recorded run (In [2]) raised NameError because
# `model` did not exist yet.
model_path = "emotion_lstm_model.h5"
model.save(model_path)

NameError: name 'model' is not defined

In [13]:
import pickle

# Serialize the fitted tokenizer so the same word index can be reloaded later.
with open("tokenizer.pkl", mode="wb") as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

In [14]:
# Serialize the fitted label encoder so class indices can be mapped back to labels later.
with open("label_encoder.pkl", mode="wb") as encoder_file:
    pickle.dump(le, encoder_file)
