In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
dataset = load_dataset("emotion")

# Extracting features and labels
texts = dataset['train']['text']
labels = dataset['train']['label']

# Tokenize the text
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to ensure uniform length
max_sequence_length = 100  # Adjust according to your data and requirements
data = pad_sequences(sequences, maxlen=max_sequence_length)

# Convert labels to one-hot encoding
label_dict = {label: i for i, label in enumerate(set(labels))}
num_classes = len(label_dict)
labels = [label_dict[label] for label in labels]
labels = tf.keras.utils.to_categorical(labels, num_classes)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, random_state=42)

# Build LSTM model
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(input_dim=10000, output_dim=16),  # Removed input_length
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
    tf.keras.layers.Dense(6, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_val, y_val))


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Epoch 1/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 87ms/step - accuracy: 0.3406 - loss: 1.6025 - val_accuracy: 0.4822 - val_loss: 1.4207
Epoch 2/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 89ms/step - accuracy: 0.5978 - loss: 1.1523 - val_accuracy: 0.6275 - val_loss: 0.9958
Epoch 3/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 89ms/step - accuracy: 0.6957 - loss: 0.8243 - val_accuracy: 0.6906 - val_loss: 0.8614
Epoch 4/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 85ms/step - accuracy: 0.8224 - loss: 0.5412 - val_accuracy: 0.7991 - val_loss: 0.6154
Epoch 5/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 83ms/step - accuracy: 0.8775 - loss: 0.4080 - val_accuracy: 0.8028 - val_loss: 0.5986
Epoch 6/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 84ms/step - accuracy: 0.9165 - loss: 0.2677 - val_accuracy: 0.8169 - val_loss: 0.6004
Epoch 7/10
[1m2

<keras.src.callbacks.history.History at 0x2c23f0074d0>

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Define a function for making predictions
def predict_emotion(input_text):
    # Preprocess the input text
    sequence = tokenizer.texts_to_sequences([input_text])
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length)
    
    # Make prediction
    prediction = model.predict(padded_sequence)
    
    # Decode the prediction
    emotion_labels = ['joy', 'anger', 'love', 'sadness', 'fear', 'surprise']
    predicted_label = emotion_labels[np.argmax(prediction)]
    prediction_value = np.max(prediction)
    
    return predicted_label, prediction_value

# Now, you can use this function to get predictions for any input text
input_text = input("Enter a text: ")
predicted_emotion, confidence = predict_emotion(input_text)
print("Predicted Emotion:", predicted_emotion)
print("Confidence:", confidence)


Enter a text:  i am upset


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 788ms/step
Predicted Emotion: sadness
Confidence: 0.85830307
