In [1]:
!pip install datasets



In [171]:
from datasets import load_dataset

In [172]:
import pandas as pd
# Load the GoEmotions corpus; the result is indexed by split name further down.
dataset = load_dataset("go_emotions")

In [173]:
# Show the training split's summary (feature names and row count).
dataset["train"]

Dataset({
    features: ['text', 'labels', 'id'],
    num_rows: 43410
})

In [174]:
# Width of the model's output layer: one unit per emotion class.
total_emotions = 28

In [175]:
# Class names copied out of the dataset's `labels` feature as a plain list.
label_names = [*dataset["train"].features["labels"].feature.names]
label_names

['admiration',
 'amusement',
 'anger',
 'annoyance',
 'approval',
 'caring',
 'confusion',
 'curiosity',
 'desire',
 'disappointment',
 'disapproval',
 'disgust',
 'embarrassment',
 'excitement',
 'fear',
 'gratitude',
 'grief',
 'joy',
 'love',
 'nervousness',
 'optimism',
 'pride',
 'realization',
 'relief',
 'remorse',
 'sadness',
 'surprise',
 'neutral']

In [176]:
# Raw texts and raw label lists for each of the three splits.
X_train, y_train = dataset["train"]["text"], dataset["train"]["labels"]
X_val, y_val = dataset["validation"]["text"], dataset["validation"]["labels"]
X_test, y_test = dataset["test"]["text"], dataset["test"]["labels"]

In [177]:
# NumPy is used in this cell, which runs before the notebook's later
# `import numpy as np` cell; importing it here keeps a fresh-kernel
# "Run All" from failing with NameError.
import numpy as np

num_labels = len(label_names)

def to_multihot(labels):
    """Encode a list of class ids as a fixed-length multi-hot vector.

    Args:
        labels: Integer class ids that apply to one example.

    Returns:
        A float32 array of length ``num_labels`` holding 1.0 at every
        position listed in ``labels`` and 0.0 everywhere else.
    """
    v = np.zeros(num_labels, dtype="float32")
    v[labels] = 1.0
    return v

# One multi-hot row per example, for each split.
y_train = np.array([to_multihot(ids) for ids in dataset['train']['labels']])
y_val   = np.array([to_multihot(ids) for ids in dataset['validation']['labels']])
y_test  = np.array([to_multihot(ids) for ids in dataset['test']['labels']])

In [178]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [179]:
# Build the word index from the training texts only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

In [180]:
# Convert each split's texts into lists of integer word indices.
X_train_seq, X_val_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_val, X_test)
)

In [181]:
# tokenizer.word_index  # uncomment to inspect the learned word -> index mapping

In [182]:
# Tokenizer word indices start at 1 (0 is the padding value), so an Embedding
# layer sized with this value needs len(word_index) + 1 rows to cover the
# highest index.
vocab_size = len(tokenizer.word_index) + 1

In [183]:
# Length (in tokens) of the longest training sequence.
max_len = max(map(len, X_train_seq))
max_len

142

In [184]:
# Pad every split at the end up to the longest training sequence. Using
# `max_len` instead of a literal keeps this in sync with the value computed
# from the data and with the padding applied at prediction time.
X_train_seq = pad_sequences(X_train_seq, maxlen=max_len, padding='post')
X_test_seq = pad_sequences(X_test_seq, maxlen=max_len, padding='post')
X_val_seq = pad_sequences(X_val_seq, maxlen=max_len, padding='post')

In [185]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *

In [186]:
import numpy as np

# Make every model input and target an int32 NumPy array.
X_train_seq, X_val_seq, X_test_seq = (
    np.array(split, dtype="int32") for split in (X_train_seq, X_val_seq, X_test_seq)
)
y_train, y_test, y_val = (
    np.array(split, dtype="int32") for split in (y_train, y_test, y_val)
)

In [187]:
class Attention(Layer):
    """Attention pooling that collapses the time axis of a sequence.

    Each timestep is scored with a single learned weight vector, the scores
    are softmax-normalised across timesteps, and the output is the
    score-weighted sum of the timestep vectors.
    """

    def __init__(self, **kwargs):
        # Forward the standard Layer arguments (name, dtype, trainable, ...).
        # They are part of the layer's config, so an __init__ that rejects
        # them cannot be re-instantiated when a saved model is reloaded.
        super().__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per feature of the last axis.
        self.W = self.add_weight(
            shape=(input_shape[-1], 1),
            initializer="normal",
            trainable=True
        )

    def call(self, inputs):
        # inputs: (batch, timesteps, hidden)
        scores = tf.matmul(inputs, self.W)      # (batch, timesteps, 1)
        scores = tf.nn.softmax(scores, axis=1)  # attention weights
        # Weighted sum over the time axis -> (batch, hidden)
        context = tf.reduce_sum(inputs * scores, axis=1)
        return context

In [188]:
# Despite its name, `input_shape` is the model's symbolic input tensor:
# one row of `max_len` token ids per example.
input_shape = Input(shape=(max_len,))

In [189]:
# Token ids -> 16-d embeddings -> two stacked BiLSTMs -> per-timestep dense
# layers -> attention pooling -> one sigmoid score per emotion.
# `input_length` was dropped from Embedding: it is deprecated in Keras 3 and
# the sequence length is already fixed by the Input tensor.
x = Embedding(input_dim=vocab_size, output_dim=16)(input_shape)
x = Bidirectional(LSTM(128, return_sequences=True))(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(128, return_sequences=True))(x)
x = Dropout(0.3)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)
# Collapse the time axis into a single vector per example.
x = Attention()(x)
# Sigmoid (not softmax): the targets are multi-hot, so each emotion is scored
# independently.
output = Dense(total_emotions, activation='sigmoid')(x)



In [190]:
from tensorflow.keras.models import Model

In [191]:
# Tie the input tensor and the sigmoid output together into a trainable model.
model1 = Model(outputs=output, inputs=input_shape)

In [192]:
# Binary cross-entropy treats each sigmoid output as its own yes/no problem.
# NOTE(review): with multi-hot targets, confirm which metric Keras resolves the
# 'accuracy' shorthand to before interpreting the logged numbers.
model1.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [193]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop once val_loss has gone 3 epochs without improving, and roll the model
# back to the weights of the best epoch instead of keeping the later, worse
# ones.
early = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

In [194]:
# Train for at most 10 epochs, evaluating on the validation split after each
# one; the `early` callback may end the run sooner.
model1.fit(
    X_train_seq,
    y_train,
    validation_data=(X_val_seq, y_val),
    epochs=10,
    callbacks=[early],
)

Epoch 1/10
1357/1357 ━━━━━━━━━━━━━━━━━━━━ 46s 31ms/step - accuracy: 0.2831 - loss: 0.1702 - val_accuracy: 0.3581 - val_loss: 0.1334
Epoch 2/10
1357/1357 ━━━━━━━━━━━━━━━━━━━━ 39s 29ms/step - accuracy: 0.3966 - loss: 0.1302 - val_accuracy: 0.4488 - val_loss: 0.1227
Epoch 3/10
1357/1357 ━━━━━━━━━━━━━━━━━━━━ 39s 29ms/step - accuracy: 0.4448 - loss: 0.1200 - val_accuracy: 0.4595 - val_loss: 0.1210
Epoch 4/10
1357/1357 ━━━━━━━━━━━━━━━━━━━━ 40s 29ms/step - accuracy: 0.4656 - loss: 0.1138 - val_accuracy: 0.4508 - val_loss: 0.1196
Epoch 5/10
1357/1357 ━━━━━━━━━━━━━━━━━━━━ 40s 29ms/step - accuracy: 0.4812 - loss: 0.1093 - val_accuracy: 0.4633 - val_loss: 0.1234
Epoch 6/10
1357/1357 ━━━━━━━━━━━━━━━━━━━━ 39s 29ms/step - accuracy: 0.4945 - loss: 0.1042 - val_accuracy: 0.4659 - val_loss: 0.1208
Epoc

<keras.src.callbacks.history.History at 0x7888df0003e0>

In [195]:
# Display names for the model outputs, in output order. The list needs one
# entry per output unit (28), so it must include the final "neutral" class.
emotion_labels = [
    "admiration", "amusement", "anger", "annoyance", "approval",
    "caring", "confusion", "curiosity", "desire", "disappointment",
    "disapproval", "disgust", "embarrassment", "excitement", "fear",
    "gratitude", "grief", "joy", "love", "nervousness", "optimism",
    "pride", "realization", "relief", "remorse", "sadness", "surprise",
    "neutral"
]

In [196]:
!pip install gradio



In [197]:
import gradio as gr
import numpy as np

In [198]:
# Read the class names from the dataset's `labels` feature rather than
# maintaining them by hand.
emotion_labels = (
    dataset["train"]
    .features["labels"]
    .feature
    .names
)


In [199]:
def predict_emotion(text):
    """Predict the dominant emotion expressed in a piece of text.

    Args:
        text: Raw sentence typed by the user. ``None``, empty, or
            whitespace-only input is not sent to the model.

    Returns:
        A ``(top_emotion, confidence_pct, top3)`` tuple:
        ``top_emotion`` is the highest-scoring label name, or ``"Uncertain"``
        when its score is below 0.4 (or when there is no text to score);
        ``confidence_pct`` is that top score as a percentage rounded to two
        decimals; ``top3`` maps the three highest-scoring label names to
        their percentages.
    """
    # Nothing to classify: skip the model instead of scoring a sequence that
    # consists only of padding.
    if not text or not text.strip():
        return "Uncertain", 0.0, {}

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len, padding="post")

    probs = model1.predict(padded, verbose=0)[0]

    # Rank only the outputs that have a name, so a label list shorter than the
    # output layer cannot raise IndexError when the top class is looked up.
    num_classes = min(len(probs), len(emotion_labels))
    labels = emotion_labels[:num_classes]
    probs = probs[:num_classes]

    top_idx = int(np.argmax(probs))
    top_emotion = labels[top_idx]
    confidence = float(probs[top_idx])

    # Three best classes, highest score first.
    top3_idx = np.argsort(probs)[-3:][::-1]
    top3 = {
        labels[i]: round(float(probs[i]) * 100, 2)
        for i in top3_idx
    }

    # Below this score the top label is reported as "Uncertain"; the numeric
    # confidence is still returned unchanged.
    if confidence < 0.4:
        top_emotion = "Uncertain"

    return top_emotion, round(confidence * 100, 2), top3


In [200]:
# Gradio front end: one text box in; predicted label, confidence and the
# top-3 breakdown out.
interface = gr.Interface(
    title="Emotion Detection (BiLSTM + Attention)",
    description=(
        "This model detects emotions from text using Bidirectional LSTM "
        "with Attention mechanism trained on the GoEmotions dataset."
    ),
    fn=predict_emotion,
    inputs=gr.Textbox(placeholder="Type a sentence expressing emotion...", lines=3),
    outputs=[
        gr.Textbox(label="Predicted Emotion"),
        gr.Number(label="Confidence (%)"),
        gr.JSON(label="Top 3 Emotions"),
    ],
    # Each example is a one-element row because the interface has one input.
    examples=[
        [sentence]
        for sentence in (
            "I feel absolutely amazing today!",
            "I am scared and nervous about the exam.",
            "That was the worst experience ever.",
        )
    ],
)

In [201]:
# Start serving the demo. In a hosted notebook Gradio switches on
# `share=True` by itself (see the message printed below).
interface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d7dde6b53d86813138.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


