In [1]:
pip install transformers datasets torch scikit-learn





In [2]:
# Import required libraries
from datasets import load_dataset
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score
import torch

print("Libraries loaded successfully!")


Libraries loaded successfully!


In [3]:
# Load the GoEmotions dataset
print("Loading dataset...")
dataset = load_dataset("go_emotions")

# Subset sizes live in named constants so the numbers are stated exactly once.
N_TRAIN_SAMPLES = 5000  # rows taken from the start of the train split
N_TEST_SAMPLES = 1000   # rows taken from the start of the test split

# `select(range(n))` keeps the first n rows of a split. The size is clamped to the
# split length so asking for more rows than exist cannot raise an index error.
train_data = dataset["train"].select(range(min(N_TRAIN_SAMPLES, len(dataset["train"]))))
test_data = dataset["test"].select(range(min(N_TEST_SAMPLES, len(dataset["test"]))))

print(f"Training dataset size: {len(train_data)}")
print(f"Test dataset size: {len(test_data)}")


Loading dataset...
Training dataset size: 5000
Test dataset size: 1000


In [4]:
# Preprocess the dataset by extracting text and labels
def preprocess_data(batch):
    """Return a new dict holding only the ``text`` and ``labels`` entries of ``batch``.

    The values are passed through unchanged; any other keys in ``batch`` are
    simply not copied into the result.
    """
    text, labels = batch["text"], batch["labels"]
    return dict(text=text, labels=labels)

# Run the extraction function over every example of both splits, then report completion.
train_data = train_data.map(function=preprocess_data)
test_data = test_data.map(function=preprocess_data)

print("Dataset preprocessed successfully!")


Dataset preprocessed successfully!


In [5]:
# Load DistilBERT tokenizer and model
print("Loading model and tokenizer...")

# Both objects must come from the same checkpoint, so the name is stated once.
MODEL_CHECKPOINT = "distilbert-base-uncased"
# Size the classification head from the dataset's own label list (28 names for GoEmotions)
# rather than a hard-coded literal, so the head always matches the label set in use.
NUM_LABELS = len(dataset["train"].features["labels"].feature.names)

tokenizer = DistilBertTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = DistilBertForSequenceClassification.from_pretrained(
    MODEL_CHECKPOINT,
    num_labels=NUM_LABELS,
)

print("Model and tokenizer loaded successfully!")


Loading model and tokenizer...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model and tokenizer loaded successfully!


In [6]:
# Preprocess the dataset by extracting text and the first label (single-label classification)
def preprocess_data(batch):
    """Collapse one example's multi-label annotation to a single integer label.

    Despite the parameter name, this function is applied through ``map`` without
    ``batched=True``, so ``batch`` is treated as a single example here.

    Args:
        batch: mapping with a ``"text"`` entry and a ``"labels"`` entry. ``labels``
            is normally a sequence of label ids; an ``int`` is also accepted so
            that re-running the cell on already-converted data is a no-op instead
            of raising ``TypeError``.

    Returns:
        ``{"text": <text>, "labels": <int>}`` where the label is the first id of
        the sequence, ``0`` if the sequence is empty, or the int passed in.
    """
    labels = batch["labels"]
    if not isinstance(labels, int):
        # Multi-label -> single-label: keep the first annotation; fall back to class 0
        # when the example carries no annotation at all.
        labels = labels[0] if len(labels) > 0 else 0
    return {"text": batch["text"], "labels": labels}

# Convert both splits to single-label form, one example at a time.
train_data = train_data.map(function=preprocess_data)
test_data = test_data.map(function=preprocess_data)

print("Dataset preprocessed for single-label classification!")


Dataset preprocessed for single-label classification!


In [7]:
# Tokenize the dataset
def tokenize_data(batch):
    """Tokenize the ``"text"`` column of a batch with the notebook's tokenizer.

    Over-long inputs are truncated to the tokenizer's maximum length. No padding
    is applied here on purpose: padding inside a batched ``map`` would pad every
    row to the longest text of the whole map chunk. The ``Trainer`` objects in
    this notebook are built with ``tokenizer=tokenizer``, so padding is instead
    done per training/evaluation batch by the Trainer's default padding collator
    (see the Trainer ``data_collator`` documentation).

    Args:
        batch: mapping whose ``"text"`` entry is a list of strings.

    Returns:
        The tokenizer's output for those texts.
    """
    return tokenizer(batch["text"], truncation=True)

# Tokenize both splits, handing the function whole batches of rows at a time.
train_data = train_data.map(function=tokenize_data, batched=True)
test_data = test_data.map(function=tokenize_data, batched=True)

print("Datasets tokenized successfully!")


Datasets tokenized successfully!


In [8]:
# Expose only the model inputs and the target column, returned as torch tensors.
for _split in (train_data, test_data):
    _split.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

print("Datasets converted to PyTorch format!")


Datasets converted to PyTorch format!


In [18]:
# Hyperparameters and bookkeeping for the fine-tuning run.
training_args = TrainingArguments(
    # --- where artefacts go ---
    output_dir="./results",            # model checkpoints
    logging_dir="./logs",              # training logs
    save_total_limit=1,                # retain only the newest checkpoint
    # --- optimisation ---
    num_train_epochs=3,                # three passes over the training subset
    per_device_train_batch_size=8,     # examples per device per step
    learning_rate=3e-5,
    # --- reporting ---
    logging_steps=10,                  # log the training loss every 10 steps
    evaluation_strategy="epoch",       # evaluate at the end of every epoch
)

# Bundle model, data and settings into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    eval_dataset=test_data,
    train_dataset=train_data,
)

# Kick off fine-tuning; the returned TrainOutput is displayed as the cell result.
print("Starting model training...")
trainer.train()


  trainer = Trainer(


Starting model training...


Epoch,Training Loss,Validation Loss
1,1.825,1.847217
2,1.5327,1.668983
3,1.0086,1.686018


TrainOutput(global_step=1875, training_loss=1.6739320167541505, metrics={'train_runtime': 4666.3266, 'train_samples_per_second': 3.215, 'train_steps_per_second': 0.402, 'total_flos': 194494326729984.0, 'train_loss': 1.6739320167541505, 'epoch': 3.0})

In [20]:
from sklearn.metrics import accuracy_score

# Define a custom metric function
def compute_metrics(eval_pred):
    """Compute accuracy from an evaluation result.

    Args:
        eval_pred: a pair ``(logits, labels)``.

    Returns:
        ``{"accuracy": <score>}`` where the score is ``accuracy_score`` of the
        labels against the arg-max over the last axis of the logits.
    """
    logits, labels = eval_pred
    # Highest-scoring class per row, converted back to NumPy for scikit-learn.
    predicted_ids = torch.tensor(logits).argmax(dim=-1).numpy()
    return {"accuracy": accuracy_score(labels, predicted_ids)}





In [22]:
# Rebuild the Trainer around the same model and data, this time with the
# accuracy callback attached so evaluation reports it.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    eval_dataset=test_data,
    train_dataset=train_data,
)


  trainer = Trainer(


In [24]:
# Score the model on the evaluation split and print the two headline numbers.
print("Evaluating the model...")
results = trainer.evaluate()
eval_loss = results["eval_loss"]
print(f"Validation Loss: {eval_loss}")
eval_accuracy = results["eval_accuracy"]
print(f"Accuracy: {eval_accuracy}")


Evaluating the model...


Validation Loss: 1.6860178709030151
Accuracy: 0.524


In [26]:
# Write the model first, then the tokenizer, into the same directory.
for _artifact in (model, tokenizer):
    _artifact.save_pretrained("./emotion_recognition_model")

print("Model and tokenizer saved successfully!")

from transformers import pipeline

# Re-load the saved directory as a text-classification pipeline for inference.
classifier = pipeline(
    task="text-classification",
    model="./emotion_recognition_model",
    tokenizer="./emotion_recognition_model",
)

# Smoke test: the first sentence is meant to be positive, the second negative.
print(classifier("I am so happy today!"))
print(classifier("I feel really sad and down."))


Model and tokenizer saved successfully!


Device set to use cpu


[{'label': 'LABEL_17', 'score': 0.6141176223754883}]
[{'label': 'LABEL_25', 'score': 0.40091267228126526}]


In [28]:
# Read the class names off the label feature of the train split.
label_feature = dataset["train"].features["labels"].feature
emotions = label_feature.names
print("Emotion labels:", emotions)

Emotion labels: ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']


In [52]:
# Define a helper function to get meaningful labels
def predict_emotion(texts):
    """Classify texts and describe each prediction with its emotion name.

    Relies on the notebook-level ``classifier`` (the inference pipeline) and
    ``emotions`` (the list of class names) being defined.

    Args:
        texts: an iterable of strings, or a single string. A lone string is
            treated as one text; without this guard it would be iterated
            character by character.

    Returns:
        A list with one formatted line per input text, giving the text, the
        predicted emotion name and the score rounded to two decimals.
    """
    if isinstance(texts, str):
        texts = [texts]

    results = []
    for text in texts:
        prediction = classifier(text)[0]  # Get the top prediction
        # Labels look like "LABEL_17"; the trailing integer indexes into `emotions`.
        label_index = int(prediction["label"].split("_")[-1])
        emotion = emotions[label_index]
        results.append(f"Text: '{text}' -> Predicted Emotion: {emotion}, Confidence: {prediction['score']:.2f}")
    return results


In [36]:
# Refresh the list of class names from the dataset's label feature.
emotions = dataset["train"].features["labels"].feature.names

# The pipeline reports labels such as "LABEL_17"; the number after the last
# underscore is the position of the class name in `emotions`.
example_1 = classifier("I am so happy today!")[0]
example_1_index = int(example_1["label"].split("_")[-1])
example_1_label = emotions[example_1_index]
print(f"Text: 'I am so happy today!' -> Predicted Emotion: {example_1_label}")

example_2 = classifier("I feel really sad and down.")[0]
example_2_index = int(example_2["label"].split("_")[-1])
example_2_label = emotions[example_2_index]
print(f"Text: 'I feel really sad and down.' -> Predicted Emotion: {example_2_label}")


Text: 'I am so happy today!' -> Predicted Emotion: joy
Text: 'I feel really sad and down.' -> Predicted Emotion: sadness


In [54]:
# Print the raw pipeline output for three more probe sentences, in order.
for _probe in (
    "This is the best day of my life!",
    "I am feeling very anxious and nervous.",
    "What a boring and disappointing event.",
):
    print(classifier(_probe))


[{'label': 'LABEL_0', 'score': 0.9012430310249329}]
[{'label': 'LABEL_25', 'score': 0.1806475669145584}]
[{'label': 'LABEL_9', 'score': 0.34697696566581726}]


In [46]:

def classify_and_print(text, emotions, classifier):
    """Classify ``text`` and print the predicted emotion with its score.

    Args:
        text: the string to classify.
        emotions: sequence of emotion names, indexed by class id.
        classifier: callable returning a list whose first item is a mapping
            with a ``'label'`` of the form ``'LABEL_<id>'`` and a ``'score'``.

    Returns:
        None; one line is written to standard output.
    """
    top = classifier(text)[0]
    # Everything after the last underscore is the numeric class id ('LABEL_0' -> 0).
    _, _, class_id = top['label'].rpartition('_')
    name = emotions[int(class_id)]
    print(f"Text: {text} -> Predicted Emotion: {name}, Confidence: {top['score']:.2f}")


# Class names, read again from the dataset's label feature.
emotions = dataset["train"].features["labels"].feature.names

# Classify two probe sentences, in order.
for _sentence in (
    "I am incredibly proud of what I've accomplished.",
    "This makes me feel so angry and frustrated.",
):
    classify_and_print(_sentence, emotions, classifier)


Text: I can't believe how amazing this is! -> Predicted Emotion: admiration, Confidence: 0.50
Text: Why does everything have to be so difficult? -> Predicted Emotion: curiosity, Confidence: 0.46
Text: I am incredibly proud of what I've accomplished. -> Predicted Emotion: admiration, Confidence: 0.91
Text: This makes me feel so angry and frustrated. -> Predicted Emotion: anger, Confidence: 0.25


In [62]:

# Probe set: five single sentences followed by three multi-line paragraphs.
# The line breaks and leading indentation inside the triple-quoted strings are part
# of the text handed to the classifier (they show up verbatim in the printed output).
sentences_and_paragraphs = [
    "I am so grateful for all the support I've received!",
    "This is absolutely the worst day of my life.",
    "Wow, I can't stop laughing at this hilarious joke!",
    "I feel so calm and relaxed when I'm by the sea.",
    "It's infuriating how people can be so inconsiderate!",
    """Today was one of the most exciting days of my life. I got to meet my favorite author, 
    and she even signed my book! It feels like a dream come true. I'm so thrilled and inspired 
    to start writing again after talking to her.""",
    """Life has been quite challenging lately. Every time I think I've made progress, another 
    obstacle appears. Sometimes, I wonder if things will ever get better. It's exhausting to 
    keep trying and failing, but I guess I have no choice but to move forward.""",
    """I woke up early this morning, made a cup of coffee, and sat on the balcony to watch the sunrise. 
    The world felt so peaceful, and I couldn’t help but smile. These quiet moments remind me of how beautiful 
    life can be, even in its simplest forms."""
]


# Classify every entry in order; each call prints one "Text: ... -> Predicted Emotion: ..." line.
for text in sentences_and_paragraphs:
    classify_and_print(text, emotions, classifier)


Text: I am so grateful for all the support I've received! -> Predicted Emotion: gratitude, Confidence: 0.77
Text: This is absolutely the worst day of my life. -> Predicted Emotion: sadness, Confidence: 0.17
Text: Wow, I can't stop laughing at this hilarious joke! -> Predicted Emotion: amusement, Confidence: 0.43
Text: I feel so calm and relaxed when I'm by the sea. -> Predicted Emotion: approval, Confidence: 0.13
Text: It's infuriating how people can be so inconsiderate! -> Predicted Emotion: anger, Confidence: 0.28
Text: Today was one of the most exciting days of my life. I got to meet my favorite author, 
    and she even signed my book! It feels like a dream come true. I'm so thrilled and inspired 
    to start writing again after talking to her. -> Predicted Emotion: admiration, Confidence: 0.45
Text: Life has been quite challenging lately. Every time I think I've made progress, another 
    obstacle appears. Sometimes, I wonder if things will ever get better. It's exhausting to 
 

In [64]:
# Eight single-sentence probes, classified and printed in the order listed.
for _sentence in (
    "I can't believe how kind everyone has been to me today.",
    "Why does everything seem to go wrong at the worst possible time?",
    "I’m bursting with joy after hearing this wonderful news!",
    "This situation is making me more frustrated than I’ve ever been.",
    "I feel so incredibly lucky to have such supportive friends.",
    "The way they handled that issue was so unprofessional, I’m shocked!",
    "It's such a beautiful day; I feel like everything is perfect right now.",
    "I can’t stop crying because this memory is so emotional for me.",
):
    classify_and_print(_sentence, emotions, classifier)

Text: I can't believe how kind everyone has been to me today. -> Predicted Emotion: surprise, Confidence: 0.24
Text: Why does everything seem to go wrong at the worst possible time? -> Predicted Emotion: curiosity, Confidence: 0.47
Text: I’m bursting with joy after hearing this wonderful news! -> Predicted Emotion: admiration, Confidence: 0.30
Text: This situation is making me more frustrated than I’ve ever been. -> Predicted Emotion: disappointment, Confidence: 0.17
Text: I feel so incredibly lucky to have such supportive friends. -> Predicted Emotion: joy, Confidence: 0.36
Text: The way they handled that issue was so unprofessional, I’m shocked! -> Predicted Emotion: surprise, Confidence: 0.26
Text: It's such a beautiful day; I feel like everything is perfect right now. -> Predicted Emotion: admiration, Confidence: 0.90
Text: I can’t stop crying because this memory is so emotional for me. -> Predicted Emotion: sadness, Confidence: 0.25


In [66]:


# Example paragraphs
# Eight multi-line probes, each classified and printed by its own call.
# The line breaks inside the triple-quoted strings are part of the text sent to the
# classifier and are echoed as-is in the printed "Text: ..." line.

# Probe 1: reaching a mountain summit.
classify_and_print("""After a long and tiring journey, I finally reached the summit of the mountain. 
The view was breathtaking, and all the effort felt completely worth it. Standing there, I felt 
an overwhelming sense of accomplishment and peace.""", emotions, classifier)

# Probe 2: a disappointing project.
classify_and_print("""This has been the most disappointing project I’ve ever worked on. 
No one met their deadlines, and the result didn’t even come close to what we expected. 
It’s hard not to feel let down after putting in so much effort.""", emotions, classifier)

# Probe 3: a family dinner.
classify_and_print("""Last night, I had the best dinner with my family. We laughed, shared stories, 
and simply enjoyed each other's company. It reminded me how precious these moments are 
and how grateful I am for my loved ones.""", emotions, classifier)

# Probe 4: feeling overwhelmed but determined.
classify_and_print("""Sometimes, I feel like the weight of the world is on my shoulders. 
Between work, studies, and personal responsibilities, it’s hard to find a moment to breathe. 
But deep down, I know I’ll get through it all, even if it feels overwhelming now.""", emotions, classifier)

# Probe 5: a letter from an old friend.
classify_and_print("""Yesterday, I got an unexpected letter from an old friend. Reading it brought back so 
many happy memories, and it felt like no time had passed since we last spoke. It’s amazing how 
some connections stay strong no matter how much time goes by.""", emotions, classifier)

# Probe 6: a weekend at the beach.
classify_and_print("""Spending time at the beach this weekend was pure bliss. The sound of the waves, 
the warmth of the sun, and the cool ocean breeze made me feel so relaxed and content. 
I wish I could stay there forever.""", emotions, classifier)

# Probe 7: a sudden announcement (mixed excitement and anxiety).
classify_and_print("""The announcement today left me speechless. I didn’t think such a big change 
would happen so suddenly, and I’m still processing it. It’s hard to say if I feel more excited 
or anxious about what’s coming next.""", emotions, classifier)

# Probe 8: pride in a finished project.
classify_and_print("""I’ve been working so hard on this project, and finally seeing it come together feels amazing. 
Every late night and tough decision has paid off, and I’m incredibly proud of what I’ve accomplished.""", emotions, classifier)



Text: After a long and tiring journey, I finally reached the summit of the mountain. 
The view was breathtaking, and all the effort felt completely worth it. Standing there, I felt 
an overwhelming sense of accomplishment and peace. -> Predicted Emotion: admiration, Confidence: 0.54
Text: This has been the most disappointing project I’ve ever worked on. 
No one met their deadlines, and the result didn’t even come close to what we expected. 
It’s hard not to feel let down after putting in so much effort. -> Predicted Emotion: disappointment, Confidence: 0.35
Text: Last night, I had the best dinner with my family. We laughed, shared stories, 
and simply enjoyed each other's company. It reminded me how precious these moments are 
and how grateful I am for my loved ones. -> Predicted Emotion: joy, Confidence: 0.39
Text: Sometimes, I feel like the weight of the world is on my shoulders. 
Between work, studies, and personal responsibilities, it’s hard to find a moment to breathe. 
But deep d