In [1]:
import pandas as pd

In [2]:
from transformers import pipeline

# Load the fine-tuned BERT-Emotion model
sentiment_analysis = pipeline("text-classification", model="boltuix/bert-emotion")

# Define label-to-emoji mapping
label_to_emoji = {
    "Sadness": "😢",
    "Anger": "😠",
    "Love": "❤️",
    "Surprise": "😲",
    "Fear": "😱",
    "Happiness": "😄",
    "Neutral": "😐",
    "Disgust": "🤢",
    "Shame": "🙈",
    "Guilt": "😔",
    "Confusion": "😕",
    "Desire": "🔥",
    "Sarcasm": "😏"
}


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu


In [3]:
# Input text
text = "i am proud of you!"

# Analyze emotion
result = sentiment_analysis(text)[0]
label = result["label"].capitalize()
emoji = label_to_emoji.get(label, "❓")

# Output
print(f"Text: {text}")
print(f"Predicted Emotion: {label} {emoji}")
print(f"Confidence: {result['score']:.2%}")

Text: i am proud of you!
Predicted Emotion: Happiness 😄
Confidence: 69.75%


In [7]:
dataset = pd.read_csv('emo_data.csv', encoding="utf-8-sig")

In [8]:
len(dataset)

584

In [9]:
dataset.columns

Index(['message'], dtype='object')

In [10]:
dataset.head()

Unnamed: 0,message
0,I really enjoyed the movie
1,The food was terrible
2,I'm not sure how I feel about this
3,The service was excellent
4,I had a bad experience


In [11]:
dataset.tail()

Unnamed: 0,message
579,I have to cancel my vacation plans because I c...
580,My computer crashed and I lost all my importan...
581,I got into a car accident and my car is totale...
582,I have a cold and can't stop coughing. it's re...
583,I just found out my ex is dating someone new. ...


In [12]:
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
corpus = []
for i in range(0,len(dataset)):
  message = re.sub('[^a-zA-Z]', ' ', dataset['message'][i])
  message = message.lower()
  message = message.split()
  ps = PorterStemmer()
  all_stopwords = stopwords.words('english')
  all_stopwords.remove('not')
  message = [ps.stem(word) for word in message if not word in set(all_stopwords)]
  message = ' '.join(message)
  corpus.append(message)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:
print(corpus)

['realli enjoy movi', 'food terribl', 'not sure feel', 'servic excel', 'bad experi', 'great product', 'recommend', 'price bit high', 'weather perfect today', 'traffic horribl', 'feel neutral', 'concert amaz', 'custom servic terribl', 'book masterpiec', 'regret buy product', 'view breathtak', 'know think', 'internet speed slow', 'museum fascin', 'nois level high', 'opinion', 'hotel room spaciou clean', 'servic restaur aw', 'softwar user friendli', 'qualiti product poor', 'realli care', 'beach crowd', 'excel compani work', 'movi wast time', 'undecid', 'game complet disast', 'sceneri beauti', 'speaker engag', 'great time', 'product overpr', 'indiffer', 'perform outstand', 'car broke highway', 'strong feel', 'food delici', 'servic hotel terribl', 'phone amaz', 'not impress', 'mix feel', 'weather terribl today', 'book disappoint', 'care one way', 'hike exhaust', 'fantast product', 'know make', 'custom servic great', 'nois level toler', 'hotel room dirti cramp', 'pleas', 'traffic light', 'mo

In [14]:
%%time
# Run predictions
predictions = sentiment_analysis(corpus, truncation=True)

# Extract predicted labels
predicted_labels = [pred["label"].capitalize() for pred in predictions]
emojis = [label_to_emoji.get(label, "❓") for label in predicted_labels]

# Add to DataFrame
dataset["predicted_emotion"] = predicted_labels
dataset["emoji"] = emojis

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


CPU times: user 3.25 s, sys: 13.3 ms, total: 3.27 s
Wall time: 3.24 s


In [17]:
predicted_labels[583]

'Sadness'

In [None]:
# from sklearn.metrics import classification_report, accuracy_score
# # If you have true labels for evaluation
# if "true_label" in dataset.columns:
#     true_labels = dataset["true_label"].tolist()

#     # Show classification report
#     print("Classification Report:")
#     print(classification_report(true_labels, predicted_labels))

#     # Show accuracy
#     acc = accuracy_score(true_labels, predicted_labels)
#     print(f"Accuracy: {acc:.2%}")
# else:
#     print("No true labels found. Only predictions will be shown.")

In [18]:
# Save to new CSV
dataset.to_csv("emo_data_predictions_with_emoji.csv", index=False, encoding="utf-8-sig")
print("Saved as emo_data_predictions_with_emoji.csv")

Saved as emo_data_predictions_with_emoji.csv
