<a href="https://colab.research.google.com/github/danielmlow/tutorials/blob/main/text/sentiment_analysis_emotion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Emotion classification (pre-trained)

In [10]:
!pip install -q transformers

In [None]:
import pandas as pd
from transformers import RobertaTokenizerFast, TFRobertaForSequenceClassification, pipeline


In [None]:
def huggingface_output_2_df(output_dict, add_to_col_names = None):
	"""Convert per-document label/score lists into a pandas DataFrame.

	Args:
		output_dict: Sequence with one entry per document. Each entry is an
			iterable of dicts carrying a 'label' and a 'score' key.
		add_to_col_names: Optional string prepended to every label to build
			the column name. Ignored when falsy.

	Returns:
		pandas.DataFrame with one row per document and one column per label,
		holding that label's score. Columns appear in first-seen label order.
		A label absent from a document is NaN in that document's row.
		Empty input yields an empty DataFrame.
	"""
	if not output_dict:
		# Nothing to inspect; avoids an IndexError on output_dict[0].
		return pd.DataFrame()
	rows = []
	column_order = {}  # dict used as an insertion-ordered set of column names
	for doc in output_dict:
		row = {}
		for feature in doc:
			name = feature.get('label')
			if add_to_col_names:
				name = add_to_col_names + name
			# Key each score by its label instead of its position, so documents
			# whose labels come in a different order still line up correctly.
			row[name] = feature.get('score')
			column_order.setdefault(name, None)
		rows.append(row)
	return pd.DataFrame(rows, columns = list(column_order))

In [None]:
# Example sentences scored by the models in this notebook, one per line.
# NOTE(review): 'I have happy' reads like a typo; left unchanged because the
# saved outputs in this notebook were computed from this exact text.
docs = [
    'I am happy',
    'I have happy, but worried about tomorrow',
    "I'm miserable",
    "I'm sad, but hopeful",
    "Don't talk to me like that!",
    "Really? I'm shocked!",
]

# EmoRoBERTa (trained on GoEmotions)

https://huggingface.co/arpanghoshal/EmoRoBERTa

In [11]:
# Load the tokenizer and the TensorFlow model once, then hand the loaded
# objects to the pipeline. Passing the model name instead would make
# pipeline() load the same checkpoint a second time.
tokenizer = RobertaTokenizerFast.from_pretrained("arpanghoshal/EmoRoBERTa")
model = TFRobertaForSequenceClassification.from_pretrained("arpanghoshal/EmoRoBERTa")
emotion = pipeline('sentiment-analysis',
                    model=model,
                    tokenizer=tokenizer)

Downloading (…)okenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Downloading (…)"tf_model.h5";:   0%|          | 0.00/501M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at arpanghoshal/EmoRoBERTa.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at arpanghoshal/EmoRoBERTa.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


In [28]:
# Score every label for each sentence, then tabulate the scores with one row
# per sentence and one 'emoroberta_'-prefixed column per label.
# NOTE(review): return_all_scores appears to be deprecated in newer
# transformers releases in favour of top_k=None — confirm before upgrading.
output_dict = emotion(
    docs,
    return_all_scores=True,
)
feature_vectors = huggingface_output_2_df(
    output_dict,
    add_to_col_names='emoroberta_',
)
feature_vectors

Unnamed: 0,emoroberta_admiration,emoroberta_amusement,emoroberta_anger,emoroberta_annoyance,emoroberta_approval,emoroberta_caring,emoroberta_confusion,emoroberta_curiosity,emoroberta_desire,emoroberta_disappointment,...,emoroberta_love,emoroberta_nervousness,emoroberta_optimism,emoroberta_pride,emoroberta_realization,emoroberta_relief,emoroberta_remorse,emoroberta_sadness,emoroberta_surprise,emoroberta_neutral
0,0.004448,0.000923,6.1e-05,0.000282,0.006051,0.003484,3.6e-05,0.000231,0.000112,0.000132,...,0.00052,1.3e-05,0.000367,0.000159,0.000429,0.001209,2.4e-05,4.1e-05,0.000144,0.005801
1,0.000334,0.000561,3.8e-05,0.000199,0.006805,0.010858,5.9e-05,0.000198,0.000464,0.000218,...,0.000439,0.000466,0.001454,0.000112,0.00046,0.00405,4e-05,0.000154,9.4e-05,0.002192
2,3.1e-05,0.000238,0.005566,0.005236,0.000218,0.000454,3.3e-05,6.5e-05,0.000112,0.089282,...,0.00026,0.000575,5.6e-05,3.6e-05,0.000829,2.5e-05,0.000351,0.89078,5.2e-05,0.00128
3,1.1e-05,0.000743,0.000156,0.000318,8.7e-05,0.002711,6e-05,0.00011,7.1e-05,0.003636,...,7.5e-05,0.000484,0.001483,4e-06,0.000603,2.6e-05,0.001311,0.984306,5.3e-05,0.001323
4,4.8e-05,6.7e-05,0.980031,0.012423,0.000131,0.000133,9.2e-05,2.1e-05,1.4e-05,0.00043,...,0.000232,3.5e-05,4.2e-05,0.000151,8.8e-05,1.1e-05,5.4e-05,0.00034,5.1e-05,0.000968
5,4.7e-05,7.2e-05,2.6e-05,7.2e-05,5.3e-05,1.7e-05,0.000153,0.000274,5.3e-05,5.4e-05,...,1.8e-05,2.3e-05,0.000236,1.4e-05,0.001299,2.9e-05,3e-06,8e-06,0.996606,0.000131


# distilbert-base-uncased-emotion

https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion


In [None]:
!pip install -q transformers

In [29]:
from transformers import pipeline
import pandas as pd

In [30]:
# Same six example sentences as in the EmoRoBERTa section above.
docs = [
    'I am happy',
    'I have happy, but worried about tomorrow',
    "I'm miserable",
    "I'm sad, but hopeful",
    "Don't talk to me like that!",
    "Really? I'm shocked!",
]

In [31]:
# Build a pipeline from the model id alone; no task name is passed here.
sentiment_pipeline = pipeline(
    model="bhadresh-savani/distilbert-base-uncased-emotion",
)

In [33]:
# Score every label for each sentence, then tabulate the scores with one row
# per sentence and one 'distilbert_'-prefixed column per label.
output_dict = sentiment_pipeline(
    docs,
    return_all_scores=True,
)
feature_vectors = huggingface_output_2_df(
    output_dict,
    add_to_col_names='distilbert_',
)
feature_vectors



Unnamed: 0,distilbert_sadness,distilbert_joy,distilbert_love,distilbert_anger,distilbert_fear,distilbert_surprise
0,0.000544,0.998437,0.000456,0.000262,0.000143,0.000158
1,0.035586,0.286981,0.00568,0.138778,0.531989,0.000986
2,0.998362,0.000282,0.00018,0.000899,0.000147,0.00013
3,0.067531,0.920843,0.007467,0.002613,0.001107,0.000439
4,0.003571,0.002445,0.000491,0.988826,0.004357,0.00031
5,0.001253,0.003606,0.001144,0.001026,0.002769,0.990202
