# Load the Dataset

In [46]:
from datasets import load_dataset

In [64]:
# Load the `emotion` dataset and read the class names of its label feature.
emotion = load_dataset('emotion')
label_names = emotion['train'].features['label'].names

# Map the classifier's generic "LABEL_<i>" ids to the readable class names.
# Enumerate `label_names` directly: the earlier `range(len(labels))` depended
# on a `labels` variable that is never defined in this notebook, so the cell
# failed with a NameError on a fresh kernel.
label_name = {
    f"LABEL_{i}": name for i, name in enumerate(label_names)
}

# Load the Fine-Tuned Model

In [52]:
import os
from transformers import AutoTokenizer, pipeline

In [54]:
# Checkpoint name, the local directory it may live in, and its Hub id.
model_name = "distilbert-base-uncased-finetuned-emotion"
model_dir = os.path.join("../models", model_name)
model_on_hub = "florianehmann/" + model_name

# Use the local directory when it exists; otherwise fall back to the Hub id.
model_source = model_dir if os.path.exists(model_dir) else model_on_hub

# Build the classification pipeline and the matching tokenizer from one source.
classifier = pipeline("text-classification", model=model_source)
tokenizer = AutoTokenizer.from_pretrained(model_source)

In [55]:
def tokenize(batch):
    """Tokenize the ``text`` entry of ``batch`` with the notebook's tokenizer.

    The tokenizer is called with both padding and truncation enabled, and its
    result is returned unchanged.
    """
    texts = batch['text']
    encoded = tokenizer(texts, padding=True, truncation=True)
    return encoded

In [57]:
# Run `tokenize` over the dataset in batched mode. Per the `datasets` docs,
# batch_size=None passes the full dataset to the function as a single batch.
emotion_tokenized = emotion.map(
    tokenize,
    batched=True,
    batch_size=None,
)

# Analyze the Results

## Determine a Confusion Matrix

In [66]:
import pandas as pd

In [65]:
# Run the classifier over every text of the validation split.
validation_texts = emotion['validation']['text']
pred = classifier.predict(validation_texts)

In [69]:
# Collect the predictions into a frame and attach the readable class name
# for each predicted label id (a label missing from `label_name` raises KeyError).
pred_df = pd.DataFrame(pred)
pred_df['label_name'] = [label_name[label_id] for label_id in pred_df['label']]

In [70]:
# Preview the first rows of the prediction frame (label id, score, class name).
pred_df.head()

Unnamed: 0,label,score,label_name
0,LABEL_0,0.989753,sadness
1,LABEL_0,0.990213,sadness
2,LABEL_2,0.618537,love
3,LABEL_3,0.984676,anger
4,LABEL_1,0.992215,joy
