In [4]:
import pandas as pd
from transformers import pipeline
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, matthews_corrcoef
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load email dataset from CSV
df = pd.read_csv("./100_emotion_emails.csv", delimiter=';')

# Drop rows where 'emotion' is NaN or empty
df = df.dropna(subset=['emotion'])
df = df[df['emotion'].str.strip() != '']

# Assuming emails are in 'text' and emotions in 'emotion'
emails = df['text'].tolist()

# Assume emotions are separated by commas if multiple (adapt if format is different)
df['emotion'] = df['emotion'].apply(lambda x: x.split(','))

# Extract unique labels for binarization
all_labels = set()
df['emotion'].apply(all_labels.update)
labels = list(all_labels)

# Binarize labels
mlb = MultiLabelBinarizer(classes=labels)
y_targets_all = mlb.fit_transform(df['emotion'])

In [8]:
# Load pre-trained model suitable for multi-label classification
classifier = pipeline("text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)

# Classify emails
model_outputs = classifier(emails)

# Prepare to convert model outputs to binary format
num_items = len(emails)
num_labels = len(labels)
y_probas_all = np.zeros((num_items, num_labels), dtype=float)

for i, output in enumerate(model_outputs):
    for prediction in output:
        if prediction['label'] in labels:
            label_index = labels.index(prediction['label'])
            y_probas_all[i, label_index] = prediction['score']


In [22]:
i = 3
print(np.round(y_probas_all[i], 3).tolist())
print(y_targets_all[0:3])

[0.005, 0.003, 0.001, 0.011, 0.003, 0.0, 0.007, 0.825, 0.003, 0.004, 0.002, 0.006, 0.006, 0.005]
[[0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0]]


In [18]:
# Apply threshold
threshold = 0.5
y_preds_all = y_probas_all > threshold

# Evaluate using metrics
print("Overall (macro)")
print("===============")
accuracy_val = accuracy_score(y_targets_all, y_preds_all)
precision_val = precision_score(y_targets_all, y_preds_all, average='macro', zero_division=0)
recall_val = recall_score(y_targets_all, y_preds_all, average='macro', zero_division=0)
f1_val = f1_score(y_targets_all, y_preds_all, average='macro', zero_division=0)

print(f"Accuracy: {accuracy_val:.3f}")
print(f"Precision: {precision_val:.3f}")
print(f"Recall: {recall_val:.3f}")
print(f"F1-Score: {f1_val:.3f}")


Overall (macro)
Accuracy: 0.324
Precision: 0.404
Recall: 0.338
F1-Score: 0.349
