In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
def calculate_f1_score(conf_matrix):
    """Compute the per-class F1 score from a square confusion matrix.

    For class ``i`` the diagonal entry is the true-positive count, the rest of
    column ``i`` is counted as false positives and the rest of row ``i`` as
    false negatives. F1 is symmetric in precision and recall, so passing the
    transposed matrix yields the same scores.

    Args:
        conf_matrix: Square 2-D array-like of counts (NumPy array or nested
            sequence).

    Returns:
        list[float]: One F1 score per class, in index order. A class whose
        precision and recall are both zero (or undefined) gets 0.0.
    """
    # Coerce so nested lists work too; tuple indexing below needs an ndarray.
    cm = np.asarray(conf_matrix)
    num_classes = len(cm)
    f1_scores = []

    for i in range(num_classes):
        tp = cm[i, i]
        fp = np.sum(cm[:, i]) - tp
        fn = np.sum(cm[i, :]) - tp

        # Guard every division so empty rows/columns yield 0 instead of NaN.
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0

        if (precision + recall) > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0.0
        f1_scores.append(float(f1))

    return f1_scores

def calculate_micro_f1(conf_matrix):
    """Compute the micro-averaged F1 score from a square confusion matrix.

    True positives, false positives and false negatives are pooled over all
    classes before precision and recall are computed.

    Args:
        conf_matrix: Square 2-D array-like of counts. Diagonal entries are
            true positives; off-diagonal column sums are counted as false
            positives and off-diagonal row sums as false negatives.

    Returns:
        The micro-averaged F1 score, or 0.0 when precision and recall are both
        zero or undefined (for example an all-zero matrix).
    """
    cm = np.asarray(conf_matrix)
    diagonal = np.diag(cm)

    # Pool the counts over all classes.
    tp_total = np.sum(diagonal)
    fp_total = np.sum(np.sum(cm, axis=0) - diagonal)
    fn_total = np.sum(np.sum(cm, axis=1) - diagonal)

    # Guard the divisions (as calculate_f1_score does) so an empty matrix
    # returns 0.0 without a divide-by-zero warning or an intermediate NaN.
    precision_micro = tp_total / (tp_total + fp_total) if (tp_total + fp_total) > 0 else 0.0
    recall_micro = tp_total / (tp_total + fn_total) if (tp_total + fn_total) > 0 else 0.0

    if (precision_micro + recall_micro) > 0:
        return 2 * (precision_micro * recall_micro) / (precision_micro + recall_micro)
    return 0.0


In [None]:
# Load the stored confusion matrix for this model and draw it.
# The matrix is transposed before being handed to ConfusionMatrixDisplay.
# NOTE(review): the labels below put "Positive" at index 0, while the metrics
# cells in this notebook name index 0 'f1_negative' and index 2 'f1_positive'
# -- confirm which class order the .npy files actually use.
file = "auditor_sentiment.npy"
conf_matrix = np.load('confusion_matrices/' + file).astype(int)
disp = ConfusionMatrixDisplay(
    confusion_matrix=np.transpose(conf_matrix),
    display_labels=["Positive", "Neutral", "Negative"],
).plot(values_format='')
plt.show()

In [None]:
# Load the stored confusion matrix for this model and draw it.
# The matrix is transposed before being handed to ConfusionMatrixDisplay.
# NOTE(review): the labels below put "Positive" at index 0, while the metrics
# cells in this notebook name index 0 'f1_negative' and index 2 'f1_positive'
# -- confirm which class order the .npy files actually use.
file = "financial-bert.npy"
conf_matrix = np.load('confusion_matrices/' + file).astype(int)
disp = ConfusionMatrixDisplay(
    confusion_matrix=np.transpose(conf_matrix),
    display_labels=["Positive", "Neutral", "Negative"],
).plot(values_format='')
plt.show()

In [None]:
# Load the stored confusion matrix for this model and draw it.
# The matrix is transposed before being handed to ConfusionMatrixDisplay.
# NOTE(review): the labels below put "Positive" at index 0, while the metrics
# cells in this notebook name index 0 'f1_negative' and index 2 'f1_positive'
# -- confirm which class order the .npy files actually use.
file = "financial-roberta.npy"
conf_matrix = np.load('confusion_matrices/' + file).astype(int)
disp = ConfusionMatrixDisplay(
    confusion_matrix=np.transpose(conf_matrix),
    display_labels=["Positive", "Neutral", "Negative"],
).plot(values_format='')
plt.show()

In [None]:
# Load the stored confusion matrix for this model and draw it.
# The matrix is transposed before being handed to ConfusionMatrixDisplay.
# NOTE(review): the labels below put "Positive" at index 0, while the metrics
# cells in this notebook name index 0 'f1_negative' and index 2 'f1_positive'
# -- confirm which class order the .npy files actually use.
file = "twitter-roberta.npy"
conf_matrix = np.load('confusion_matrices/' + file).astype(int)
disp = ConfusionMatrixDisplay(
    confusion_matrix=np.transpose(conf_matrix),
    display_labels=["Positive", "Neutral", "Negative"],
).plot(values_format='')
plt.show()

In [None]:
# Build one row of summary metrics per model from its saved confusion matrix.
confusion_matrix_folder = 'confusion_matrices'

models = ['auditor_sentiment', 'financial-bert', 'financial-roberta', 'twitter-roberta'] # here paste model names
results = []
for model in models:
    conf_matrix = np.load(f'{confusion_matrix_folder}/{model}.npy')
    # Accuracy: diagonal (correctly classified) count over the total count.
    acc = np.trace(conf_matrix) / np.sum(conf_matrix)
    f1 = calculate_f1_score(conf_matrix)
    # NOTE(review): index 0 is named "negative" here, whereas the confusion
    # matrix plots label index 0 "Positive" -- confirm the stored class order.
    results.append({
        'model': model,
        'accuracy': acc,
        'f1_macro': np.mean(f1),
        'f1_micro' : calculate_micro_f1(conf_matrix),
        'f1_negative': f1[0],
        'f1_neutral': f1[1],
        'f1_positive': f1[2],
    })
# Replace the list of row dicts with a DataFrame (one row per model).
results = pd.DataFrame(results)

In [None]:
# Display the per-model metrics table as the cell output.
results

In [None]:
# Metric columns as a 2-D array (one row per model).
# The plotting cell that follows recomputes `metrics` before using it.
metrics = results[["accuracy", "f1_macro", "f1_negative", "f1_neutral", "f1_positive"]].to_numpy()

In [None]:
# Grouped bar chart: one group per metric, one bar per model within each group.
colors = ['#3D348B', '#7678ED', '#F7B801', '#F18701', '#F35B04', '#D62828']

# plt.subplots returns a (Figure, Axes) pair; unpack it so `fig` really is the
# Figure and all drawing goes through the explicit Axes.
fig, ax = plt.subplots(figsize=(12, 8))

metrics_to_plot = ["accuracy", "f1_macro", "f1_negative", "f1_neutral", "f1_positive"]
# Each group of bars spans 0.8 x-units, shared equally between the models.
barWidth = 0.8 / results.shape[0]

metrics = results.loc[:, metrics_to_plot].to_numpy()

for i in range(metrics.shape[0]):
    scores = metrics[i]
    # Shift model i's bars right by i bar widths inside every metric group.
    br = [x + i * barWidth for x in np.arange(len(scores))]
    # With fewer than 5 models skip the first palette colour; wrap around so
    # more models than colours cannot raise an IndexError.
    j = (i if metrics.shape[0] >= 5 else i + 1) % len(colors)
    # Positional lookup keeps the label aligned with the rows of `metrics`
    # even if `results` does not have a default 0..n-1 index.
    ax.bar(br, scores, color=colors[j], width=barWidth,
           edgecolor='grey', label=results["model"].iloc[i])

ax.set_xlabel('Metric', fontweight='bold', fontsize=15)
ax.set_ylabel('Score', fontweight='bold', fontsize=15)
# Centre each tick under its group: group start + half the 0.8 span, minus half
# a bar because bars are centred on their x position.
ax.set_xticks([r + 0.4 - barWidth / 2 for r in range(len(metrics_to_plot))])
ax.set_xticklabels(metrics_to_plot, fontsize=12)

ax.legend(fontsize=11)
plt.show()

In [None]:
# Load the stored confusion matrix for this model and draw it.
# The matrix is transposed before being handed to ConfusionMatrixDisplay.
# NOTE(review): the labels below put "Positive" at index 0, while the metrics
# cells in this notebook name index 0 'f1_negative' and index 2 'f1_positive'
# -- confirm which class order the .npy files actually use.
file = "flan.npy"
conf_matrix = np.load('confusion_matrices/' + file).astype(int)
disp = ConfusionMatrixDisplay(
    confusion_matrix=np.transpose(conf_matrix),
    display_labels=["Positive", "Neutral", "Negative"],
).plot(values_format='')
plt.show()

In [None]:
# Load the stored confusion matrix for this model and draw it.
# The matrix is transposed before being handed to ConfusionMatrixDisplay.
# NOTE(review): the labels below put "Positive" at index 0, while the metrics
# cells in this notebook name index 0 'f1_negative' and index 2 'f1_positive'
# -- confirm which class order the .npy files actually use.
file = "deberta.npy"
conf_matrix = np.load('confusion_matrices/' + file).astype(int)
disp = ConfusionMatrixDisplay(
    confusion_matrix=np.transpose(conf_matrix),
    display_labels=["Positive", "Neutral", "Negative"],
).plot(values_format='')
plt.show()

In [None]:
# Build one row of summary metrics per model from its saved confusion matrix.
confusion_matrix_folder = 'confusion_matrices'

models = ['flan', 'deberta'] # here paste model names
results = []
for model in models:
    conf_matrix = np.load(f'{confusion_matrix_folder}/{model}.npy')
    # Accuracy: diagonal (correctly classified) count over the total count.
    acc = np.trace(conf_matrix) / np.sum(conf_matrix)
    f1 = calculate_f1_score(conf_matrix)
    # NOTE(review): index 0 is named "negative" here, whereas the confusion
    # matrix plots label index 0 "Positive" -- confirm the stored class order.
    results.append({
        'model': model,
        'accuracy': acc,
        'f1_macro': np.mean(f1),
        'f1_micro' : calculate_micro_f1(conf_matrix),
        'f1_negative': f1[0],
        'f1_neutral': f1[1],
        'f1_positive': f1[2],
    })
# Replace the list of row dicts with a DataFrame (one row per model).
results = pd.DataFrame(results)

In [None]:
# Display the per-model metrics table as the cell output.
results

In [None]:
# Metric columns as a 2-D array (one row per model); this selection leaves out
# "f1_macro". The plotting cell that follows recomputes `metrics` before use.
metrics = results[["accuracy", "f1_negative", "f1_neutral", "f1_positive"]].to_numpy()

In [None]:
# Grouped bar chart: one group per metric, one bar per model within each group.
colors = ['#3D348B', '#7678ED', '#F7B801', '#F18701', '#F35B04', '#D62828']

# plt.subplots returns a (Figure, Axes) pair; unpack it so `fig` really is the
# Figure and all drawing goes through the explicit Axes.
fig, ax = plt.subplots(figsize=(12, 8))

metrics_to_plot = ["accuracy", "f1_macro", "f1_negative", "f1_neutral", "f1_positive"]
# Each group of bars spans 0.8 x-units, shared equally between the models.
barWidth = 0.8 / results.shape[0]

metrics = results.loc[:, metrics_to_plot].to_numpy()

for i in range(metrics.shape[0]):
    scores = metrics[i]
    # Shift model i's bars right by i bar widths inside every metric group.
    br = [x + i * barWidth for x in np.arange(len(scores))]
    # With fewer than 5 models skip the first palette colour; wrap around so
    # more models than colours cannot raise an IndexError.
    j = (i if metrics.shape[0] >= 5 else i + 1) % len(colors)
    # Positional lookup keeps the label aligned with the rows of `metrics`
    # even if `results` does not have a default 0..n-1 index.
    ax.bar(br, scores, color=colors[j], width=barWidth,
           edgecolor='grey', label=results["model"].iloc[i])

ax.set_xlabel('Metric', fontweight='bold', fontsize=15)
ax.set_ylabel('Score', fontweight='bold', fontsize=15)
# Centre each tick under its group: group start + half the 0.8 span, minus half
# a bar because bars are centred on their x position.
ax.set_xticks([r + 0.4 - barWidth / 2 for r in range(len(metrics_to_plot))])
ax.set_xticklabels(metrics_to_plot, fontsize=12)

ax.legend(fontsize=14)
plt.show()