In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

# Load the master results file. The code below reads the ground-truth label
# from the "SEXISM" column and one prediction column per LLM.
df = pd.read_csv("./data/result/MASTER.csv")

# Per-model metrics, keyed by model column name.
results = {}

# Prediction columns to evaluate ("gemini-1.5-flash" is currently excluded).
model_columns = [
    "gpt-4o-mini",
    "gpt-4-0125-preview",
    "gpt-3.5-turbo-0125",
    "llama3.2-3b",
    "llama3.1-8b",
    "llama3-8b",
]

# The ground truth is identical for every model, so cast it to int once
# instead of re-reading it on every iteration. astype(int) raises on missing
# labels rather than silently guessing a class for them.
y_true = df["SEXISM"].astype(int)

for model in model_columns:
    # Coerce predictions to integers in place; anything that cannot be parsed
    # as a number (e.g. a free-text answer) is counted as a 0 prediction.
    df[model] = pd.to_numeric(df[model], errors="coerce").fillna(0).astype(int)
    y_pred = df[model]

    # zero_division=0 keeps precision/recall/F1 defined (as 0) when a model
    # never predicts the positive class, instead of emitting warnings.
    results[model] = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, zero_division=0),
        "Recall": recall_score(y_true, y_pred, zero_division=0),
        "F1 Score": f1_score(y_true, y_pred, zero_division=0),
        # Stored as a nested list for better readability.
        "Confusion Matrix": confusion_matrix(y_true, y_pred).tolist(),
        # NOTE: computed from hard 0/1 predictions, not probabilities.
        "AUC": roc_auc_score(y_true, y_pred),
    }

# Metrics as rows, models as columns (same layout as before).
results_df = pd.DataFrame(results)

# Build the plotting frame: one row per model with an explicit "Model" column.
# pd.DataFrame(results) has no "Model" column, which is why melting it
# directly raised KeyError. The confusion matrix is dropped because a nested
# list cannot be drawn as a bar height.
scores_by_model = (
    results_df.T
    .drop(columns="Confusion Matrix")
    .astype(float)
    .rename_axis("Model")
    .reset_index()
)

# Melt to long format (Model, Metric, Score) for easier visualization.
results_melted = scores_by_model.melt(
    id_vars="Model", var_name="Metric", value_name="Score"
)

# Plot using Seaborn: one group of bars per model, one bar per metric.
plt.figure(figsize=(12, 8))
sns.barplot(data=results_melted, x="Model", y="Score", hue="Metric", palette="viridis")

# Customize plot
plt.title("Performance Metrics for Each Model", fontsize=16)
plt.xlabel("Model", fontsize=12)
plt.ylabel("Score", fontsize=12)
plt.xticks(rotation=45, fontsize=10)
plt.legend(title="Metric", fontsize=10)
plt.tight_layout()

# Show the plot
plt.show()


KeyError: "The following id_vars or value_vars are not present in the DataFrame: ['Model']"