# Plot results

Generate a bar chart comparing the gender representation bias (male-to-female ratio) of each model in the evaluated languages. The values are entered manually; they can be copied from the output of `stats-summary.ipynb`.

In [None]:
import matplotlib.pyplot as plt

# Manually entered M:F ratios, keyed by model name, one mapping per language.
# Insertion order matters: it determines the left-to-right order of the bars.
czech_ratios = {
    "gpt-4o-mini": 1.24,
    "gemma-2-27b": 1.02,
    "Llama-4-Scout": 1.80,
    "Mistral-7B": 5.11,
    "Qwen2.5-7B": 2.77,
    "EuroLLM-1.7B": 3.01,
}
slovenian_ratios = {
    "gpt-4o-mini": 1.03,
    "gemma-2-27b": 0.88,
    "Llama-4-Scout": 1.97,
    "Mistral-7B": 2.22,
    "Qwen2.5-7B": 1.75,
    "GaMS-1B": 1.71,
}

# Language name -> {model name -> ratio}
data = {"Czech": czech_ratios, "Slovenian": slovenian_ratios}

# Create the plots
def _plot_ratio_panel(ax, ratios, title, color, *, show_ylabel=False):
    """Draw one language's M:F ratios as an annotated bar chart on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        ratios: Mapping of model name to M:F ratio; bars follow its order.
        title: Panel title (the language name).
        color: Fill color for the bars.
        show_ylabel: Whether to label the y-axis on this panel.

    Returns:
        The bar container returned by ``ax.bar``.
    """
    bars = ax.bar(list(ratios.keys()), list(ratios.values()), color=color)
    ax.set_title(title)
    ax.set_xlabel("Model")
    if show_ylabel:
        ax.set_ylabel("M:F Ratio")
    # Set the limits on the axes being drawn. The original Slovenian section
    # called ax1.set_ylim by mistake, which only worked because of sharey=True.
    ax.set_ylim(0, 6)
    ax.tick_params(axis='x', rotation=45)
    ax.grid(axis='y', linestyle='--', alpha=0.6)

    # Print each bar's value just above its top edge.
    for bar in bars:
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            height + 0.1,
            f"{height:.2f}",
            ha='center',
            va='bottom',
            fontsize=9,
        )
    return bars


# Two side-by-side panels with a shared y-axis so the bars are comparable.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 4), sharey=True)

# Czech plot (the left panel carries the shared y-axis label)
bars_cz = _plot_ratio_panel(ax1, data["Czech"], "Czech", "skyblue", show_ylabel=True)

# Slovenian plot
bars_sl = _plot_ratio_panel(ax2, data["Slovenian"], "Slovenian", "lightgreen")

# Tidy the layout, write the figure to disk, then display it.
# An overall figure title is available but currently disabled:
#fig.suptitle("Gender Representation Bias", fontsize=14)
plt.tight_layout()

# Save before show() so the exported PDF reflects the finished layout.
output_file = "grb-ratio.pdf"
plt.savefig(output_file)
plt.show()
