**Picture Naming - bias analyses**

different plots for true vs. predicted scores
- predicted vs. actual score (gender two colors, country two colors)
- predicted vs. actual score with distributions; male, female, uk, us
- errors sorted from highest to lowest with demographic information

In [11]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

plt.rcParams["font.family"] = "Arial"
sns.set_theme(context="paper", style="white")

# paths / settings 
OOF_PATH = "/Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/regression/oof_results/oof_preds_all_scores.csv"
SAVE_DIR = "/Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/regression/bias/plots"
os.makedirs(SAVE_DIR, exist_ok=True)

TARGET = "PictureNamingScore"
TASK   = "picnicScene"
MODEL  = "full"
score   = "Picture Naming Score"

gender_colors  = {"f": "tomato", "m": "royalblue"}
country_colors = {"uk": "tomato", "usa": "royalblue"}

# load OOF 
oof = pd.read_csv(OOF_PATH)

# subject id
if "Subject_ID" in oof.columns:
    oof = oof.rename(columns={"Subject_ID":"subject"})
elif "subject" not in oof.columns:
    raise KeyError(f"No subject id column found. OOF columns: {list(oof.columns)}")

# filter to subset
need = ["subject","target","model","y_true","y_pred","task"]
for c in need:
    if c not in oof.columns:
        raise KeyError(f"Column '{c}' missing in OOF file.")
sub = oof.loc[
    (oof["target"] == TARGET) &
    (oof["model"]  == MODEL)  &
    (oof["task"]   == TASK),
    :
].copy()

# use label columns directly 
# Expect exactly "Gender_label" in {'f','m'} and "Country_label" in {'uk','usa'}
if "Gender_label" not in sub.columns or "Country_label" not in sub.columns:
    raise KeyError("Expected 'Gender_label' and 'Country_label' in the OOF file.")

sub["Gender"]  = sub["Gender_label"].astype(str).str.strip().str.lower()
sub["Country"] = sub["Country_label"].astype(str).str.strip().str.lower()

# keep only the two valid levels (single filter is enough)
sub = sub[sub["Gender"].isin(["f","m"]) & sub["Country"].isin(["uk","usa"])]


# (1) error CSV
sub["error"]     = sub["y_pred"] - sub["y_true"]
sub["abs_error"] = sub["error"].abs()
err_cols = ["subject","y_true","y_pred","error","Gender","Country"]
err_sorted = sub.sort_values("abs_error", ascending=False)[err_cols]
err_csv = os.path.join(SAVE_DIR, f"{TARGET}_errors_sorted_{TASK}_{MODEL}.csv")
err_sorted.to_csv(err_csv, index=False)
print("Saved error CSV ->", err_csv)

# (2) predicted-score distributions 
def plot_pred_distributions(df, group_col, palette, title_suffix, fname_suffix):
    g = sns.FacetGrid(df, hue=group_col, palette=palette, height=5, aspect=1.6)
    g.map(sns.histplot, "y_pred", kde=True, bins=20, alpha=0.6)
    g.add_legend()
    g.set_axis_labels(f"{score} (Predicted)", "Number of People")
    plt.title(f"Distribution of predicted {score} by {title_suffix}")
    out = os.path.join(SAVE_DIR, f"{TARGET}_pred_distribution_by_{fname_suffix}_{TASK}_{MODEL}.png")
    plt.savefig(out, dpi=300, bbox_inches="tight")
    plt.close()
    print("Saved plot ->", out)

plot_pred_distributions(sub, "Gender",  gender_colors,  "Gender",  "gender")
plot_pred_distributions(sub, "Country", country_colors, "Country", "country")

# (3) jointplots (simple, no stats/lines) 
def common_lims(df, pad=0.25):
    lo = np.nanmin([df["y_true"].min(), df["y_pred"].min()]) - pad
    hi = np.nanmax([df["y_true"].max(), df["y_pred"].max()]) + pad
    return lo, hi

lims = common_lims(sub)

def joint_with_hue(df, hue, palette, title, outfile):
    if df.empty: return
    g = sns.jointplot(
        data=df, x="y_true", y="y_pred",
        hue=hue, kind="scatter",
        height=6, space=0.18,              # small gap so marginals don't sit on the frame
        palette=palette,
        marginal_ticks=True,               # show ticks on marginals
        marginal_kws=dict(fill=True, alpha=0.25, common_norm=False)
    )

    #  main panel: axes visible + identity line + shared limits 
    g.ax_joint.plot([lims[0], lims[1]], [lims[0], lims[1]], ls="--", lw=1.2, color="black")
    g.ax_joint.set_xlim(lims); g.ax_joint.set_ylim(lims)
    g.set_axis_labels(f"{score} (True)", f"{score} (Predicted)")
    g.ax_joint.tick_params(axis="both", labelsize=10)
    # keep left/bottom spines; hide the extra two so it looks clean
    g.ax_joint.spines["left"].set_visible(True)
    g.ax_joint.spines["bottom"].set_visible(True)
    g.ax_joint.spines["top"].set_visible(False)
    g.ax_joint.spines["right"].set_visible(False)

    #  marginals: light axes so you can read the scale if needed 
    for ax in (g.ax_marg_x, g.ax_marg_y):
        ax.tick_params(length=3, labelsize=8)  # small ticks/labels
        # keep a thin frame
        for side in ax.spines:
            ax.spines[side].set_linewidth(0.6)

    g.fig.suptitle(title, y=1.02)
    g.fig.savefig(os.path.join(SAVE_DIR, outfile), dpi=300, bbox_inches="tight")
    plt.close(g.fig)


def joint_single(df, title, color, outfile):
    if df.empty:
        print(f"[skip] empty subset for {title}")
        return
    g = sns.jointplot(
        data=df, x="y_true", y="y_pred",
        kind="scatter",
        height=6, space=0.18,
        color=color,
        marginal_ticks=True,
        marginal_kws=dict(fill=True, alpha=0.25)
    )

    g.ax_joint.plot([lims[0], lims[1]], [lims[0], lims[1]], ls="--", lw=1.2, color="black")
    g.ax_joint.set_xlim(lims); g.ax_joint.set_ylim(lims)
    g.set_axis_labels(f"{score} (True)", f"{score} (Predicted)")
    g.ax_joint.tick_params(axis="both", labelsize=10)
    g.ax_joint.spines["left"].set_visible(True)
    g.ax_joint.spines["bottom"].set_visible(True)
    g.ax_joint.spines["top"].set_visible(False)
    g.ax_joint.spines["right"].set_visible(False)

    for ax in (g.ax_marg_x, g.ax_marg_y):
        ax.tick_params(length=3, labelsize=8)
        for side in ax.spines:
            ax.spines[side].set_linewidth(0.6)

    g.fig.suptitle(title, y=1.02)
    g.fig.savefig(os.path.join(SAVE_DIR, outfile), dpi=300, bbox_inches="tight")
    plt.close(g.fig)


# by Country (2 colors)
joint_with_hue(
    sub, "Country", country_colors,
    f"{score}: Predicted vs True by Country",
    f"{TARGET}_joint_by_country_{TASK}_{MODEL}.png"
)

# by Gender (2 colors)
joint_with_hue(
    sub, "Gender", gender_colors,
    f"{score}: Predicted vs True by Gender",
    f"{TARGET}_joint_by_gender_{TASK}_{MODEL}.png"
)

# intersections (F-UK, F-USA, M-UK, M-USA)
for (g_val, c_val, clr) in [("f","uk","tomato"), ("f","usa","royalblue"),
                            ("m","uk","tomato"), ("m","usa","royalblue")]:
    ss = sub[(sub["Gender"]==g_val) & (sub["Country"]==c_val)]
    if len(ss) < 1:
        print(f"[skip] {g_val.upper()}-{c_val.upper()} empty"); continue
    joint_single(
        ss,
        f"{score}: Predicted vs True ({g_val.upper()} / {c_val.upper()})",
        clr,
        f"{TARGET}_joint_{g_val}_{c_val}_{TASK}_{MODEL}.png"
    )


Saved error CSV -> /Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/regression/bias/plots/PictureNamingScore_errors_sorted_picnicScene_full.csv
Saved plot -> /Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/regression/bias/plots/PictureNamingScore_pred_distribution_by_gender_picnicScene_full.png
Saved plot -> /Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/regression/bias/plots/PictureNamingScore_pred_distribution_by_country_picnicScene_full.png


In [12]:
import os
import pandas as pd
import numpy as np

# same folder + filename pattern you used before
SAVE_DIR = "/Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/regression/bias/plots"
ERR_CSV  = os.path.join(SAVE_DIR, "PictureNamingScore_errors_sorted_picnicScene_full.csv")

df = pd.read_csv(ERR_CSV)

# sanity: keep clean labels
df["Country"] = df["Country"].astype(str).str.lower()
df["Gender"]  = df["Gender"].astype(str).str.lower()

# helper to build one row of summary stats for a given top subset
def summarize_top(sub, label):
    n = len(sub)
    row = {"bucket": label, "n": n}

    # counts
    row.update({f"country_{k}_n": int(v) for k, v in sub["Country"].value_counts().to_dict().items()})
    row.update({f"gender_{k}_n":  int(v) for k, v in sub["Gender"].value_counts().to_dict().items()})

    # percents within the bucket
    for k in ["uk","usa"]:
        row[f"country_{k}_pct"] = (sub["Country"].eq(k).mean() if n > 0 else np.nan)
    for k in ["f","m"]:
        row[f"gender_{k}_pct"]  = (sub["Gender"].eq(k).mean()  if n > 0 else np.nan)

    return row

# overall (baseline) for comparison
rows = [summarize_top(df, "overall")]

# choose some top-k and top-% cutoffs
TOP_KS    = [10, 20, 50]
TOP_FRACS = [0.10, 0.20]  # 10%, 20%

# top-k
for k in TOP_KS:
    k = min(k, len(df))
    rows.append(summarize_top(df.nlargest(k, "abs_error"), f"top_{k}"))

# top-% by absolute error
for frac in TOP_FRACS:
    k = int(np.ceil(frac * len(df)))
    rows.append(summarize_top(df.nlargest(k, "abs_error"), f"top_{int(frac*100)}pct"))

summary = pd.DataFrame(rows)

# order columns nicely
ordered_cols = (
    ["bucket","n",
     "country_uk_n","country_usa_n","country_uk_pct","country_usa_pct",
     "gender_f_n","gender_m_n","gender_f_pct","gender_m_pct"]
)
summary = summary.reindex(columns=ordered_cols)

# show & save
print(summary.to_string(index=False))
out_csv = os.path.join(SAVE_DIR, "highest_error_group_breakdown.csv")
summary.to_csv(out_csv, index=False)
print("\nSaved summary ->", out_csv)


KeyError: 'abs_error'

In [17]:
import pandas as pd
import numpy as np

path = "/Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/regression/bias/plots/PictureNamingScore_errors_sorted_picnicScene_full.csv"
df = pd.read_csv(path)

# ensure 'error' exists; if not, build it from y_true - y_pred
if "error" not in df.columns and {"y_true","y_pred"}.issubset(df.columns):
    df["error"] = df["y_true"] - df["y_pred"]

# drop rows with missing essentials to match behavior consistently
sub = df.dropna(subset=["y_true", "error", "Country"]).copy()

res = (
    sub.groupby("Country", dropna=False)
       .apply(lambda x: pd.Series({
           "n": len(x),
           "mse": np.mean(np.square(x["error"])),            # 1/n * Σ err^2
           "var": x["y_true"].var(ddof=1),                   # sample variance (matches .var())
           "r^2": 1 - np.mean(np.square(x["error"])) / x["y_true"].var(ddof=1),
           # "r^2_sk": metrics.r2_score(x["y_true"], x["y_pred"]) if {"y_true","y_pred"}.issubset(x.columns) else np.nan,
           "rmse": np.sqrt(np.mean(np.square(x["error"]))),
           "mae": np.mean(np.abs(x["error"])),
       }))
       .reset_index()
       .sort_values("Country")  # or .sort_values("r^2", ascending=False)
)

print(res.to_string(index=False))


Country     n     mse      var      r^2     rmse      mae
     uk 482.0 4.65052 5.024064 0.074351 2.156506 1.551631
    usa 477.0 6.40433 9.797403 0.346324 2.530678 1.824596


  .apply(lambda x: pd.Series({


In [2]:
import pandas as pd
import numpy as np

path = "/Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/regression/bias/predicted_vs_actual_scores/PictureNamingScore_errors_sorted_picnicScene_full.csv"
df = pd.read_csv(path)

# ensure 'error' exists; if not, build it from y_true - y_pred
if "error" not in df.columns and {"y_true","y_pred"}.issubset(df.columns):
    df["error"] = df["y_true"] - df["y_pred"]

# drop rows with missing essentials to match behavior consistently
sub = df.dropna(subset=["y_true", "error", "Gender"]).copy()

res = (
    sub.groupby("Gender", dropna=False)
       .apply(lambda x: pd.Series({
           "n": len(x),
           "mse": np.mean(np.square(x["error"])),            # 1/n * Σ err^2
           "var": x["y_true"].var(ddof=1),                   # sample variance (matches .var())
           "r^2": 1 - np.mean(np.square(x["error"])) / x["y_true"].var(ddof=1),
           # "r^2_sk": metrics.r2_score(x["y_true"], x["y_pred"]) if {"y_true","y_pred"}.issubset(x.columns) else np.nan,
           "rmse": np.sqrt(np.mean(np.square(x["error"]))),
           "mae": np.mean(np.abs(x["error"])),
       }))
       .reset_index()
       .sort_values("Gender")  # or .sort_values("r^2", ascending=False)
)

print(res.to_string(index=False))


Gender     n      mse       var      r^2     rmse      mae
     f 588.0 5.043957  5.828191 0.134559 2.245875 1.618516
     m 371.0 6.281859 10.120420 0.379289 2.506364 1.796581


  .apply(lambda x: pd.Series({
