In [None]:
import pandas as pd

from modelvshuman_dmc.datasets.experiments import get_experiments
from modelvshuman_dmc.plotting.plot import get_dataset_names, rgb
from modelvshuman_dmc.helper import plotting_helper as ph
from modelvshuman_dmc.plotting import analyses as a
from modelvshuman_dmc import constants as consts
from examples.simclr_hn_models.plotting_definitions.simclr_hn_alexnets import plotting_definition_alexnets_simclr_hn as decision_maker_fun

# Shape-bias analysis object; later cells call `analysis.analysis(df=...)` and
# read the "shape-bias" entry of its result.
analysis = a.ShapeBias()
# Request the experiment list for "cue-conflict"; the next code cell asserts
# that exactly one experiment with that name comes back.
datasets = get_experiments(["cue-conflict"])
# Bare expression so the notebook displays the value as the cell output.
datasets

In [None]:
# decision_maker_fun

In [None]:
# Guard against a changed experiment registry: exactly one experiment is
# expected, and it has to be the cue-conflict one.
assert len(datasets) == 1
ds = datasets[0]
assert ds.name == "cue-conflict"

# Experimental data for the dataset; later cells filter it on the "subj"
# column and group it by the "category" column.
df = ph.get_experimental_data(ds)
df

In [None]:
def _per_category_shape_bias(subjects):
    """Return the shape bias of the pooled trials of `subjects`, one value per category.

    The values are ordered by category label (`groupby` sorts its keys by
    default), so lists produced for different subjects line up position by
    position as long as every subject has trials in every category.
    """
    df_selection = df.loc[df["subj"].isin(subjects)]
    class_avgs = df_selection.groupby(["category"]).apply(
        lambda x: analysis.analysis(df=x)["shape-bias"]
    )
    return class_avgs.tolist()


decision_maker_to_shape_bias_dict = {}
colors = []
labels = []
label_colors = []
# Stays None until a multi-subject group has been processed, so that a missing
# group is reported explicitly below instead of surfacing as a NameError.
df_results_humans = None

for dmaker in decision_maker_fun(df):
    if len(dmaker.decision_makers) > 1:
        # A group with several decision makers is treated as the human
        # observers: one column per observer plus their per-category average.
        decision_maker_to_shape_bias_humans_dict = {
            dmaker_human: _per_category_shape_bias([dmaker_human])
            for dmaker_human in dmaker.decision_makers
        }
        df_results_humans = pd.DataFrame(decision_maker_to_shape_bias_humans_dict)
        df_results_humans["humans"] = df_results_humans.mean(axis=1)
        # The group has no single subject name, so it can never be a
        # torchvision model. Previously the name left over from the preceding
        # iteration was tested here (or a NameError was raised when the group
        # came first).
        is_torchvision_model = False
    else:
        subject_name = dmaker.decision_makers[0]
        decision_maker_to_shape_bias_dict[subject_name] = _per_category_shape_bias(
            dmaker.decision_makers
        )
        is_torchvision_model = subject_name in consts.TORCHVISION_MODELS

    colors.append(dmaker.color)
    # Torchvision models get a grey label; everything else is labelled in its
    # own plotting colour.
    label_colors.append(rgb(150, 150, 150) if is_torchvision_model else dmaker.color)
    labels.append(dmaker.plotting_name)

if df_results_humans is None:
    raise ValueError(
        "decision_maker_fun(df) returned no group with more than one decision "
        "maker, so there is no 'humans' baseline to add to the results."
    )

decision_maker_to_shape_bias_dict["humans"] = df_results_humans.humans.tolist()
df_results = pd.DataFrame(decision_maker_to_shape_bias_dict)
df_results

In [None]:
import scipy.stats as st


def _summarise_column(name):
    """Mean and two-sided 95% Student-t confidence interval of one `df_results` column."""
    values = df_results[name]
    centre = values.mean()
    lower, upper = st.t.interval(
        0.95, len(values) - 1, loc=centre, scale=st.sem(values)
    )
    return {
        "subj": name,
        "mean": centre,
        "lower_95_CI": lower,
        "upper_95_CI": upper,
    }


# One summary row per column of df_results.
summary_df = pd.DataFrame([_summarise_column(name) for name in df_results.columns])

# Express every mean as a proportion of the humans' mean.
human_score = summary_df.loc[summary_df["subj"] == "humans", "mean"].values[0]
summary_df["prop_human"] = summary_df["mean"] / human_score

# Highest mean first, then move the humans row to the very top.
ordered = summary_df.sort_values(by="mean", ascending=False)
is_human = ordered["subj"] == "humans"
summary_df = pd.concat([ordered[is_human], ordered[~is_human]]).reset_index(drop=True)
summary_df

In [None]:
import copy

def print_shape_bias_table(df):
    """Print a LaTeX table that ranks the models by mean shape bias.

    The ``humans`` row is removed before ranking, so ranks compare models only.
    The highest mean and the lowest (best) rank are typeset in bold.

    Args:
        df: Summary frame with the columns ``subj``, ``mean``, ``lower_95_CI``,
            ``upper_95_CI`` and ``prop_human``. The frame is not modified.

    Returns:
        None. The LaTeX source is written to stdout.
    """
    # Take an explicit copy of the filtered rows: adding a column to the result
    # of a boolean filter directly triggers pandas' SettingWithCopyWarning.
    models = df.loc[df["subj"] != "humans"].copy()
    models["rank"] = models["mean"].rank(ascending=False)
    models = models.sort_values(by="rank", ascending=True).reset_index(drop=True)

    # `to_latex` is called with escape=False below (the cells contain LaTeX
    # markup), so special characters must be escaped by hand. That includes the
    # "%" in the headers, which would otherwise start a LaTeX comment and
    # swallow the rest of the header row.
    table = models[
        ["subj", "mean", "lower_95_CI", "upper_95_CI", "prop_human", "rank"]
    ].rename(columns={
        "subj": "model",
        "mean": "Mean",
        "lower_95_CI": "Lower 95\\% CI",
        "upper_95_CI": "Upper 95\\% CI",
        "prop_human": "Prop. Human",
        "rank": "Rank $\\downarrow$"
    })
    table["model"] = table["model"].apply(lambda x: x.replace("_", "\\_"))

    def format_numbers(y, num_digits=3):
        return f"{y:.{num_digits}f}"

    def bold_if_best(best):
        """Build a cell formatter that wraps the value equal to `best` in \\textbf."""
        def _format(y):
            text = format_numbers(y)
            return f"\\textbf{{{text}}}" if y == best else text
        return _format

    # Series.max()/min() return NaN on an empty column instead of raising, so a
    # frame without any model rows still produces an (empty) table.
    formatters = {
        "Mean": bold_if_best(table["Mean"].max()),
        "Rank $\\downarrow$": bold_if_best(table["Rank $\\downarrow$"].min()),
    }

    latex_table = table.to_latex(
        escape=False,
        formatters=formatters,
        float_format="%.3f",
        index=False
    )
    print(latex_table)

In [None]:
# Print the models-only LaTeX table (the function drops the humans row before ranking).
print_shape_bias_table(summary_df)

In [None]:
def print_shape_bias_table_with_humans(df):
    """Print a LaTeX table of mean shape bias with the humans row on top.

    Models are ranked by mean (highest first) and listed below the ``humans``
    row, which itself carries no rank. The highest model mean and the best
    model rank are typeset in bold; cells without a value are shown as ``--``.

    Args:
        df: Summary frame with the columns ``subj``, ``mean``, ``lower_95_CI``,
            ``upper_95_CI`` and ``prop_human``. The frame is not modified.

    Returns:
        None. The LaTeX source is written to stdout.
    """
    missing = "--"

    # Explicit copies: assigning a column to the result of a boolean filter
    # directly triggers pandas' SettingWithCopyWarning.
    is_human = df["subj"] == "humans"
    humans = df.loc[is_human].copy()
    models = df.loc[~is_human].copy()

    models["rank"] = models["mean"].rank(ascending=False)
    # Stable sort so that tied models keep the order they had in `df`.
    models = models.sort_values(by="rank", ascending=True, kind="mergesort")

    # NaN (rather than None) keeps the rank column numeric after the concat.
    humans["rank"] = float("nan")
    table = pd.concat([humans, models], ignore_index=True)

    # `to_latex` is called with escape=False below, so LaTeX special characters
    # in headers and model names are escaped by hand.
    table = table[
        ["subj", "mean", "lower_95_CI", "upper_95_CI", "prop_human", "rank"]
    ].rename(columns={
        "subj": "model",
        "mean": "Mean",
        "lower_95_CI": "Lower 95\\% CI",
        "upper_95_CI": "Upper 95\\% CI",
        "prop_human": "Prop. Human",
        "rank": "Rank $\\downarrow$"
    })
    table["model"] = table["model"].apply(lambda x: x.replace("_", "\\_"))

    def format_numbers(y, num_digits=3):
        return f"{y:.{num_digits}f}"

    # Reference values come from the model rows only, so the humans row never
    # decides what counts as "best".
    max_mean_non_human = models["mean"].max()
    # Using the minimum instead of the literal 1 keeps the leaders bold when
    # several models tie for first place (their shared rank is then e.g. 1.5).
    best_rank = models["rank"].min()

    def mean_formatter(value):
        if pd.isna(value):
            return missing
        text = format_numbers(value)
        return f"\\textbf{{{text}}}" if value == max_mean_non_human else text

    def rank_formatter(value):
        if pd.isna(value):
            return missing
        text = format_numbers(value)
        return f"\\textbf{{{text}}}" if value == best_rank else text

    formatters = {
        "Mean": mean_formatter,
        "Rank $\\downarrow$": rank_formatter,
    }

    latex_table = table.to_latex(
        escape=False,
        formatters=formatters,
        float_format="%.3f",
        na_rep=missing,
        index=False
    )
    print(latex_table)

In [None]:
# Print the LaTeX table that keeps the humans row on top of the ranked models.
print_shape_bias_table_with_humans(summary_df)