In [74]:
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')  # Force the TkAgg backend on Windows
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns


In [75]:
# Raw estimates for each participant group.
df1 = pd.read_csv("data/group1.csv")
df2 = pd.read_csv("data/group2_filtered.csv")

# Toggle: prepend a synthetic "task 0" row to both groups.
add_task0 = True

if add_task0:
    # Task 0 carries the value 25 for both estimation columns.
    task0_df = pd.DataFrame(
        {
            "task number": [0],
            "estimation 1": [25],
            "estimation 2": [25],
        }
    )
    df1 = pd.concat([task0_df, df1], ignore_index=True)
    df2 = pd.concat([task0_df, df2], ignore_index=True)

# Lookup table that is joined onto the estimates by task.
fst = pd.read_csv("data/fst.csv")


def _attach_fst(estimates, fst_table):
    """Left-join ``fst_table`` onto ``estimates`` and normalise the task key.

    Steps, in order:
      1. left merge on 'task number' (every row of ``estimates`` is kept),
      2. replace every NaN in the merged frame with 0,
      3. overwrite 'task number' with the first run of digits found in each
         entry, as an int; entries with no digit match become 0.
    """
    merged = pd.merge(estimates, fst_table, on="task number", how="left")
    merged = merged.fillna(0)
    # Ensure task number is numeric so it sorts consistently.
    digits = merged["task number"].str.extract(r'(\d+)')
    merged["task number"] = digits.fillna(0).astype(int)
    return merged


df1 = _attach_fst(df1, fst)
df2 = _attach_fst(df2, fst)


print(df2.head())


   task number  estimation 1  estimation 2    name   fst
0            0         25.00          25.0       0   0.0
1            1         25.00          25.5  Kamile   7.0
2            1         25.25          30.0  Uttara   7.0
3            1         25.50          25.5    Omer   7.0
4            2         27.00          27.0  Kamile  42.0


In [76]:
def calculate_stats(df, estimation_col, group_col="fst"):
    """
    Per-group mean and standard error of the mean for one estimation column.

    Rows of ``df`` are grouped by ``group_col``; within each group the mean of
    ``estimation_col`` and its standard error (sample standard deviation
    divided by the square root of the number of non-missing values) are
    computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both ``group_col`` and ``estimation_col``.
    estimation_col : str
        Name of the column whose values are summarised.
    group_col : str, default "fst"
        Name of the column to group by. The default keeps the original
        behaviour of grouping by 'fst'.

    Returns
    -------
    (means, standard_errors) : tuple of pandas.Series
        Both are indexed by the distinct values of ``group_col``. The
        standard error is NaN for any group with fewer than two non-missing
        values, because the sample standard deviation is undefined there.
    """
    grouped = df.groupby(group_col)[estimation_col]
    means = grouped.mean()
    # Standard error is std / sqrt(n), with n counting non-missing values only.
    standard_errors = grouped.std() / grouped.count() ** 0.5
    return means, standard_errors

In [77]:
# Pooled data: both groups stacked into one frame (also used by the next cell).
df_joint = pd.concat([df1, df2], ignore_index=True)

sns.set_theme(style="whitegrid", palette="Set2")
# One stacked panel per data set, all sharing the x-axis.
# Each panel keeps its own y-range; pass sharey=True to plt.subplots if the
# panels should be drawn on a common y-scale instead.
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(8, 9), sharex=True)
fig.suptitle("First and Second Estimates Across Groups", fontsize=14)

panels = zip(
    axes,
    [df1, df2, df_joint],
    ["Group 1", "Group 2", "Group 1 + Group 2"],
)
# (column in the data, prefix used in the legend labels)
estimation_series = [
    ("estimation 1", "Estimation 1"),
    ("estimation 2", "Estimation 2"),
]

for ax, df, title in panels:
    for estimation_col, series_label in estimation_series:
        # Mean per 'fst' value and a normal-approximation 95% interval
        # (mean +/- 1.96 standard errors).
        means, errors = calculate_stats(df, estimation_col)
        ci_lower = means - 1.96 * errors
        ci_upper = means + 1.96 * errors

        # Line for the mean, translucent band for the interval.
        ax.plot(means.index, means.values, label=f"{series_label} mean")
        ax.fill_between(
            means.index,
            ci_lower.values,
            ci_upper.values,
            alpha=0.2,
            label=f"{series_label} (95% CI)",
        )

    ax.set_title(title)
    # Percentage tick labels. ':g' keeps fractional ticks intact (27.5 -> "27.5%")
    # instead of truncating them to a misleading "27%".
    ax.yaxis.set_major_formatter(
        plt.FuncFormatter(lambda x, _: f"{x:g}%")
    )

# We want only one legend for the entire figure; every panel draws the same
# four artists, so take the handles and labels from the last subplot.
bottom_ax = axes[-1]
handles, labels = bottom_ax.get_legend_handles_labels()
fig.legend(handles, labels, loc="upper right")

# Minutes on the x ticks; referenced through axes[-1] explicitly rather than
# through the loop variable left over from the loop above.
bottom_ax.xaxis.set_major_formatter(
    plt.FuncFormatter(lambda x, _: f"{x:g} min")
)

# Axis labels go on the bottom subplot only.
bottom_ax.set_xlabel("Cybench First Solve Time")
bottom_ax.set_ylabel("Probability Estimate")


fig.tight_layout()

# Save the plot
fig.savefig("raw_data_vizualisation.png")

#plt.show()

In [78]:

sns.set_theme(style="whitegrid", palette="Set2")

# Single panel: the second estimate of every data set on one pair of axes.
fig, ax = plt.subplots(figsize=(8, 6))
fig.suptitle("Second Estimates Across Groups", fontsize=14)

# (legend label, data) pairs; df_joint is the pooled frame built in the previous cell.
groups = [
    ("Group 1", df1),
    ("Group 2", df2),
    ("Group 1 + Group 2", df_joint),
]

# One explicit colour per data set so each line and its band match.
colors = sns.color_palette("Set2", n_colors=len(groups))

# Running extremes of the interval bounds over all data sets.
global_ymin, global_ymax = float('inf'), float('-inf')

for (label, df), color in zip(groups, colors):
    # Mean per 'fst' value and a normal-approximation 95% interval
    # (mean +/- 1.96 standard errors).
    means, errors = calculate_stats(df, "estimation 2")
    ci_lower = means - 1.96 * errors
    ci_upper = means + 1.96 * errors

    # Update global y-limits
    global_ymin = min(global_ymin, ci_lower.min())
    global_ymax = max(global_ymax, ci_upper.max())

    # Line for the mean, translucent band for the interval.
    ax.plot(means.index, means.values, label=f"{label} mean", color=color)
    ax.fill_between(
        means.index,
        ci_lower.values,
        ci_upper.values,
        alpha=0.2,
        color=color,
        label=f"{label} (95% CI)"
    )

# Clip the y-axis to the span covered by the intervals. If no finite bound was
# found (every interval NaN) the sentinels are still +/-inf, which set_ylim
# rejects, so matplotlib's automatic limits are kept in that case.
if np.isfinite(global_ymin) and np.isfinite(global_ymax):
    ax.set_ylim(global_ymin, global_ymax)

# Percentage tick labels. ':g' keeps fractional ticks intact (27.5 -> "27.5%")
# instead of truncating them to a misleading "27%".
ax.yaxis.set_major_formatter(
    plt.FuncFormatter(lambda x, _: f"{x:g}%")
)

# Minutes on the x ticks, with the same non-truncating number format.
ax.xaxis.set_major_formatter(
    plt.FuncFormatter(lambda x, _: f"{x:g} min")
)

ax.set_xlabel("Cybench First Solve Time")
ax.set_ylabel("Probability Estimate")
ax.legend(loc="best")

fig.tight_layout()
fig.savefig("estimation2_vs_fst.png", dpi=300)
#plt.show()
