# Results analysis
This notebook was used to analyse the results and to produce the graphics included in the paper.

In [None]:
%load_ext autoreload
%autoreload 2


In [None]:
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
def read_tensorboard_scalars(logdir, tag_name):
    """Load one scalar series from a single TensorBoard log directory.

    Args:
        logdir: Path of the TensorBoard run directory to read.
        tag_name: Exact name of the scalar tag to extract.

    Returns:
        A DataFrame with one row per logged event and the columns
        'step', 'value' and 'run' (the latter holds ``logdir`` on every row).

    Raises:
        ValueError: If ``tag_name`` is not among the scalar tags of the run.
    """
    accumulator = EventAccumulator(logdir)
    accumulator.Reload()

    # Tip: print `scalar_tags` to find the exact tag name if the lookup fails.
    scalar_tags = accumulator.Tags()['scalars']
    if tag_name not in scalar_tags:
        raise ValueError(f"Tag '{tag_name}' not found in TensorBoard logs at {logdir}.")

    records = accumulator.Scalars(tag_name)
    return pd.DataFrame({
        'step': [record.step for record in records],
        'value': [record.value for record in records],
        'run': logdir,
    })

def aggregate_experiments(experiments, tag_name):
    """Combine scalar data from multiple TensorBoard runs into a single DataFrame.

    Runs whose logs do not contain ``tag_name`` are reported with ``print``
    and skipped, so one incomplete run does not abort the whole aggregation.

    Args:
        experiments: Iterable of TensorBoard log directories.
        tag_name: Name of the scalar tag to read from every run.

    Returns:
        A DataFrame with the columns 'step', 'value' and 'run' holding the
        rows of all readable runs (index renumbered). If no run could be
        read, an empty DataFrame with those columns is returned.
    """
    df_list = []

    for logdir in experiments:
        try:
            df = read_tensorboard_scalars(logdir, tag_name)
        except ValueError as e:
            print(e)
        else:
            df_list.append(df)

    # pd.concat raises "No objects to concatenate" on an empty list; return an
    # empty frame with the expected schema so downstream filtering still works.
    if not df_list:
        return pd.DataFrame(columns=['step', 'value', 'run'])

    return pd.concat(df_list, ignore_index=True)

# Scalar tag read from every run, and the root folder that holds the run logs.
tag_name = "combined Reward-mean"
results_path = "/home/dave/dev/Reasoning/runs/stimulus-response/2026/02/"

# Each experiment set is a list of run directories: results_path followed by
# the run-specific folder suffix listed below.
experiments_1 = [
    results_path + run_suffix
    for run_suffix in (
        "20/11:00:51",
        "20/11:19:29",
        "20/12:01:30",
        "20/12:42:20",
        "20/13:21:58",

        "21/19:36:15",
        "21/20:05:21",
        "21/20:26:36",
    )
]
experiments_2 = [
    results_path + run_suffix
    for run_suffix in (
        "20/14:10:28",
        "20/14:35:00",
        "20/14:58:15",
        "20/15:21:28",
        "20/15:44:49",

        "21/17:47:18",
        "21/18:14:43",
        "21/18:44:37",
    )
]
experiments_3 = [
    results_path + run_suffix
    for run_suffix in (
        "20/16:06:03",
        "20/16:52:52",
        "20/17:42:44",
        "20/19:00:14",  # used for individual plot (1x)
        "20/20:37:40",

        "21/07:02:00",
        "21/14:17:19",
        "21/15:07:21",
    )
]

# One long-format DataFrame (step, value, run) per experiment set.
combined_df_1 = aggregate_experiments(experiments_1, tag_name)
combined_df_2 = aggregate_experiments(experiments_2, tag_name)
combined_df_3 = aggregate_experiments(experiments_3, tag_name)



In [None]:
# Sanity check: show the columns of the aggregated frame of the first experiment set.
print(combined_df_1.columns)
# Debugging aid (disabled): dump every logged step of the first experiment set.
#for i in range(len(combined_df_1)):
#    print(combined_df_1.iloc[i]["step"])

In [None]:
# Define periods
# Schedule values kept as notes only (they are not read by any code in this notebook):
# training_steps = 12000
# evaluate_steps = 1000
# few_shot_training_steps = [10, 10, 20, 40, 80, 160, 320, 640, 1280]    

# Each entry is [name, first step, last step, cumulative few-shot steps].
# The step bounds are inclusive and are matched against the 'step' column of
# the aggregated TensorBoard data. The last field is the running total of
# few-shot steps; the "+N" remarks give the increment over the previous phase.
# "Training (new)" rows are listed for completeness: the split functions in
# this notebook only read "Training (old)", "Original data" and "Few-shot data".
periods = [
    # Pretraining phase (inclusive)
    ["Training (old)",   100, 12000, 0],
    ["Original data", 12100, 13000, 0],
    ["Few-shot data", 13100, 14000, 0],

    # Few-shot phase (inclusive)
    ["Training (new)", 14011, 14011, 10],  # +10
    ["Original data", 14100, 15011, 10],
    ["Few-shot data", 15100, 16011, 10],

    ["Training (new)", 16021, 16021, 20],  # +10
    ["Original data", 16100, 17021, 20],
    ["Few-shot data", 17100, 18021, 20],

    ["Training (new)", 18041, 18041, 40],  # +20
    ["Original data", 18100, 19041, 40],
    ["Few-shot data", 19100, 20041, 40],

    ["Training (new)", 20081, 20081, 80],  # +40
    ["Original data", 20100, 21081, 80],
    ["Few-shot data", 21100, 22081, 80],

    ["Training (new)", 22100, 22161, 160],  # +80
    ["Original data", 22200, 23161, 160],
    ["Few-shot data", 23200, 24161, 160],

    ["Training (new)", 24200, 24321, 320],  # +160
    ["Original data", 24400, 25321, 320],
    ["Few-shot data", 25400, 26321, 320],

    ["Training (new)", 26400, 26641, 640],  # +320
    ["Original data", 26700, 27641, 640],
    ["Few-shot data", 27700, 28641, 640],

    ["Training (new)", 28700, 29281, 1280],  # +640
    ["Original data", 29300, 30281, 1280],
    ["Few-shot data", 30300, 31281, 1280],

    ["Training (new)", 31300, 32561, 2560],  # +1280
    ["Original data", 32600, 33561, 2560],
    ["Few-shot data", 33600, 34561, 2560],
]

# Split into periods
def split_few_shot_periods(df, offset: int, period_defs=None):
    """Summarise every evaluation period of ``df`` as one row of statistics.

    Periods whose name starts with "Training" are skipped; for all others the
    rows of ``df`` whose 'step' lies inside the (shifted) period are selected
    and their 'value' column is reduced to mean, std, min and max. A short
    line with the number of selected samples is printed per period.

    Args:
        df: DataFrame with at least the columns 'step' and 'value'.
        offset: Number of steps added to both bounds of every period before
            selecting rows (use a negative value to shift periods earlier).
        period_defs: Optional sequence of
            ``[name, first_step, last_step, cumulative_steps]`` entries.
            Defaults to the module-level ``periods`` table.

    Returns:
        A DataFrame with one row per evaluated period and the columns
        'label' (period name), 't' (cumulative steps), 'mean', 'std',
        'min' and 'max'.
    """
    if period_defs is None:
        period_defs = periods

    rows = []
    for period in period_defs:
        period_name, first_step, last_step, cumulative_steps = period[:4]
        if period_name.startswith("Training"):
            continue

        t1 = first_step + offset
        t2 = last_step + offset
        # between() has inclusive bounds
        values = df.loc[df['step'].between(t1, t2), "value"]
        combined_name = f"{period_name}[{cumulative_steps}]"
        print(f"{combined_name}: {t1} --> {t2} has {len(values)} samples.")

        rows.append({
            "label": period_name,
            "t": cumulative_steps,
            "mean": values.mean(),
            "std": values.std(),
            "min": values.min(),
            "max": values.max(),
        })
    return pd.DataFrame(rows)

# Per-period summary statistics for each experiment set.
# The third set is queried with every period boundary shifted 6000 steps earlier.
# NOTE(review): presumably its pretraining phase was 6000 steps shorter — confirm.
df_1 = split_few_shot_periods(combined_df_1, offset=0)
df_2 = split_few_shot_periods(combined_df_2, offset=0)
df_3 = split_few_shot_periods(combined_df_3, offset=-6000)


In [None]:
def split_training_period(df, offset: int, period_defs=None):
    """Return the rows of ``df`` that fall inside the "Training (old)" period.

    Only the first period whose name starts with "Training (old)" is used.
    Its lower bound is never allowed to drop below step 100, even when a
    negative ``offset`` would shift it further down.

    Args:
        df: DataFrame with at least a 'step' column.
        offset: Number of steps added to both bounds of the period.
        period_defs: Optional sequence of
            ``[name, first_step, last_step, cumulative_steps]`` entries.
            Defaults to the module-level ``periods`` table.

    Returns:
        A copy of the selected rows, or ``None`` if the table contains no
        "Training (old)" period.
    """
    if period_defs is None:
        period_defs = periods

    for period in period_defs:
        period_name = period[0]
        if not period_name.startswith("Training (old)"):
            continue

        t1 = max(100, period[1] + offset)
        t2 = period[2] + offset

        # between() has inclusive bounds
        df_period = df[df['step'].between(t1, t2)].copy()
        print(f"{period_name}: {t1} --> {t2} has {len(df_period)} samples.")
        return df_period
    return None

# Raw per-step rows of the pretraining period for each experiment set.
# The third set uses the same -6000 step shift as in the few-shot summaries.
df_training_1 = split_training_period(combined_df_1, offset=0)
df_training_2 = split_training_period(combined_df_2, offset=0)
df_training_3 = split_training_period(combined_df_3, offset=-6000)

In [None]:
def moving_average(df, window_size=8):
    """Add a per-run rolling mean of 'value' as the column 'value-smoothed'.

    Args:
        df: DataFrame with the columns 'step', 'value' and 'run'.
            The frame passed in is not modified.
        window_size: Number of consecutive rows averaged within each run.
            Shorter windows are used at the start of a run
            (``min_periods=1``), so no value is left undefined.

    Returns:
        A new DataFrame sorted by 'step' with the extra column
        'value-smoothed'.
    """
    # Order by step so that every run is smoothed along its time axis.
    ordered = df.sort_values(by=['step'])

    def _rolling_mean(series):
        return series.rolling(window=window_size, min_periods=1).mean()

    # Smooth each run separately so that windows never mix different runs.
    ordered['value-smoothed'] = ordered.groupby('run')['value'].transform(_rolling_mean)

    return ordered.copy()

# Smooth each training frame and tag it with the condition name that the
# training plot uses to group rows and label its legend.
df_training_1_sm = moving_average(df_training_1).assign(
    label="Entangled (dense ANN); LR: 0.0001"
)
df_training_2_sm = moving_average(df_training_2).assign(
    label="Entangled (dense ANN); LR: 0.001"
)
df_training_3_sm = moving_average(df_training_3).assign(
    label="Disentangled memory; LR: 0.1"
)

# Long-format frame holding all three conditions.
df_training = pd.concat([df_training_1_sm, df_training_2_sm, df_training_3_sm])

In [None]:

def plot(
    plot_data,
    title,
    x_label,
    y_label,
    file_name,
):
    """Draw one smoothed reward curve per condition, then save and show it.

    Args:
        plot_data: DataFrame with the columns 'label', 'step' and
            'value-smoothed'; every distinct 'label' becomes one line.
        title: Title of the figure.
        x_label: Caption of the x-axis.
        y_label: Caption of the y-axis.
        file_name: Path the figure is written to before it is shown.
    """
    conditions = plot_data['label']
    for condition in conditions.unique():
        condition_rows = plot_data[conditions == condition]
        sns.lineplot(
            data=condition_rows,
            x='step',
            y='value-smoothed',
            label=condition,
        )

    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend(title="Condition")
    # Save before show(): the figure may no longer be available afterwards.
    plt.savefig(file_name)
    plt.show()

# Training-reward curves of all three conditions, written to training.png.
plot(
    plot_data=df_training,
    title="Reward during training (Epsilon=0.1)",
    x_label="Minibatch (batch size: 16)",
    y_label="Mean reward (per step)",
    file_name="training.png",
)

In [None]:

def plot_subplots(
    plot_data:list[pd.DataFrame],
    subtitles:list[str],
    title, 
    x_label,
    y_label, 
    file_name,
):
    """Plot the few-shot results as a row of panels sharing one y-axis.

    Every panel shows, per distinct 'label', the 'mean' over 't' on a
    logarithmic x-axis with a shaded band of one standard deviation
    (mean - std .. mean + std). The figure is saved and then shown.

    Args:
        plot_data: One DataFrame per panel with the columns 'label', 't',
            'mean' and 'std'.
        subtitles: Panel titles, indexed in parallel with ``plot_data``.
        title: Title placed above all panels.
        x_label: Caption of the x-axis of every panel.
        y_label: Caption of the shared y-axis (set on the first panel only).
        file_name: Path the figure is written to before it is shown.
    """
    num_subplots = len(plot_data)
    # squeeze=False keeps the result two-dimensional even for a single panel;
    # without it plt.subplots returns a bare Axes and axes[i] would fail.
    _fig, axes_grid = plt.subplots(
        1, num_subplots, figsize=(15, 5), sharey=True, squeeze=False
    )
    axes = axes_grid[0]

    for i in range(num_subplots):
        axes[i].set_title(subtitles[i])
        axes[i].set_xscale('log')  # Set the x-axis to log scale
        axes[i].set_xlabel(x_label)
        if i == 0:
            # The y-axis is shared, so one caption on the left panel suffices.
            axes[i].set_ylabel(y_label)

        sub_plot_data = plot_data[i]
        for label in sub_plot_data['label'].unique():
            subset = sub_plot_data[sub_plot_data['label'] == label]

            sns.lineplot(ax=axes[i], data=subset, x='t', y='mean', marker="o", label=label)

            # Add the shaded standard deviation
            axes[i].fill_between(
                subset['t'],
                subset['mean'] - subset['std'],
                subset['mean'] + subset['std'],
                alpha=0.2,
            )

    plt.suptitle(title, fontsize=16)
    # NOTE(review): plt.legend acts on the current axes only, so the
    # "Condition" title may appear on just one panel — confirm this is intended.
    plt.legend(title="Condition")
    plt.tight_layout()
    plt.savefig(file_name)
    plt.show()


In [None]:
# Few-shot evaluation figure: one panel per condition. The frames are passed
# in the same order as their subtitles.
plot_subplots(
    plot_data=[df_2, df_1, df_3],
    subtitles=[
        "Entangled (dense ANN); LR: 0.001",
        "Entangled (dense ANN); LR: 0.0001",
        "Disentangled memory; LR: 0.1",
    ],
    title="Performance on original and new data during few-shot learning.",
    x_label="Few-shot batches (size:16); max. Eps. len.:10",
    y_label="Mean reward (per step)",
    file_name='few_shot_evaluation.png',
)


In [None]:
# Show the summary columns, then expose 't' and 'mean' under the column names
# that plot_1x reads ('step' and 'value-smoothed'). This modifies df_3 in place.
# NOTE(review): df_3 summarises every run listed in experiments_3, while the
# plot title says "(1x)" — confirm whether a single run was intended here.
print(df_3.columns)
df_3['step'] = df_3['t']
df_3['value-smoothed'] = df_3['mean']

def plot_1x(
    plot_data,
    title,
    x_label,
    y_label,
    file_name,
):
    """Draw one marked line per condition on a log x-axis, then save and show it.

    Args:
        plot_data: DataFrame with the columns 'label', 'step' and
            'value-smoothed'; every distinct 'label' becomes one line.
        title: Title of the figure.
        x_label: Caption of the x-axis.
        y_label: Caption of the y-axis.
        file_name: Path the figure is written to before it is shown.
    """
    conditions = plot_data['label']
    for condition in conditions.unique():
        condition_rows = plot_data[conditions == condition]
        sns.lineplot(
            data=condition_rows,
            x='step',
            y='value-smoothed',
            marker="o",
            label=condition,
        )

    plt.xscale('log')  # logarithmic x-axis
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend(title="Condition", loc='lower right')
    # Save before show(): the figure may no longer be available afterwards.
    plt.savefig(file_name)
    plt.show()


# Stand-alone few-shot figure for the third condition only.
plot_1x(
    plot_data=df_3,
    title="Performance on original and new data during few-shot learning (1x).",
    x_label="Few-shot batches (size:16); max. Eps. len.:10",
    y_label="Mean reward (per step)",
    file_name="few_shot_evaluation_separate.png",
)