In [25]:
import sys, os, time
import numpy as np
import pandas as pd
import scipy.io as sio

In [26]:
# Accuracy metric
def acc(r_val, r_std):
    """Average the final entry of ``r_val`` and propagate its deviations.

    Args:
        r_val: Indexable collection of score rows; the entry at index
            ``len(r_val) - 1`` must support ``.sum()``.
            Presumably ``r_val[i][j]`` is the score on task ``j`` after
            training on task ``i`` -- TODO confirm against the code that
            writes the metrics.
        r_std: Collection with the same layout holding the matching
            standard deviations; its final entry must support ``** 2``
            and ``.sum()``.

    Returns:
        Tuple ``(acc, acc_std)``, both rounded to 4 decimals. ``acc`` is the
        sum of the final row divided by ``len(r_val)``; ``acc_std`` is the
        square root of the summed squared deviations of the final row,
        divided by ``len(r_val)``.
    """
    n_rows = len(r_val)
    final_scores = r_val[n_rows - 1]
    final_devs = r_std[len(r_std) - 1]

    mean_score = final_scores.sum() / n_rows

    # Root-sum-of-squares of the per-entry deviations, scaled like the mean.
    mean_dev = np.sqrt((final_devs ** 2).sum()) / n_rows

    return round(mean_score, 4), round(mean_dev, 4)

# Backward Transfer metric
def bwt(r_val, r_std):
    """Compute the mean change of each earlier diagonal entry and its deviation.

    For every index ``i`` below the last one, the difference
    ``r_val[last][i] - r_val[i][i]`` is taken; the differences are summed and
    divided by ``len(r_val) - 1``.

    Args:
        r_val: Doubly indexable collection of scores (``r_val[i][j]``).
            Presumably row ``i`` holds the scores measured after training on
            task ``i`` -- TODO confirm against the code that writes the metrics.
        r_std: Collection with the same layout holding the matching
            standard deviations.

    Returns:
        Tuple ``(bwt, bwt_std)``, both rounded to 4 decimals. ``bwt_std`` is
        the square root of the summed squared per-difference deviations,
        divided by ``len(r_val) - 1``.
    """
    n_rows = len(r_val)
    last = n_rows - 1
    earlier = range(last)

    # Final-row score minus the score on the diagonal, one value per earlier index.
    deltas = np.array([r_val[last][k] - r_val[k][k] for k in earlier])

    # Deviation of each difference: both operands' deviations combined in quadrature.
    delta_devs = np.array(
        [np.sqrt(r_std[last][k] ** 2 + r_std[k][k] ** 2) for k in earlier]
    )

    mean_delta = deltas.sum() / last

    # Root-sum-of-squares of the per-difference deviations, scaled like the mean.
    mean_delta_dev = np.sqrt((delta_devs ** 2).sum()) / last

    return round(mean_delta, 4), round(mean_delta_dev, 4)

In [27]:
exp = 'minigrid-wallgap-doorkey-redbluedoor-crossing'
steps = '500000'
approaches = ['PPOPC (1)', 'PPOPC (2)', 'PPO']
stages = ['train', 'eval']
seeds = [123456, 789012, 345678]

# Experiment details: (date, experiment name, [(task index, environment id), ...])
experiments = [
    ('2023-11-13', exp, [
        (0, 'MiniGrid-WallGapS6-v0'),
        (1, 'MiniGrid-DoorKey-6x6-v0'),
        (2, 'MiniGrid-RedBlueDoors-6x6-v0'),
        (3, 'MiniGrid-SimpleCrossingS9N1-v0'),
    ]),
]

# Directory holding the pickled metric files. The default is the original
# machine-specific location; set the POLICY_CONSOLIDATION_METRICS_DIR
# environment variable to run the notebook from another checkout or machine
# without editing this cell.
metrics_dir = os.environ.get(
    'POLICY_CONSOLIDATION_METRICS_DIR',
    '/Users/inigo/Documents/cursos/Data Science/master_viu/work/tfm-experiments/policy-consolidation/metrics',
)

# NOTE: unpickling can execute arbitrary code -- only load metric files that
# were produced by a trusted run of this project.
r_val_df = pd.read_pickle(os.path.join(metrics_dir, 'r_val.pkl'))
r_std_df = pd.read_pickle(os.path.join(metrics_dir, 'r_std.pkl'))

# NOTE(review): the same r_val_df / r_std_df are used for every entry of
# `experiments`; that is fine while the list has a single experiment, but the
# metric files would need to be selected per experiment if more are added.
for item in experiments:

    date, experiment, tasks_sequence = item
    t = len(tasks_sequence)

    print('Experiment:', experiment, '\n')
    # The metrics are indexed by the approach's position in `approaches`.
    for i, approach in enumerate(approaches):
        r_val = r_val_df[i]
        r_std = r_std_df[i]
        print("Approach:", approach)
        print("ACC: {}".format(acc(r_val, r_std)))
        print("BWT: {}\n".format(bwt(r_val, r_std)))

Experiment: minigrid-wallgap-doorkey-redbluedoor-crossing 

Approach: PPOPC (1)
ACC: (0.2391, 0.0014)
BWT: (-0.6458, 0.0477)

Approach: PPOPC (2)
ACC: (0.2394, 0.0008)
BWT: (-0.6576, 0.0923)

Approach: PPO
ACC: (0.2396, 0.0006)
BWT: (-0.8841, 0.1141)

