In [1]:
import sys, os, time
import numpy as np
import pandas as pd
import scipy.io as sio
import pickle

In [2]:
# Accuracy metric
def acc(r_val, r_std):
    """Average of the last row of ``r_val`` and its propagated standard deviation.

    Parameters
    ----------
    r_val : array-like, 2-D
        Matrix of scores. Presumably ``r_val[i][j]`` is the score on task ``j``
        after training on task ``i`` -- confirm against the code that writes
        the metrics file.
    r_std : array-like, 2-D
        Standard deviations matching ``r_val`` entry by entry.

    Returns
    -------
    tuple of (float, float)
        ``(acc, acc_std)``, both rounded to 5 decimals. ``acc`` is the sum of
        the last row divided by the number of rows; ``acc_std`` combines the
        last row's standard deviations in quadrature and divides by the same
        count.
    """
    # Coerce to ndarray so that indexing with an integer always selects a
    # *row*. A pandas DataFrame would select a column instead, and a plain
    # list of lists has no ``.sum()`` / ``**`` support.
    r_val = np.asarray(r_val, dtype=float)
    r_std = np.asarray(r_std, dtype=float)

    n_rows = len(r_val)
    last_row = r_val[n_rows - 1]
    last_row_std = r_std[len(r_std) - 1]

    # NOTE: the divisor is the number of rows, not the number of columns;
    # the two only coincide for a square matrix.
    mean_acc = round(last_row.sum() / n_rows, 5)

    # Standard deviation of the mean: quadrature sum of the per-task stds.
    mean_acc_std = round(np.sqrt((last_row_std ** 2).sum()) / n_rows, 5)

    # Plain floats keep the printed tuple readable regardless of NumPy version.
    return float(mean_acc), float(mean_acc_std)

# Backward Transfer metric
def bwt(r_val, r_std):
    """Backward transfer: mean of ``r_val[t-1][i] - r_val[i][i]`` for ``i < t-1``.

    Parameters
    ----------
    r_val : array-like, 2-D
        Matrix of scores with ``t = len(r_val)`` rows. Presumably
        ``r_val[i][j]`` is the score on task ``j`` after training on task
        ``i`` -- confirm against the code that writes the metrics file.
    r_std : array-like, 2-D
        Standard deviations matching ``r_val`` entry by entry.

    Returns
    -------
    tuple of (float, float)
        ``(bwt, bwt_std)``, both rounded to 5 decimals. Each difference's
        standard deviation is the quadrature sum of the two entries involved;
        ``bwt_std`` is the quadrature sum of those, divided by ``t - 1``.
        Returns ``(nan, nan)`` when there are fewer than two rows, because
        there is then no earlier task to compare against.
    """
    # Coerce to ndarray so ``[row, col]`` indexing means the same thing for
    # lists, arrays and DataFrames. Chained ``df[t-1][i]`` on a DataFrame
    # would read column ``t-1`` / row ``i``, i.e. the transposed element.
    r_val = np.asarray(r_val, dtype=float)
    r_std = np.asarray(r_std, dtype=float)

    t = len(r_val)
    if t < 2:
        # Avoid the 0/0 division (and its RuntimeWarning) of the naive formula.
        return float("nan"), float("nan")

    # Indices of every task except the last one.
    idx = np.arange(t - 1)

    # Final score on each earlier task minus the score right after learning it.
    diffs = r_val[t - 1, idx] - r_val[idx, idx]

    # Standard deviation of each difference (quadrature sum of both entries).
    std_diffs = np.sqrt(r_std[t - 1, idx] ** 2 + r_std[idx, idx] ** 2)

    bwt_value = round(diffs.sum() / (t - 1), 5)

    # Standard deviation of the mean of the differences.
    bwt_std = round(np.sqrt((std_diffs ** 2).sum()) / (t - 1), 5)

    # Plain floats keep the printed tuple readable regardless of NumPy version.
    return float(bwt_value), float(bwt_std)

In [4]:
# Report ACC and BWT for each configured experiment by loading the saved
# "final" value/std matrices from `metrics_dir` and printing the metrics.
exp = 'minigrid-wallgap-doorkey-redbluedoor-crossing'

approaches = ['rims']
metrics_dir = './metrics/'
# NOTE(review): `seeds` is not referenced anywhere in this cell.
seeds = [123456, 789012, 345678]

# The values below are the components of the metrics file name (see `exp_name`).
# NOTE(review): `date` is overwritten by the first element of each
# `experiments` entry inside the loop below.
date = "2023-11-17"
model = "wallgap-doorkey-redblue-crossing"
algo = "ppo"
frames = 1000000
num_units = 6
k = 4
meta_learn = True
reshape_reward = False

# Experiment details
# Each entry is (date, experiment name, [(task index, environment id), ...]).
experiments = [
    ('2023-11-17', exp, [
    (0, 'MiniGrid-WallGapS6-v0'),
    (1, 'MiniGrid-DoorKey-6x6-v0'),
    (2, 'MiniGrid-RedBlueDoors-6x6-v0'), 
    (3, 'MiniGrid-SimpleCrossingS9N1-v0')  
    ])    
    ]

for item in experiments:

    date = item[0]
    experiment = item[1]
    tasks_sequence = item[2]
    # NOTE(review): `t` is computed but not used in this cell.
    t = len(tasks_sequence)

    print('Experiment:',experiment, '\n')
    # NOTE(review): `i` is unused, and `approach` is only printed -- it does not
    # feed into `exp_name`, so every entry in `approaches` loads the same files.
    for i, approach in enumerate(approaches):

        # create name of data export file
        # NOTE(review): the name is built from `model`, not `experiment`, and the
        # literal "rims" is hard-coded in the template.
        exp_name = '{}_{}_{}_{}_metalearn_{}_rims_{}_k_{}_reshape_{}'.format(date, model, algo, frames, meta_learn, num_units, k, reshape_reward)
        

        r_val_file = os.path.join(metrics_dir, exp_name + "_final_r_val.pkl")
        r_std_file = os.path.join(metrics_dir, exp_name + "_final_r_std.pkl")

        # SECURITY: unpickling can execute arbitrary code; only load metrics
        # files that come from a trusted source.
        r_val_df = pd.read_pickle(r_val_file)
        r_std_df = pd.read_pickle(r_std_file)

        # Plain aliases: no conversion happens here, so `acc`/`bwt` receive
        # whatever object type the pickle contained.
        r_val = r_val_df
        r_std = r_std_df

        print("Approach:", approach)
        print("ACC: {}".format(acc(r_val, r_std)))
        print("BWT: {}\n".format(bwt(r_val, r_std)))

Experiment: minigrid-wallgap-doorkey-redbluedoor-crossing 

Approach: rims
ACC: (0.60046, 0.12844)
BWT: (-0.43939, 0.1805)



In [12]:

# Inspect the value matrix left in `r_val_df` by the experiment loop cell.
# NOTE(review): this depends on kernel state from that cell, and the execution
# count jumps from 4 to 12. The recorded matrix has identical rows, which would
# give BWT = 0, yet the earlier cell printed a non-zero BWT -- the outputs are
# probably from different runs; verify with Restart & Run All.
print(r_val_df)

[[0.924375   0.94508333 0.537875   0.92268518]
 [0.924375   0.94508333 0.537875   0.92268518]
 [0.924375   0.94508333 0.537875   0.92268518]
 [0.924375   0.94508333 0.537875   0.92268518]]
