In [5]:
import sys, os, time
import numpy as np
import scipy.io as sio

In [10]:
# Accuracy metric
def acc(r):
    """Summarise the last row of the square result matrix ``r``.

    Returns a ``(mean, std)`` tuple, both rounded to two decimals:
    the sum of the last row divided by the number of rows of ``r``,
    and the standard deviation of that same row.
    """
    final_row = r[-1]
    mean_score = round(final_row.sum() / len(r), 2)
    spread = round(final_row.std(), 2)
    return mean_score, spread
# Backward Transfer metric
def bwt(r):
    """Compare the last row of ``r`` against its diagonal.

    For every column ``k`` except the last one, the difference
    ``r[last][k] - r[k][k]`` is collected. Returns a ``(mean, std)`` tuple
    of those differences, both rounded to two decimals.
    """
    n_tasks = len(r)
    # Index r lazily inside the comprehension so an empty r is never indexed.
    deltas = np.array(
        [r[n_tasks - 1][k] - r[k][k] for k in range(n_tasks - 1)]
    )
    mean_delta = round(deltas.sum() / (n_tasks - 1), 2)
    spread = round(deltas.std(), 2)
    return mean_delta, spread

In [11]:
# Experiment details
# Each entry is a (date, experiment name, task sequence) tuple, where the
# task sequence is a list of (position, MiniGrid environment id) pairs.
experiments = [
    (
        '2023-01-19',
        'doorkey-wallgap-lavagap-redbluedoor',
        list(enumerate([
            'MiniGrid-DoorKey-6x6-v0',
            'MiniGrid-WallGapS6-v0',
            'MiniGrid-LavaGapS6-v0',
            'MiniGrid-RedBlueDoors-6x6-v0',
        ])),
    ),
    (
        '2023-01-21',
        'redbluedoor-lavagap-doorkey-wallgap',
        list(enumerate([
            'MiniGrid-RedBlueDoors-6x6-v0',
            'MiniGrid-LavaGapS6-v0',
            'MiniGrid-DoorKey-6x6-v0',
            'MiniGrid-WallGapS6-v0',
        ])),
    ),
    (
        '2023-01-21',
        'doorkey-wallgap-lavagap-redbluedoor-emptyrand',
        list(enumerate([
            'MiniGrid-DoorKey-6x6-v0',
            'MiniGrid-WallGapS6-v0',
            'MiniGrid-LavaGapS6-v0',
            'MiniGrid-RedBlueDoors-6x6-v0',
            'MiniGrid-Empty-Random-6x6-v0',
        ])),
    ),
    (
        '2023-01-22',
        'lavagap-doorkey-emptyrand-redbluedoor-wallgap',
        list(enumerate([
            'MiniGrid-LavaGapS6-v0',
            'MiniGrid-DoorKey-6x6-v0',
            'MiniGrid-Empty-Random-6x6-v0',
            'MiniGrid-RedBlueDoors-6x6-v0',
            'MiniGrid-WallGapS6-v0',
        ])),
    ),
    (
        '2023-01-27',
        'unlockpick-doorkey-wallgap-lavagap-redbluedoor-emptyrand',
        list(enumerate([
            'MiniGrid-UnlockPickup-v0',
            'MiniGrid-DoorKey-6x6-v0',
            'MiniGrid-WallGapS6-v0',
            'MiniGrid-LavaGapS6-v0',
            'MiniGrid-RedBlueDoors-6x6-v0',
            'MiniGrid-Empty-Random-6x6-v0',
        ])),
    ),
    (
        '2023-01-28',
        'distshift-doorkey-wallgap-lavagap-redbluedoor-emptyrand',
        list(enumerate([
            'MiniGrid-DistShift1-v0',
            'MiniGrid-DoorKey-6x6-v0',
            'MiniGrid-WallGapS6-v0',
            'MiniGrid-LavaGapS6-v0',
            'MiniGrid-RedBlueDoors-6x6-v0',
            'MiniGrid-Empty-Random-6x6-v0',
        ])),
    ),
    (
        '2023-01-29',
        'doorkeylava-doorkey-wallgap-lavagap-redbluedoor',
        list(enumerate([
            'MiniGrid-DoorKeyLava-6x6-v0',
            'MiniGrid-DoorKey-6x6-v0',
            'MiniGrid-WallGapS6-v0',
            'MiniGrid-LavaGapS6-v0',
            'MiniGrid-RedBlueDoors-6x6-v0',
        ])),
    ),
]

# Components of the result-file names assembled further down in this cell.
header = 'minigrid'
steps = '5e5'
approaches = ['blip', 'ewc', 'fine-tuning']
stages = ['train', 'eval']
seeds = [1, 2, 3]
F_prior = 1e-16      # appended to the file name for the 'blip' approach
ewc_lambda = 5000.0  # appended to the file name for the 'ewc' approach

# For every experiment and approach: build one tasks-by-tasks reward matrix
# per seed from the saved .mat results, take the element-wise median across
# seeds, and print the ACC / BWT summaries of that median matrix.
for item in experiments:

    date, experiment, tasks_sequence = item
    t = len(tasks_sequence)

    print('Experiment:',experiment, '\n')
    for approach in approaches:
        r_all = []
        for seed in seeds:
            r = np.zeros([t, t])

            # The result file name carries an approach-specific suffix.
            if approach in ('fine-tuning', 'ft-fix'):
                log_name = '{}_{}_{}_{}_{}_{}'.format(date, header, steps, experiment, approach, seed)
            elif approach == 'ewc':
                log_name = '{}_{}_{}_{}_{}_{}_lamb_{}'.format(date, header, steps, experiment, approach, seed, ewc_lambda)
            elif approach == 'blip':
                log_name = '{}_{}_{}_{}_{}_{}_F_prior_{}'.format(date, header, steps, experiment, approach, seed, F_prior)
            else:
                # Without this branch an unrecognised approach would either
                # hit a NameError or silently reuse the previous iteration's
                # log_name and report another approach's numbers.
                raise ValueError('Unknown approach: {!r}'.format(approach))

            mat_file = os.path.join('./result_data/', log_name + '_result.mat')
            mat_contents = sio.loadmat(mat_file)
            # NOTE(review): the chain of [0] peels off the nested wrappers that
            # loadmat returns for this variable; the exact depth presumably
            # depends on how the results were saved -- confirm against the writer.
            te_reward_arr = mat_contents['te_reward_arr'][0][0][0][0][0][0][0]

            # The reward series is split into t equal-length segments; the
            # last sample of segment i fills row i of the matrix.
            # Presumably segment i corresponds to the training stage of task i
            # and te_reward_arr[j] to evaluation on task j -- TODO confirm.
            num_samples = len(te_reward_arr[0][0])
            len_task = num_samples // t
            for i in range(t):
                index = (i + 1) * len_task - 1
                for j in range(t):
                    r[i][j] = te_reward_arr[j][0][index]
            r_all.append(r)

        # Element-wise median over the per-seed matrices.
        r_median = np.median(r_all, axis=0)
        print("Approach:", approach)
        print("ACC: {}".format(acc(r_median)))
        print("BWT: {}\n".format(bwt(r_median)))

Experiment: doorkey-wallgap-lavagap-redbluedoor 

Approach: blip
ACC: (0.39, 0.35)
BWT: (-0.38, 0.4)

Approach: ewc
ACC: (0.73, 0.25)
BWT: (0.01, 0.02)

Approach: fine-tuning
ACC: (0.0, 0.0)
BWT: (-0.92, 0.06)

Experiment: redbluedoor-lavagap-doorkey-wallgap 

Approach: blip
ACC: (0.71, 0.24)
BWT: (0.23, 0.29)

Approach: ewc
ACC: (0.7, 0.22)
BWT: (-0.09, 0.2)

Approach: fine-tuning
ACC: (0.24, 0.41)
BWT: (-0.84, 0.1)

Experiment: doorkey-wallgap-lavagap-redbluedoor-emptyrand 

Approach: blip
ACC: (0.63, 0.41)
BWT: (0.23, 0.36)

Approach: ewc
ACC: (0.72, 0.24)
BWT: (-0.0, 0.06)

Approach: fine-tuning
ACC: (0.22, 0.37)
BWT: (-0.65, 0.39)

Experiment: lavagap-doorkey-emptyrand-redbluedoor-wallgap 

Approach: blip
ACC: (0.72, 0.37)
BWT: (-0.05, 0.07)

Approach: ewc
ACC: (0.76, 0.38)
BWT: (-0.0, 0.0)

Approach: fine-tuning
ACC: (0.38, 0.47)
BWT: (-0.67, 0.39)

Experiment: unlockpick-doorkey-wallgap-lavagap-redbluedoor-emptyrand 

Approach: blip
ACC: (0.34, 0.43)
BWT: (-0.06, 0.13)

Approach