In [1]:
import sys, os, time
import numpy as np
import scipy.io as sio

In [2]:
def acc(r):
    """Summarise the final row of the result matrix ``r``.

    Returns a ``(value, spread)`` tuple where ``value`` is the sum of the
    last row divided by the number of rows of ``r`` and ``spread`` is the
    (population) standard deviation of that same row.
    """
    final_row = r[-1]
    n_rows = len(r)
    return final_row.sum() / n_rows, final_row.std()

def bwt(r):
    """Compare the final row of ``r`` against its diagonal.

    For every column ``k`` except the last, the difference
    ``r[last][k] - r[k][k]`` is taken. Returns a tuple of the sum of those
    differences divided by ``len(r) - 1`` and their (population) standard
    deviation.
    """
    n_rows = len(r)
    last = n_rows - 1
    deltas = np.array([r[last][k] - r[k][k] for k in range(last)])
    return deltas.sum() / last, deltas.std()

In [13]:
# Experiment details
# Each entry is (date, experiment name, task sequence); the task sequence is
# a list of (task index, environment id) pairs in the order they appear in
# the experiment name.
experiments = [
    ('2023-01-19','doorkey-wallgap-lavagap-redbluedoor', [
    (0, 'MiniGrid-DoorKey-6x6-v0'),
    (1, 'MiniGrid-WallGapS6-v0'),
    (2, 'MiniGrid-LavaGapS6-v0'),
    (3, 'MiniGrid-RedBlueDoors-6x6-v0')
    ]),
    ('2023-01-21','redbluedoor-lavagap-doorkey-wallgap', [
    (0, 'MiniGrid-RedBlueDoors-6x6-v0'),
    (1, 'MiniGrid-LavaGapS6-v0'),
    (2, 'MiniGrid-DoorKey-6x6-v0'),
    (3, 'MiniGrid-WallGapS6-v0')
    ]),
    ('2023-01-21','doorkey-wallgap-lavagap-redbluedoor-emptyrand', [
    (0, 'MiniGrid-DoorKey-6x6-v0'),
    (1, 'MiniGrid-WallGapS6-v0'),
    (2, 'MiniGrid-LavaGapS6-v0'),
    (3, 'MiniGrid-RedBlueDoors-6x6-v0'),
    (4, 'MiniGrid-Empty-Random-6x6-v0')
    ]),
    ('2023-01-22','lavagap-doorkey-emptyrand-redbluedoor-wallgap', [
    (0, 'MiniGrid-LavaGapS6-v0'),
    (1, 'MiniGrid-DoorKey-6x6-v0'),
    (2, 'MiniGrid-Empty-Random-6x6-v0'),
    (3, 'MiniGrid-RedBlueDoors-6x6-v0'),
    (4, 'MiniGrid-WallGapS6-v0')
    ])]

# Components of the result-file names read below.
header = 'minigrid'
steps = '5e5'
approaches = ['blip','ewc','fine-tuning']
stages = ['train', 'eval']
seeds = [1,2,3]
F_prior = 5e-18
ewc_lambda = 5000.0

for item in experiments:

    date = item[0]
    experiment = item[1]
    tasks_sequence = item[2]
    t = len(tasks_sequence)


    print('Experiment:',experiment, '\n')
    for approach in approaches:
        # One (t x t) result matrix per seed.
        r_all = []
        for seed in seeds:
            r = np.zeros([t, t])
            # The log name carries an approach-specific hyper-parameter suffix.
            if approach == 'fine-tuning' or approach == 'ft-fix':
                log_name = '{}_{}_{}_{}_{}_{}'.format(date, header, steps, experiment, approach, seed)
            elif approach == 'ewc':
                log_name = '{}_{}_{}_{}_{}_{}_lamb_{}'.format(date, header, steps, experiment, approach, seed, ewc_lambda)
            elif approach == 'blip':
                log_name = '{}_{}_{}_{}_{}_{}_F_prior_{}'.format(date, header, steps, experiment, approach, seed, F_prior)
            else:
                # Fail loudly instead of silently reusing the previous
                # iteration's log_name (or hitting a NameError).
                raise ValueError('Unknown approach: {!r}'.format(approach))
            mat_file = os.path.join('./result_data/', log_name + '_result.mat')
            mat_contents = sio.loadmat(mat_file)
            # Unwrap the nested containers produced by loadmat for this entry.
            # NOTE(review): the exact nesting depth depends on how the .mat
            # file was written -- confirm against the code that saves it.
            te_reward_arr = mat_contents['te_reward_arr'][0][0][0][0][0][0][0]
            num_samples = len(te_reward_arr[0][0])
            # The series is split into t equal-length segments.
            len_task = num_samples // t
            for i in range(t):
                # Last sample of the i-th segment.
                index = (i+1)*len_task - 1
                for j in range(t):
                    # presumably: test reward on task j at the end of the
                    # i-th training stage -- verify against the logger.
                    r[i][j] = te_reward_arr[j][0][index]
            r_all.append(r)

        # Element-wise median across seeds, then the summary metrics.
        r_median = np.median(r_all, axis=0)
        print('Approach:', approach)
        print('ACC:', acc(r_median))
        print('BWT:', bwt(r_median),'\n')

Experiment: doorkey-wallgap-lavagap-redbluedoor 

Approach: blip
ACC: (0.6569375, 0.33508830394535144)
BWT: (0.008847222222222303, 0.013337513812227392) 

Approach: ewc
ACC: (0.7311770833333333, 0.24676056309534541)
BWT: (0.011513888888888898, 0.018227927206539825) 

Approach: fine-tuning
ACC: (0.0, 0.0)
BWT: (-0.9154305555555556, 0.06028952458504998) 

Experiment: redbluedoor-lavagap-doorkey-wallgap 

Approach: blip
ACC: (0.65565625, 0.23192539579444452)
BWT: (-0.01918055555555546, 0.14070915109052884) 

Approach: ewc
ACC: (0.7026458333333333, 0.21995960112818952)
BWT: (-0.09122222222222215, 0.20138722125746336) 

Approach: fine-tuning
ACC: (0.23703125, 0.4105501679815604)
BWT: (-0.8444166666666667, 0.10180737075841191) 

Experiment: doorkey-wallgap-lavagap-redbluedoor-emptyrand 

Approach: blip
ACC: (0.5294750000000001, 0.41980905679447494)
BWT: (0.01759375000000001, 0.01760756562354663) 

Approach: ewc
ACC: (0.7161333333333333, 0.243445469880948)
BWT: (-0.004447916666666628, 0.05710

In [None]:
# Load the BLIP result files for seeds 1, 2 and 3 of the
# 'redbluedoor-lavagap-doorkey-wallgap' experiment.
mat_contents1, mat_contents2, mat_contents3 = (
    sio.loadmat(result_path)
    for result_path in (
        './result_data/2023-01-21_minigrid_5e5_redbluedoor-lavagap-doorkey-wallgap_blip_1_F_prior_5e-18_result.mat',
        './result_data/2023-01-21_minigrid_5e5_redbluedoor-lavagap-doorkey-wallgap_blip_2_F_prior_5e-18_result.mat',
        './result_data/2023-01-21_minigrid_5e5_redbluedoor-lavagap-doorkey-wallgap_blip_3_F_prior_5e-18_result.mat',
    )
)

In [None]:
# Pull the 'tr_reward_arr' and 'te_reward_arr' entries out of each seed's
# loaded .mat contents.
tr_s1, tr_s2, tr_s3 = (
    contents['tr_reward_arr']
    for contents in (mat_contents1, mat_contents2, mat_contents3)
)

te_s1, te_s2, te_s3 = (
    contents['te_reward_arr']
    for contents in (mat_contents1, mat_contents2, mat_contents3)
)

# Unwrap the seven levels of nesting around each 'te_reward_arr' entry.
te_s1_mean, te_s2_mean, te_s3_mean = (
    nested[0][0][0][0][0][0][0]
    for nested in (te_s1, te_s2, te_s3)
)

In [None]:
# For each of the four task indices, stack that task's series from the three
# seeds and take the element-wise median across seeds.
te_median_0, te_median_1, te_median_2, te_median_3 = (
    np.median(
        np.array([seed_mean[task][0] for seed_mean in (te_s1_mean, te_s2_mean, te_s3_mean)]),
        axis=0,
    )
    for task in range(4)
)
# Disabled: the same computation for task index 4.
#te_median_4 = np.median(np.array([te_s1_mean[4][0], te_s2_mean[4][0], te_s3_mean[4][0]]), axis=0)