## Load toy-model result files, fit various parameters of long-term forgetting, and attach the fitted values to the files

In [5]:
import os
os.chdir('/Users/haozheshan/Dropbox/codes/gp_continual_learning/')
import numpy as np
import matplotlib.pyplot as plt
import theory, cluster_utils, torch, data, utils, pickle


%load_ext autoreload
%autoreload 2

# USE KEYWORDS BELOW TO SEARCH FOR FOLDERS
# Lists candidate result folders under 'cluster_results/' using the keywords
# 'diff_strength' and 'toy'. The matching rule lives in cluster_utils.list_folders
# (presumably folder names must contain the keywords -- verify there).
batch_name_list =cluster_utils.list_folders('cluster_results/', 'diff_strength', 'toy')

#################
# Index into batch_name_list selecting which folder to process; edit by hand
# after inspecting the printed folder list.
folder_index = 1
#################
# Load every result object of the chosen batch. sort_by_key=None leaves the
# sorting key unspecified, so the organizer decides how to order the results.
organizer = cluster_utils.ClusterResultOrganizer('cluster_results/', batch_name=batch_name_list[folder_index], sort_by_key=None)
# Collect the 'train loss' and 'train magnitude' entries across the loaded results.
all_train_loss = organizer.organize_results('train loss')
all_train_mag = organizer.organize_results('train magnitude')

# Show which data keys the first loaded result object provides.
print(organizer.all_data_obj[0].keys())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generating a list at 2023-05-27 22:25
[0] gp_toy_30x50_xsim80_1L_10context_diff_strength
[1] gp_toy_30x50_xsim90_1L_50context_diff_strength
[2] gp_toy_30x50_xsim60_1L_50context_diff_strength
[3] gp_toy_30x50_xsim30_1L_50context_diff_strength
[4] gp_toy_30x50_xsim80_1L_50context_diff_strength
[5] gp_toy_30x50_xsim60_1L_10context_diff_strength
[6] gp_toy_30x50_xsim0_1L_50context_diff_strength
No key was specified. Automatically using key <<context_strength>> to sort the results.
"NSEEDS" found in the arguments. Assuming that each file contains multiple random seeds.
11 data objects loaded from folder "gp_toy_30x50_xsim90_1L_50context_diff_strength".
For key <<context_strength>>, the values are [35.0, 30.0, 20.0, 25.0, 0.0, 45.0, 40.0, 5.0, 50.0, 15.0, 10.0]
Available data keys are dict_keys(['args', 'train loss', 'test loss', 'train acc', 'test acc', 'train loss naive', 'test loss naive', 'train acc n

## The cell below fits exponentials to the loss and magnitude curves and computes two forgetting OPs. The results are saved back into the pickle files.

In [6]:
# In toy-model results, each file contains all random seeds corresponding to the same
# set of hyperparameters. For every loaded result object this cell:
#   1. fits exponential relaxations to the averaged M and A curves,
#   2. regenerates the input data for each seed and computes tr(P1P2)/P and V1-V2,
#   3. stores the new quantities in the result object and rewrites its pickle file.
# NOTE: this overwrites the original result files in place.
for file_ind in range(len(organizer.all_data_obj)):
    # Same object as organizer.all_data_obj[file_ind]; item assignments below
    # therefore update the organizer's copy as well.
    result = organizer.all_data_obj[file_ind]

    # fit exponential relaxation to M and A dynamics
    # .mean(0)[0]: average over axis 0, then take entry 0
    # (presumably seeds and the first task, respectively -- TODO confirm).
    L1 = result['train loss'].mean(0)[0]
    M1 = np.array(result['train magnitude']).mean(0)[0]
    # A is derived from the averaged loss and magnitude curves.
    A1 = -0.5 * (L1 - M1 - 1)
    # Both fits use the integer index 0..len(M1)-1 as the x-axis.
    m_asym, m_tau, _ = utils.exponential_fit(np.arange(len(M1)), M1)
    a_asym, a_tau, _ = utils.exponential_fit(np.arange(len(M1)), A1)

    # recreate the input data
    _args = result['args']

    all_trP1P2 = []
    all_V1_minus_V2 = []

    for seed in range(_args.NSEEDS):
        # Seed torch before generating the data so that each seed's dataset is
        # reproducible (presumably matching the seeding of the original run -- verify).
        torch.manual_seed(seed)

        seq_of_train_x, seq_of_test_x, seq_of_train_y, _ = \
            data.prepare_cluster_dataset(num_tasks=2,
                                         train_p=_args.P,
                                         test_p=2,
                                         num_clusters=_args.NC,
                                         input_dim=_args.N0,
                                         hidden_dim=_args.Nh,
                                         relative_radius=0.1,
                                         teacher_similarity=_args.tsim,
                                         input_similarity=_args.xsim,
                                         accumulate=False,
                                         precision=64)

        seq_of_train_x, seq_of_test_x = data.add_task_embedding(
            seq_of_train_x, seq_of_test_x, _args.N0context, _args.context_strength)

        # Kernels on the training inputs: within task 1, within task 2, and across tasks.
        k1 = theory.k_ntk(seq_of_train_x[0], seq_of_train_x[0], depth=_args.depth)
        k2 = theory.k_ntk(seq_of_train_x[1], seq_of_train_x[1], depth=_args.depth)
        k12 = theory.k_ntk(seq_of_train_x[0], seq_of_train_x[1], depth=_args.depth)
        k1_inv = torch.inverse(k1)
        k2_inv = torch.inverse(k2)

        y1 = seq_of_train_y[0]
        y2 = seq_of_train_y[1]

        all_trP1P2.append(float(torch.trace(k1_inv @ k12 @ k2_inv @ k12.T) / _args.P))
        all_V1_minus_V2.append(2 - 2 * float(y1.T @ k1_inv @ k12 @ k2_inv @ y2) / float((y1.T @ k1_inv @ y1)))

    # One entry per seed.
    all_trP1P2 = np.array(all_trP1P2)
    all_V1_minus_V2 = np.array(all_V1_minus_V2)

    # save the new data
    result['M tau'] = m_tau
    result['A tau'] = a_tau
    result['M asym'] = m_asym
    result['A asym'] = a_asym
    result['tr(P1P2)/P'] = all_trP1P2
    result['V1-V2'] = all_V1_minus_V2

    # Use a context manager so the file is flushed and closed before the next
    # iteration, even if pickling raises part-way through.
    with open(organizer.file_path + organizer.file_name_list[file_ind], 'wb') as out_file:
        pickle.dump(result, out_file)

In [28]:
# Number of file names the organizer is tracking.
# NOTE(review): the saved output of this cell (9) does not match the 11 data
# objects reported when the folder was loaded, and its execution count is out of
# sequence -- it was likely run against a different folder; re-run after
# Restart & Run All to confirm.
len(organizer.file_name_list)

9