In [1]:
import torch
import torch.nn.functional as F

import math
import os
import logging
import numpy as np
import copy
from pdb import set_trace

from torchvision import transforms
from torchvision.transforms import ToTensor, Resize, Compose
from dataloaders import init_dataloaders

from MAML.model import ModelConvSynbols, ModelConvOmniglot, ModelConvMiniImagenet, ModelMLPSinusoid
from MAML.metalearners import ModelAgnosticMetaLearning, ModularMAML, ProtoMAML
from MAML.utils import ToTensor1D, set_seed, is_connected

from Utils.bgd_lib.bgd_optimizer import create_BGD_optimizer

In [2]:
from types import SimpleNamespace
from collections import namedtuple
import main

# Meta dataloaders, bundled so each one is reachable by name.
# `init_dataloaders` yields three loaders, taken here in the order
# (train, val, cl).
MetaDLs = namedtuple('MetaDLs', 'train val cl')

# Experiment configuration handed to `init_dataloaders`.
args = SimpleNamespace(
    dataset='omniglot',
    folder='Data',
    num_shots=5, # default=5
    num_ways=5,  # default=5
    num_shots_test=15, # default=15
    batch_size=25,     # default=25
    # CL args group
    prob_statio=0.98,  # default=0.98
    task_sequence=None, # default=None
    n_steps_per_task=1, # default=1
    use_different_nways=False, # default=False
)
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# host without a visible CUDA device instead of failing later on.
args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

meta_dls = MetaDLs(*init_dataloaders(args))

load from omniglot.npy.


In [3]:
print('Initializing models...')
wandb = None
args.meta_lr = 0.001


def init_models():
    """Create a fresh Omniglot conv model wrapped in a MAML meta-learner.

    Reads the notebook-level ``args`` (``num_ways``, ``meta_lr``, ``device``).

    Returns:
        tuple: ``(metalearner, meta_optimizer, meta_optimizer_cl)``. The two
        optimizers are the *same* Adam instance, built over the new model's
        parameters.
    """
    learner_args = SimpleNamespace(
        device=args.device,
        num_ways=args.num_ways,
        # Size of the fast adaptation step, i.e. learning rate in the gradient descent update.
        step_size=0.1,
        is_classification_task=True,
        # Use the first order approximation, do not use higher-order derivatives during meta-optimization.
        first_order=0,
        # for MRCL, freeze all conv layers at cl time
        freeze_visual_features=0,
        # number of inner updates
        num_steps=1,  # aka num_adaptation_steps
        # Whether or not to learn the (inner loop) step-size.
        learn_step_size=False,
        # power for update modulation
        um_power=0,
        # Whether or not to learn param specific step-size
        per_param_step_size=False,
    )

    net = ModelConvOmniglot(args.num_ways, hidden_size=64, deeper=0)
    adam = torch.optim.Adam(net.parameters(), lr=args.meta_lr)
    learner = ModelAgnosticMetaLearning(
        net, adam, F.cross_entropy, args=learner_args)

    # The pretraining and continual-learning optimizers are one shared object.
    return learner, adam, adam


metalearner, meta_optimizer, meta_optimizer_cl = init_models()

Initializing models...


# Pretraining

In [4]:
# Pretraining with early stopping on validation accuracy.
num_epochs = 2
num_batches = 100 # Number of batch of tasks per epoch (default: 100)
verbose = True
patience = 5  # epochs without improvement tolerated before stopping

best_val = 0.
epochs_overfitting = 0
# Snapshot up front so `best_metalearner` is always bound, even if no epoch
# ever improves on the initial `best_val`.
best_metalearner = copy.deepcopy(metalearner)
# Progress-bar label whose epoch field is padded to the digit count of num_epochs.
epoch_desc = 'Epoch {{0: <{0}d}}'.format(1 + int(math.log10(num_epochs)))
print(f'\npretraining for {num_epochs} epochs...\n')
for epoch in range(num_epochs):
    metalearner.train(
        meta_dls.train,
        max_batches=num_batches,
        verbose=verbose,
        desc='Training',
        leave=False)
    results = metalearner.evaluate(
        meta_dls.val,
        max_batches=num_batches,
        verbose=verbose,
        epoch=epoch,
        desc=epoch_desc.format(epoch + 1))

    print(f'[{epoch}] results: {results}')
    result_val = results['accuracies_after']
    # early stopping:
    if best_val < result_val:
        # New best validation accuracy: reset the counter and keep a copy.
        epochs_overfitting = 0
        best_val = result_val
        best_metalearner = copy.deepcopy(metalearner)
    else:
        epochs_overfitting += 1
        if epochs_overfitting > patience:
            break

Training:   0%|          | 0/100 [00:00<?, ?it/s]


pretraining for 2 epochs...



Epoch 1: 100%|██████████| 100/100 [00:05<00:00, 18.61it/s, accuracy=0.8661, inner_loss=1.7562, loss=0.4014]       
Training:   1%|          | 1/100 [00:00<00:12,  7.90it/s, accuracy=0.9552, inner_loss=1.7310, outer_loss=0.1694]

[0] results: {'mean_outer_loss': 0.4014275380969048, 'accuracies_after': 0.866093360185623, 'mean_inner_loss': 1.7562124276161193}


Epoch 2: 100%|██████████| 100/100 [00:05<00:00, 18.69it/s, accuracy=0.8980, inner_loss=1.7297, loss=0.2999]       

[1] results: {'mean_outer_loss': 0.29987873092293743, 'accuracies_after': 0.8980400258302692, 'mean_inner_loss': 1.72973963022232}





# Continual Learning

In [5]:
# Start continual learning from the best validation snapshot kept by the
# early-stopping logic of the pretraining cell, rather than from whatever the
# last epoch produced. Fall back to the live meta-learner when no snapshot
# was recorded (no epoch improved on the initial `best_val`).
pretrained = globals().get('best_metalearner')
if pretrained is None:
    pretrained = metalearner
cl_model_init = copy.deepcopy(pretrained)
# `cl_model_init` is an independent copy; drop the references no longer needed.
del pretrained, metalearner

(Single run)

In [6]:
timesteps = 100 # number of timesteps for the CL exp
algo3 = False

# TODO: why aren't most of these set within cl_model.__init__?
# Continual-learning knobs attached to the pretrained meta-learner, applied
# in the order listed.
cl_overrides = {
    'optimizer_cl': meta_optimizer_cl,
    'cl_strategy': 'loss',
    # threshold for training on incoming data or not
    'cl_strategy_thres': 4.0,
    # threshold for task boundary detection (-1 to turn off)
    'cl_tbd_thres': -1,
}
for attr_name, attr_value in cl_overrides.items():
    setattr(cl_model_init, attr_name, attr_value)
if False: # turn off meta-learning at CL time (TODO: wat)
    cl_model_init.no_meta_learning = True

# Work on a copy so `cl_model_init` stays available as a clean starting point.
cl_model = copy.deepcopy(cl_model_init)
# NOTE(review): init_models() builds a brand-new model, so the optimizer taken
# from it is constructed over that new model's parameters, not over
# `cl_model`'s. Confirm that `observe` does not rely on `optimizer_cl`
# stepping `cl_model`'s own weights.
_, _, meta_optimizer_cl = init_models()
cl_model.optimizer_cl = meta_optimizer_cl

In [None]:
# Single-run bookkeeping. These names were previously undefined, so the cell
# raised NameError on a fresh kernel.
n_runs = 1
run = 0
# Mirrors `is_classification_task = True` passed to the meta-learner in init_models.
is_classification_task = True

modes = ['train', 'test', 'ood']
accuracies = np.zeros([n_runs, timesteps])
tbds = np.zeros([n_runs, timesteps])
# Per-mode accuracy history (every timestep) and per-run averages.
avg_accuracies_mode = {m: [] for m in modes}
accuracies_mode = {m: [] for m in modes}


def _mean_or_nan(values):
    """Return the mean of `values`, or NaN (without NumPy's empty-input warning) when empty."""
    return np.mean(values) if len(values) > 0 else np.nan


for i, batch in enumerate(meta_dls.cl):
    # The CL loader yields six items per step; the last two are unused here.
    # `batch_mode` is kept distinct from the `m` loop variables below so the
    # batch's mode is never shadowed.
    data, labels, task_switch, batch_mode, _, _ = batch
    if algo3:
        results = cl_model.observe2(batch)
    else:
        results = cl_model.observe(batch)

    # Reporting:
    accuracy_after = results["accuracy_after"]
    accuracies[run, i] = accuracy_after
    # presumably `batch_mode[0]` is one of the strings in `modes` — verify against the loader
    accuracies_mode[batch_mode[0]].append(accuracy_after)
    tbds[run, i] = float(results['tbd'])

    is_last_step = i == timesteps - 1
    if (verbose and i % 100 == 0) or is_last_step:
        # Include the current timestep: `[:i]` dropped it and produced NaN at i == 0.
        acc = np.mean(accuracies[run, :i + 1])
        acc_mode = [_mean_or_nan(accuracies_mode[m]) for m in modes]
        acc_mode_str = [f'{m}_acc={a:.2f}' for m, a in zip(modes, acc_mode)]
        print(f'total Acc: {acc:.2f},', f'mode accs: {acc_mode_str}', end='\t')
        # Note: tbd==task boundary detection
        tbd = np.mean(tbds[run, :i + 1])
        print(f'Total tbd: {tbd:.2f}', f'it: {i}', sep='\t')

    if is_last_step:
        for m in modes:
            avg_accuracies_mode[m].append(_mean_or_nan(accuracies_mode[m]))
        # Flag the run as failed when accuracy is within 0.2 of chance level (1 / num_ways).
        if run == 0 and is_classification_task:
            if acc < 1. / float(args.num_ways) + 0.2:
                print({'fail': 1})
        break

In [15]:
# Pull one batch from the continual-learning stream, feed it to the model,
# and keep the unpacked pieces around for inspection in the cells below.
for i, batch in enumerate(meta_dls.cl):
    data, labels, task_switch, mode, ways, shots = batch
    # Select the observation routine once, then call it.
    observe_fn = cl_model.observe2 if algo3 else cl_model.observe
    results = observe_fn(batch)
    # Only the first batch is needed.
    break

In [16]:
ways

tensor([5], device='cuda:0')

In [17]:
shots

tensor([5], device='cuda:0')

In [18]:
labels

tensor([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4,
         4]], device='cuda:0')

In [9]:
data.shape

torch.Size([1, 25, 1, 28, 28])

In [None]:
def main(args):
    """Run the full pipeline: boilerplate, dataloaders, pretraining, continual learning.

    NOTE(review): defining this function rebinds the name ``main``, which an
    earlier cell imports as a module (``import main``).

    NOTE(review): ``boilerplate``, ``pretraining`` and ``continual_learning``
    are not defined or imported anywhere in the visible notebook; presumably
    they live in the ``main`` module — confirm and import them explicitly.

    Args:
        args: experiment configuration; passed to ``boilerplate``, which
            returns the (possibly updated) ``args`` together with ``wandb``.
    """
    args, wandb = boilerplate(args)

    print('Initializing dataloaders...')
    # Notes:
    # - the train/val meta dls yield batches of {'train', 'test', 'ways', 'shots_tr', 'shots_te'}
    # - the cl meta dl yields list of 6 tensors (TODO?)
    meta_train_dataloader, meta_val_dataloader, cl_dataloader = init_dataloaders(args)

    # NOTE(review): `metalearner` and `meta_optimizer_cl` used below are not
    # created inside this function, so they resolve to notebook globals. An
    # earlier cell runs `del metalearner`, so this would raise NameError unless
    # the models are re-initialised first (e.g. via init_models()).

    print('Executing pretraining...')
    cl_model_init = pretraining(
        args,
        wandb,
        metalearner,
        meta_train_dataloader,
        meta_val_dataloader)

    print('Executing continual learning...')
    continual_learning(
        args,
        wandb,
        cl_model_init,
        meta_optimizer_cl,
        cl_dataloader)