In [1]:
import os
# Load the autoreload extension only if this kernel has not loaded it yet,
# so that re-running the cell does not load it a second time.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
# Mode 2: reload imported modules before executing each cell.
%autoreload 2

# Restrict this process to GPU index 2. This is set before `torch` is imported below.
# NOTE(review): presumably this is why `device = 'cuda:0'` later in this cell
# refers to that GPU (the only visible device) — confirm on the target machine.
os.environ["CUDA_VISIBLE_DEVICES"]="2"

from dassl.config import get_cfg_default
from yacs.config import CfgNode as CN

import torch
import torch.optim as optim 

import pickle
import random
import copy
import json
import numpy as np
import copy

from dassl.data.datasets.build import build_dataset
from dassl.data.transforms.transforms import build_transform
from dassl.data.data_manager import build_data_loader

import FSL.datasets.stanford_cars
import FSL.datasets.stanford_dogs
import FSL.datasets.caltech101
import FSL.datasets.oxford_flowers
import FSL.datasets.oxford_pets
import FSL.datasets.food101
import FSL.datasets.eurosat
import FSL.datasets.sun397
import FSL.datasets.fgvc_aircraft
import FSL.datasets.cub
import FSL.datasets.ucf101
import FSL.datasets.plantdoc
import FSL.datasets.imagenet

from utils.eval_utils import *
import utils.ssd as ssd


# Every dataset handled by this notebook.
all_ds = [
    'StanfordDogs',
    'StanfordCars',
    'Caltech101',
    'OxfordFlowers',
    'Food101',
    'DescribableTextures',
    'EuroSAT',
    'SUN397',
    'FGVCAircraft',
    'CUB',
    'UCF101',
    'PLANTDOC',
]
# Subset used as validation sets in the final weighted-drop computation.
val_ds = [
    'EuroSAT',
    'SUN397',
    'Food101',
    'DescribableTextures',
    'PLANTDOC',
]

# Backbone architecture and the device the models are placed on.
backbone_arch = "ViT-B/16"
device = 'cuda:0'

# Start from the default config, overlay the trainer YAML, then apply the
# notebook-specific overrides below (overrides must come after the merge).
cfg = get_cfg_default()
cfg.merge_from_file("configs/trainers/mainconfig/adam_lr2e-4_B256_ep200_ViT16.yaml")

cfg.SEED = 0
# cfg.USE_CUDA = False

# Dataset options.
cfg.DATASET.ROOT = "/app/datasets/"
cfg.DATASET.NUM_SHOTS = -1
cfg.DATASET.SUBSAMPLE_CLASSES = "all"

# Data loader options.
cfg.DATALOADER.NUM_WORKERS = 0
cfg.DATALOADER.TRAIN_X.BATCH_SIZE = 4
cfg.DATALOADER.TEST.BATCH_SIZE = 16

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Per-dataset registries, all keyed by dataset name.
test_datasets, test_dataloaders, train_loaders = {}, {}, {}
datasets_cls, classnames = {}, {}

cfg_original = copy.deepcopy(cfg)
for ds in all_ds:
    # Each dataset gets its own copy of the base config; only DATASET.NAME is changed.
    cfg = copy.deepcopy(cfg_original)
    cfg.DATASET.NAME = ds

    tfm_train = build_transform(cfg, is_train=True)
    tfm_test = build_transform(cfg, is_train=False)
    dataset = build_dataset(cfg)

    # Both loaders share the test-time transform, is_train=False and no dataset wrapper.
    shared_loader_kwargs = dict(tfm=tfm_test, is_train=False, dataset_wrapper=None)

    test_loader_all = build_data_loader(
        cfg,
        sampler_type=cfg.DATALOADER.TEST.SAMPLER,
        data_source=dataset.test,
        batch_size=cfg.DATALOADER.TEST.BATCH_SIZE,
        **shared_loader_kwargs,
    )
    # Loader over the training split, randomly sampled, fixed batch size of 64.
    train_loader_all = build_data_loader(
        cfg,
        sampler_type='RandomSampler',
        data_source=dataset.train_x,
        batch_size=64,
        **shared_loader_kwargs,
    )

    # The same dataset object is registered under both dataset dictionaries.
    test_datasets[ds] = datasets_cls[ds] = dataset
    test_dataloaders[ds] = test_loader_all
    train_loaders[ds] = train_loader_all
    classnames[ds] = dataset.classnames


# Stored zero-shot results. pickle executes code on load: only open trusted files.
with open(f"assets/results_zs_all_ViT16.pkl", "rb") as f:
    results_zs = pickle.load(f)

# Seed torch, random and numpy with the config seed (done after the loaders are built).
myseed = cfg.SEED
for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    seed_fn(myseed)

BATCH_SIZE = 64

Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Loading dataset: StanfordDogs
Reading split from /app/datasets/stanford_dogs/split_alexey_stanford_dogs.json
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711

#### Example of forgetting with SSD on the StanfordDogs dataset
##### Compute importances for the SSD algorithm

In [3]:
# SSD hyper-parameters used when (pre)computing importances for all datasets.
dampening_constant, selection_weighting = 1., 30.

parameters = {
    "lower_bound": 1.,
    # unused
    "exponent": 1.,
    # unused
    "magnitude_diff": None,
    # -1 for min_layer / max_layer: all layers are available for modification
    "min_layer": -1,
    "max_layer": -1,
    # unused
    "forget_threshold": 1,
    # Lambda from paper
    "dampening_constant": dampening_constant,
    # Alpha from paper
    "selection_weighting": selection_weighting,
    # Important for importance calculations as quite sensitive!
    "batch_size": 64,
}

# Training loaders handed to `pdr.calc_importance` in the cell below,
# selected by dataset name from `train_loaders`.
retain_loader = {
    name: train_loaders[name]
    for name in (
        'StanfordDogs',
        'StanfordCars',
        'Caltech101',
        'OxfordFlowers',
        'CUB',
        'UCF101',
        'FGVCAircraft',
    )
}

In [4]:
# Build the backbone, an SGD optimizer over its parameters, and the SSD
# perturber that receives both together with the hyper-parameters.
model = get_model(arch=backbone_arch, device=device)
optimizer = optim.SGD(model.parameters(), lr=0.1)
pdr = ssd.ParameterPerturber(model, optimizer, device, parameters)

Loading model..
{'lower_bound': 1.0, 'exponent': 1.0, 'magnitude_diff': None, 'min_layer': -1, 'max_layer': -1, 'forget_threshold': 1, 'dampening_constant': 1.0, 'selection_weighting': 30.0, 'batch_size': 64}


In [None]:
# Importance computation over the retain loaders.
# NOTE(review): aggregate=False presumably keeps one importance set per dataset
# instead of merging them — confirm in utils.ssd.
original_importances = pdr.calc_importance(
    retain_loader,
    classnames,
    aggregate=False,
)

In [None]:
# Gather every "importance*" file found in ssd_importances/ into one dictionary.
importances_all = {}
importance_files = [
    entry for entry in os.listdir("ssd_importances/") if entry.startswith("importance")
]
for file in importance_files:
    loaded_importance = torch.load(f"ssd_importances/{file}")
    # The dataset name is the second "_"-separated token of the file name, without ".pt".
    ds_name = file.split("_")[1].replace(".pt", "")
    print(ds_name)
    importances_all[ds_name] = loaded_importance

In [None]:
# Write the merged importance dictionary to a single file.
all_importances_path = f"ssd_importances/all_importances_batch_64.pt"
torch.save(importances_all, all_importances_path)

##### Unlearn

In [5]:
# Dataset to forget and an index identifying this attempt.
forget_ds = 'StanfordDogs'
id_test = 0

# Training loaders of the forget dataset and of the datasets to retain.
forget_loader = {forget_ds: train_loaders[forget_ds]}
retain_loader = {
    name: train_loaders[name]
    for name in ('Caltech101', 'OxfordFlowers', 'CUB')
}

forget_list = list(forget_loader)
retain_list = list(retain_loader)

# Experiment name: forget dataset, "|"-joined retain datasets, attempt index.
retain_ds = '|'.join(retain_list)
full_name = f"forget_{forget_ds}_retain_{retain_ds}_attempt_{id_test}"

full_name

'forget_StanfordDogs_retain_Caltech101|OxfordFlowers|CUB_attempt_0'

In [6]:
# Location of the precomputed importances. The commented line is the
# repo-relative alternative; the active line is an absolute path.
# path = f"ssd_importances/all_importances_batch_64.pt"
path = f"/app/few_shot_unlearning_old/ssd_importances/all_importances_batch_64.pt"
# Calculation of the forget set importances (datasets listed in `forget_list`)
sample_importances = pdr.calc_importance_loaded(path, forget_list)

# Calculate the importances of the retain sets (datasets listed in `retain_list`)
original_importances = pdr.calc_importance_loaded(path, retain_list)

len n_ds 1.0
len n_ds 3.0


In [7]:
# load the trained model

# SSD hyper-parameters for the dampening step.
dampening_constant, selection_weighting = 1., 30.

parameters = {
    "lower_bound": 1.,
    # unused
    "exponent": 1.,
    # unused
    "magnitude_diff": None,
    # -1 for min_layer / max_layer: all layers are available for modification
    "min_layer": -1,
    "max_layer": -1,
    # unused
    "forget_threshold": 1,
    # Lambda from paper
    "dampening_constant": dampening_constant,
    # Alpha from paper
    "selection_weighting": selection_weighting,
    # Important for importance calculations as quite sensitive!
    "batch_size": 64,
}

# Reload the backbone and switch it to eval mode before dampening.
model = get_model(device=device, arch=backbone_arch)
model = model.eval()

# The optimizer is created AFTER the model is reloaded so that it is bound to the
# parameters of the model actually given to the perturber. Building it first would
# tie it to the previous `model` object and keep that model's weights alive.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

pdr = ssd.ParameterPerturber(model, optimizer, device, parameters)

# Dampen selected parameters; parameters named in `ignore_params` are excluded.
pdr.modify_weight(original_importances, sample_importances, ignore_params=['logit_scale'])

Loading model..
{'lower_bound': 1.0, 'exponent': 1.0, 'magnitude_diff': None, 'min_layer': -1, 'max_layer': -1, 'forget_threshold': 1, 'dampening_constant': 1.0, 'selection_weighting': 30.0, 'batch_size': 64}


In [None]:
# Evaluate the (modified) model on every dataset's test loader, without gradients.
with torch.no_grad():
    results_ds = eval_all_ds(
        model,
        datasets_cls,
        test_dataloaders,
        device=device,
    )

100% 521/521 [00:56<00:00,  9.21it/s]


++++++++++++++++++++++++++++++ StanfordDogs - 0.020393474088291747 ++++++++++++++++++++++++++++++


100% 503/503 [01:21<00:00,  6.14it/s]


++++++++++++++++++++++++++++++ StanfordCars - 0.6189528665588857 ++++++++++++++++++++++++++++++


100% 155/155 [00:12<00:00, 12.10it/s]


++++++++++++++++++++++++++++++ Caltech101 - 0.9217038539553752 ++++++++++++++++++++++++++++++


  1% 2/154 [00:00<00:19,  7.83it/s]

#### Compute MMD weights

In [None]:
# Reload the backbone for feature extraction; this replaces the `model`
# that was modified in the unlearning section above.
model = get_model(arch=backbone_arch, device=device)
model.eval()

In [None]:
# Extract image and text embeddings per dataset and store them as
# features_embeddings/features_<dataset>.pkl.
torch.manual_seed(0)
with torch.no_grad():
    for key in train_loaders:
        # NOTE(review): only a loader registered under 'ImageNet' passes this filter.
        # As far as this notebook shows, `train_loaders` is filled from `all_ds`, which
        # does not contain 'ImageNet' — adjust the filter to (re)compute other datasets.
        if key != 'ImageNet': continue
        print(key)
        features = {'text' : "", 'images' : []}
        ds_loader = train_loaders[key]
        for batch in tqdm(ds_loader):
            img = batch['img'].cuda()
            image_feats = model.encode_image(img).detach().cpu().numpy()
            # Keep an explicit (batch, feature) layout. A plain squeeze() drops the batch
            # axis when a batch holds a single image, and np.concatenate below then fails
            # on the mix of 1-D and 2-D arrays.
            features['images'].append(image_feats.reshape(img.shape[0], -1))

        features['images'] = np.concatenate(features['images'])
        # Stored as a tensor: the text-MMD cell converts it with .detach().cpu().numpy().
        features['text'] = model.encode_text(clip.tokenize(datasets_cls[key].classnames).cuda())

        with open(f"features_embeddings/features_{key}.pkl", "wb") as f:
            pickle.dump(features, f)

In [None]:
# Pairwise MMD between the image features of every pair of datasets.
# Keys are the two dataset names, sorted and joined with "_".
mmd_similarities = {}

main_path = f"features_embeddings/features"

# Image features per dataset, unpickled on first use and reused for every pair
# (previously each file was re-read for every pair it took part in).
image_feature_cache = {}

for key1 in tqdm(train_loaders):
    for key2 in train_loaders:
        key_ds = '_'.join(sorted([key1, key2]))
        # Skip self-pairs and pairs already computed in the opposite order.
        if key1 == key2 or key_ds in mmd_similarities:
            continue

        for name in (key1, key2):
            if name not in image_feature_cache:
                # pickle executes code on load: only read files produced by this notebook.
                with open(f"{main_path}_{name}.pkl", "rb") as f:
                    image_feature_cache[name] = pickle.load(f)['images']

        mmd_similarities[key_ds] = mmd_rbf(
            image_feature_cache[key1], image_feature_cache[key2], gamma=0.01
        )

In [None]:
# Pairwise MMD between the text features of every pair of datasets.
# Keys are the two dataset names, sorted and joined with "_".
mmd_similarities_text = {}

# Text features per dataset as numpy arrays: each file is unpickled and converted
# once, then reused (previously this happened again for every pair).
text_feature_cache = {}

for key1 in tqdm(train_loaders):
    for key2 in train_loaders:
        key_ds = '_'.join(sorted([key1, key2]))
        # Skip self-pairs and pairs already computed in the opposite order.
        if key1 == key2 or key_ds in mmd_similarities_text:
            continue

        for name in (key1, key2):
            if name not in text_feature_cache:
                # pickle executes code on load: only read files produced by this notebook.
                with open(f"{main_path}_{name}.pkl", "rb") as f:
                    text_feature_cache[name] = pickle.load(f)['text'].detach().cpu().numpy()

        mmd_similarities_text[key_ds] = mmd_rbf(
            text_feature_cache[key1], text_feature_cache[key2], gamma=0.01
        )

In [None]:
# Store the image-based MMD dictionary ...
images_out_path = f"features_embeddings/mmd_sim_images.pkl"
with open(images_out_path, "wb") as images_out:
    pickle.dump(mmd_similarities, images_out)

# ... and the text-based one.
text_out_path = f"features_embeddings/mmd_sim_text.pkl"
with open(text_out_path, "wb") as text_out:
    pickle.dump(mmd_similarities_text, text_out)

In [None]:
# Reload the stored MMD dictionaries (pickle executes code on load: trusted files only).
with open(f"features_embeddings/mmd_sim_images.pkl", "rb") as images_in:
    mmd_similarities = pickle.load(images_in)

with open(f"features_embeddings/mmd_sim_text.pkl", "rb") as text_in:
    mmd_similarities_text = pickle.load(text_in)

# Reference zero-shot CLIP scores per dataset on a 0-100 scale; they are compared
# below against `results_ds` values multiplied by 100.
zs_clip_results = {
    "EuroSAT":             48.383,
    "StanfordCars":        65.514,
    "PLANTDOC":            34.994,
    "DescribableTextures": 43.972,
    "StanfordDogs":        59.117,
    "SUN397":              62.579,
    "FGVCAircraft":        24.752,
    "CUB":                 55.009,
    "Caltech101":          93.306,
    "Food101":             85.888,
    "UCF101":              67.46,
    "OxfordFlowers":       70.767,
}

# Per-dataset weights derived from the text and image MMD similarities.
weights = weighted_loss(
    forget_ds,
    val_ds,
    mmd_similarities_text,
    mmd_similarities,
)

In [None]:
# Score drop of the evaluated model relative to zero-shot CLIP for every dataset
# except the forgotten one. The printed value is clipped at 0; the stored one is not.
diff = {}
for k, zs_score in zs_clip_results.items():
    if k == forget_ds:
        continue
    drop = zs_score - results_ds[k]['all']['all_ds']*100
    print(k, round(max(drop, 0), 3))
    diff[k] = drop

# Weighted difference on the validation sets (lost knowledge of CLIP)
np.sum([diff[k] * weights[k] for k in val_ds])