In [12]:

from torchvision import datasets, transforms

In [15]:
train = datasets.cifar.CIFAR100("./data", train=True, download=False)
test = datasets.cifar.CIFAR100("./data", train=False, download=False)

In [14]:
train


Dataset CIFAR100
    Number of datapoints: 50000
    Root location: ./data
    Split: Train

In [16]:
test

Dataset CIFAR100
    Number of datapoints: 10000
    Root location: ./data
    Split: Test

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [None]:
from models.adam_adapter import Learner
json = "./exps/adam_adapter.json"

In [2]:
from models.simplecil import Learner
json = "./exps/simplecil.json"

In [3]:
def _set_random(seed=1):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [4]:
def _set_device(args):
    device_type = args["device"]
    gpus = []

    for device in device_type:
        if device == -1:
            device = torch.device("cpu")
        else:
            device = torch.device("cuda:{}".format(device))

        gpus.append(device)

    args["device"] = gpus

In [5]:

from utils.data_manager import DataManager
from utils.toolkit import tensor2numpy, accuracy
from torch.utils.data import DataLoader

In [6]:
def load_json(setting_path):
    import json
    with open(setting_path) as data_file:
        param = json.load(data_file)
    return param

In [7]:
args = load_json(json)
args["seed"] = args["seed"][0]
#args["device"] = args["device"][0]
_set_random(args["seed"])
#_set_device(args)
args["device"][0] = "cuda:7"
torch.device(args["device"][0])

device(type='cuda', index=7)

In [8]:
args

{'prefix': ' ',
 'dataset': 'cifar224',
 'memory_size': 0,
 'memory_per_class': 0,
 'fixed_memory': False,
 'shuffle': True,
 'init_cls': 10,
 'increment': 10,
 'model_name': 'simplecil',
 'backbone_type': 'pretrained_vit_b16_224',
 'device': ['cuda:7'],
 'seed': 1993,
 'tuned_epoch': 0,
 'init_lr': 0.01,
 'batch_size': 256,
 'weight_decay': 0.05,
 'min_lr': 1e-08,
 'optimizer': 'sgd',
 'vpt_type': 'shallow',
 'prompt_token_num': 3}

In [6]:
PATH = "./checkpoints/adam_adapter/task_1.pkl"

checkpoint = torch.load(PATH)

task = checkpoint['tasks']
args.update(checkpoint['model_state_dict'])


In [21]:
args["blocks.0.adaptmlp.down_proj.weight"]

tensor([[ 0.0312, -0.0190,  0.0140,  ...,  0.0216,  0.0246,  0.0246],
        [ 0.0344, -0.0331,  0.0220,  ...,  0.0070, -0.0033, -0.0178],
        [-0.0307,  0.0150, -0.0314,  ...,  0.0098,  0.0316,  0.0103],
        ...,
        [ 0.0143, -0.0060, -0.0096,  ...,  0.0142, -0.0134,  0.0114],
        [-0.0041, -0.0273,  0.0061,  ...,  0.0227,  0.0149,  0.0047],
        [ 0.0328,  0.0253,  0.0149,  ..., -0.0328,  0.0213, -0.0010]])

In [18]:
args.update(state_dict)

In [28]:
model = Learner(args)

This is for the BaseNet initialization.
After BaseNet initialization.


In [10]:
model._network

SimpleVitNet(
  (backbone): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='non

In [12]:
model._network.backbone

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [13]:
model.construct_dual_branch_network()

This is for the BaseNet initialization.
I'm using ViT with adapters.
_IncompatibleKeys(missing_keys=['blocks.0.adaptmlp.down_proj.weight', 'blocks.0.adaptmlp.down_proj.bias', 'blocks.0.adaptmlp.up_proj.weight', 'blocks.0.adaptmlp.up_proj.bias', 'blocks.1.adaptmlp.down_proj.weight', 'blocks.1.adaptmlp.down_proj.bias', 'blocks.1.adaptmlp.up_proj.weight', 'blocks.1.adaptmlp.up_proj.bias', 'blocks.2.adaptmlp.down_proj.weight', 'blocks.2.adaptmlp.down_proj.bias', 'blocks.2.adaptmlp.up_proj.weight', 'blocks.2.adaptmlp.up_proj.bias', 'blocks.3.adaptmlp.down_proj.weight', 'blocks.3.adaptmlp.down_proj.bias', 'blocks.3.adaptmlp.up_proj.weight', 'blocks.3.adaptmlp.up_proj.bias', 'blocks.4.adaptmlp.down_proj.weight', 'blocks.4.adaptmlp.down_proj.bias', 'blocks.4.adaptmlp.up_proj.weight', 'blocks.4.adaptmlp.up_proj.bias', 'blocks.5.adaptmlp.down_proj.weight', 'blocks.5.adaptmlp.down_proj.bias', 'blocks.5.adaptmlp.up_proj.weight', 'blocks.5.adaptmlp.up_proj.bias', 'blocks.6.adaptmlp.down_proj.weight

In [14]:
state_dict= checkpoint['model_state_dict']
keys = state_dict.keys()

In [15]:

# 'backbones.1.blocks.0.adaptmlp.down_proj.weight' -> 'blocks.0.adaptmlp.down_proj.weight'
for key in list(keys):
    if 'backbones.1.blocks' in key:
        entry = state_dict.pop(key)
        state_dict[key.replace('backbones.1.blocks', 'blocks')] = entry

In [17]:
state_dict.keys()

odict_keys(['backbones.0.cls_token', 'backbones.0.pos_embed', 'backbones.0.patch_embed.proj.weight', 'backbones.0.patch_embed.proj.bias', 'backbones.0.blocks.0.norm1.weight', 'backbones.0.blocks.0.norm1.bias', 'backbones.0.blocks.0.attn.qkv.weight', 'backbones.0.blocks.0.attn.qkv.bias', 'backbones.0.blocks.0.attn.proj.weight', 'backbones.0.blocks.0.attn.proj.bias', 'backbones.0.blocks.0.norm2.weight', 'backbones.0.blocks.0.norm2.bias', 'backbones.0.blocks.0.mlp.fc1.weight', 'backbones.0.blocks.0.mlp.fc1.bias', 'backbones.0.blocks.0.mlp.fc2.weight', 'backbones.0.blocks.0.mlp.fc2.bias', 'backbones.0.blocks.1.norm1.weight', 'backbones.0.blocks.1.norm1.bias', 'backbones.0.blocks.1.attn.qkv.weight', 'backbones.0.blocks.1.attn.qkv.bias', 'backbones.0.blocks.1.attn.proj.weight', 'backbones.0.blocks.1.attn.proj.bias', 'backbones.0.blocks.1.norm2.weight', 'backbones.0.blocks.1.norm2.bias', 'backbones.0.blocks.1.mlp.fc1.weight', 'backbones.0.blocks.1.mlp.fc1.bias', 'backbones.0.blocks.1.mlp.fc2.

In [17]:
model._network.load_state_dict(checkpoint['model_state_dict'])

RuntimeError: Error(s) in loading state_dict for MultiBranchCosineIncrementalNet:
	size mismatch for fc.weight: copying a param with shape torch.Size([20, 1536]) from checkpoint, the shape in current model is torch.Size([10, 1536]).

In [12]:
checkpoint['model_state_dict'].keys()

odict_keys(['backbones.0.cls_token', 'backbones.0.pos_embed', 'backbones.0.patch_embed.proj.weight', 'backbones.0.patch_embed.proj.bias', 'backbones.0.blocks.0.norm1.weight', 'backbones.0.blocks.0.norm1.bias', 'backbones.0.blocks.0.attn.qkv.weight', 'backbones.0.blocks.0.attn.qkv.bias', 'backbones.0.blocks.0.attn.proj.weight', 'backbones.0.blocks.0.attn.proj.bias', 'backbones.0.blocks.0.norm2.weight', 'backbones.0.blocks.0.norm2.bias', 'backbones.0.blocks.0.mlp.fc1.weight', 'backbones.0.blocks.0.mlp.fc1.bias', 'backbones.0.blocks.0.mlp.fc2.weight', 'backbones.0.blocks.0.mlp.fc2.bias', 'backbones.0.blocks.1.norm1.weight', 'backbones.0.blocks.1.norm1.bias', 'backbones.0.blocks.1.attn.qkv.weight', 'backbones.0.blocks.1.attn.qkv.bias', 'backbones.0.blocks.1.attn.proj.weight', 'backbones.0.blocks.1.attn.proj.bias', 'backbones.0.blocks.1.norm2.weight', 'backbones.0.blocks.1.norm2.bias', 'backbones.0.blocks.1.mlp.fc1.weight', 'backbones.0.blocks.1.mlp.fc1.bias', 'backbones.0.blocks.1.mlp.fc2.

In [17]:
m = nn.DataParallel(model._network)
m.load_state_dict(checkpoint['model_state_dict'])


RuntimeError: Error(s) in loading state_dict for DataParallel:
	Missing key(s) in state_dict: "module.backbone.cls_token", "module.backbone.pos_embed", "module.backbone.patch_embed.proj.weight", "module.backbone.patch_embed.proj.bias", "module.backbone.blocks.0.norm1.weight", "module.backbone.blocks.0.norm1.bias", "module.backbone.blocks.0.attn.q_proj.weight", "module.backbone.blocks.0.attn.q_proj.bias", "module.backbone.blocks.0.attn.v_proj.weight", "module.backbone.blocks.0.attn.v_proj.bias", "module.backbone.blocks.0.attn.k_proj.weight", "module.backbone.blocks.0.attn.k_proj.bias", "module.backbone.blocks.0.attn.proj.weight", "module.backbone.blocks.0.attn.proj.bias", "module.backbone.blocks.0.norm2.weight", "module.backbone.blocks.0.norm2.bias", "module.backbone.blocks.0.fc1.weight", "module.backbone.blocks.0.fc1.bias", "module.backbone.blocks.0.fc2.weight", "module.backbone.blocks.0.fc2.bias", "module.backbone.blocks.0.adaptmlp.down_proj.weight", "module.backbone.blocks.0.adaptmlp.down_proj.bias", "module.backbone.blocks.0.adaptmlp.up_proj.weight", "module.backbone.blocks.0.adaptmlp.up_proj.bias", "module.backbone.blocks.1.norm1.weight", "module.backbone.blocks.1.norm1.bias", "module.backbone.blocks.1.attn.q_proj.weight", "module.backbone.blocks.1.attn.q_proj.bias", "module.backbone.blocks.1.attn.v_proj.weight", "module.backbone.blocks.1.attn.v_proj.bias", "module.backbone.blocks.1.attn.k_proj.weight", "module.backbone.blocks.1.attn.k_proj.bias", "module.backbone.blocks.1.attn.proj.weight", "module.backbone.blocks.1.attn.proj.bias", "module.backbone.blocks.1.norm2.weight", "module.backbone.blocks.1.norm2.bias", "module.backbone.blocks.1.fc1.weight", "module.backbone.blocks.1.fc1.bias", "module.backbone.blocks.1.fc2.weight", "module.backbone.blocks.1.fc2.bias", "module.backbone.blocks.1.adaptmlp.down_proj.weight", "module.backbone.blocks.1.adaptmlp.down_proj.bias", "module.backbone.blocks.1.adaptmlp.up_proj.weight", "module.backbone.blocks.1.adaptmlp.up_proj.bias", "module.backbone.blocks.2.norm1.weight", "module.backbone.blocks.2.norm1.bias", "module.backbone.blocks.2.attn.q_proj.weight", "module.backbone.blocks.2.attn.q_proj.bias", "module.backbone.blocks.2.attn.v_proj.weight", "module.backbone.blocks.2.attn.v_proj.bias", "module.backbone.blocks.2.attn.k_proj.weight", "module.backbone.blocks.2.attn.k_proj.bias", "module.backbone.blocks.2.attn.proj.weight", "module.backbone.blocks.2.attn.proj.bias", "module.backbone.blocks.2.norm2.weight", "module.backbone.blocks.2.norm2.bias", "module.backbone.blocks.2.fc1.weight", "module.backbone.blocks.2.fc1.bias", "module.backbone.blocks.2.fc2.weight", "module.backbone.blocks.2.fc2.bias", "module.backbone.blocks.2.adaptmlp.down_proj.weight", "module.backbone.blocks.2.adaptmlp.down_proj.bias", "module.backbone.blocks.2.adaptmlp.up_proj.weight", "module.backbone.blocks.2.adaptmlp.up_proj.bias", "module.backbone.blocks.3.norm1.weight", "module.backbone.blocks.3.norm1.bias", "module.backbone.blocks.3.attn.q_proj.weight", "module.backbone.blocks.3.attn.q_proj.bias", "module.backbone.blocks.3.attn.v_proj.weight", "module.backbone.blocks.3.attn.v_proj.bias", "module.backbone.blocks.3.attn.k_proj.weight", "module.backbone.blocks.3.attn.k_proj.bias", "module.backbone.blocks.3.attn.proj.weight", "module.backbone.blocks.3.attn.proj.bias", "module.backbone.blocks.3.norm2.weight", "module.backbone.blocks.3.norm2.bias", "module.backbone.blocks.3.fc1.weight", "module.backbone.blocks.3.fc1.bias", "module.backbone.blocks.3.fc2.weight", "module.backbone.blocks.3.fc2.bias", "module.backbone.blocks.3.adaptmlp.down_proj.weight", "module.backbone.blocks.3.adaptmlp.down_proj.bias", "module.backbone.blocks.3.adaptmlp.up_proj.weight", "module.backbone.blocks.3.adaptmlp.up_proj.bias", "module.backbone.blocks.4.norm1.weight", "module.backbone.blocks.4.norm1.bias", "module.backbone.blocks.4.attn.q_proj.weight", "module.backbone.blocks.4.attn.q_proj.bias", "module.backbone.blocks.4.attn.v_proj.weight", "module.backbone.blocks.4.attn.v_proj.bias", "module.backbone.blocks.4.attn.k_proj.weight", "module.backbone.blocks.4.attn.k_proj.bias", "module.backbone.blocks.4.attn.proj.weight", "module.backbone.blocks.4.attn.proj.bias", "module.backbone.blocks.4.norm2.weight", "module.backbone.blocks.4.norm2.bias", "module.backbone.blocks.4.fc1.weight", "module.backbone.blocks.4.fc1.bias", "module.backbone.blocks.4.fc2.weight", "module.backbone.blocks.4.fc2.bias", "module.backbone.blocks.4.adaptmlp.down_proj.weight", "module.backbone.blocks.4.adaptmlp.down_proj.bias", "module.backbone.blocks.4.adaptmlp.up_proj.weight", "module.backbone.blocks.4.adaptmlp.up_proj.bias", "module.backbone.blocks.5.norm1.weight", "module.backbone.blocks.5.norm1.bias", "module.backbone.blocks.5.attn.q_proj.weight", "module.backbone.blocks.5.attn.q_proj.bias", "module.backbone.blocks.5.attn.v_proj.weight", "module.backbone.blocks.5.attn.v_proj.bias", "module.backbone.blocks.5.attn.k_proj.weight", "module.backbone.blocks.5.attn.k_proj.bias", "module.backbone.blocks.5.attn.proj.weight", "module.backbone.blocks.5.attn.proj.bias", "module.backbone.blocks.5.norm2.weight", "module.backbone.blocks.5.norm2.bias", "module.backbone.blocks.5.fc1.weight", "module.backbone.blocks.5.fc1.bias", "module.backbone.blocks.5.fc2.weight", "module.backbone.blocks.5.fc2.bias", "module.backbone.blocks.5.adaptmlp.down_proj.weight", "module.backbone.blocks.5.adaptmlp.down_proj.bias", "module.backbone.blocks.5.adaptmlp.up_proj.weight", "module.backbone.blocks.5.adaptmlp.up_proj.bias", "module.backbone.blocks.6.norm1.weight", "module.backbone.blocks.6.norm1.bias", "module.backbone.blocks.6.attn.q_proj.weight", "module.backbone.blocks.6.attn.q_proj.bias", "module.backbone.blocks.6.attn.v_proj.weight", "module.backbone.blocks.6.attn.v_proj.bias", "module.backbone.blocks.6.attn.k_proj.weight", "module.backbone.blocks.6.attn.k_proj.bias", "module.backbone.blocks.6.attn.proj.weight", "module.backbone.blocks.6.attn.proj.bias", "module.backbone.blocks.6.norm2.weight", "module.backbone.blocks.6.norm2.bias", "module.backbone.blocks.6.fc1.weight", "module.backbone.blocks.6.fc1.bias", "module.backbone.blocks.6.fc2.weight", "module.backbone.blocks.6.fc2.bias", "module.backbone.blocks.6.adaptmlp.down_proj.weight", "module.backbone.blocks.6.adaptmlp.down_proj.bias", "module.backbone.blocks.6.adaptmlp.up_proj.weight", "module.backbone.blocks.6.adaptmlp.up_proj.bias", "module.backbone.blocks.7.norm1.weight", "module.backbone.blocks.7.norm1.bias", "module.backbone.blocks.7.attn.q_proj.weight", "module.backbone.blocks.7.attn.q_proj.bias", "module.backbone.blocks.7.attn.v_proj.weight", "module.backbone.blocks.7.attn.v_proj.bias", "module.backbone.blocks.7.attn.k_proj.weight", "module.backbone.blocks.7.attn.k_proj.bias", "module.backbone.blocks.7.attn.proj.weight", "module.backbone.blocks.7.attn.proj.bias", "module.backbone.blocks.7.norm2.weight", "module.backbone.blocks.7.norm2.bias", "module.backbone.blocks.7.fc1.weight", "module.backbone.blocks.7.fc1.bias", "module.backbone.blocks.7.fc2.weight", "module.backbone.blocks.7.fc2.bias", "module.backbone.blocks.7.adaptmlp.down_proj.weight", "module.backbone.blocks.7.adaptmlp.down_proj.bias", "module.backbone.blocks.7.adaptmlp.up_proj.weight", "module.backbone.blocks.7.adaptmlp.up_proj.bias", "module.backbone.blocks.8.norm1.weight", "module.backbone.blocks.8.norm1.bias", "module.backbone.blocks.8.attn.q_proj.weight", "module.backbone.blocks.8.attn.q_proj.bias", "module.backbone.blocks.8.attn.v_proj.weight", "module.backbone.blocks.8.attn.v_proj.bias", "module.backbone.blocks.8.attn.k_proj.weight", "module.backbone.blocks.8.attn.k_proj.bias", "module.backbone.blocks.8.attn.proj.weight", "module.backbone.blocks.8.attn.proj.bias", "module.backbone.blocks.8.norm2.weight", "module.backbone.blocks.8.norm2.bias", "module.backbone.blocks.8.fc1.weight", "module.backbone.blocks.8.fc1.bias", "module.backbone.blocks.8.fc2.weight", "module.backbone.blocks.8.fc2.bias", "module.backbone.blocks.8.adaptmlp.down_proj.weight", "module.backbone.blocks.8.adaptmlp.down_proj.bias", "module.backbone.blocks.8.adaptmlp.up_proj.weight", "module.backbone.blocks.8.adaptmlp.up_proj.bias", "module.backbone.blocks.9.norm1.weight", "module.backbone.blocks.9.norm1.bias", "module.backbone.blocks.9.attn.q_proj.weight", "module.backbone.blocks.9.attn.q_proj.bias", "module.backbone.blocks.9.attn.v_proj.weight", "module.backbone.blocks.9.attn.v_proj.bias", "module.backbone.blocks.9.attn.k_proj.weight", "module.backbone.blocks.9.attn.k_proj.bias", "module.backbone.blocks.9.attn.proj.weight", "module.backbone.blocks.9.attn.proj.bias", "module.backbone.blocks.9.norm2.weight", "module.backbone.blocks.9.norm2.bias", "module.backbone.blocks.9.fc1.weight", "module.backbone.blocks.9.fc1.bias", "module.backbone.blocks.9.fc2.weight", "module.backbone.blocks.9.fc2.bias", "module.backbone.blocks.9.adaptmlp.down_proj.weight", "module.backbone.blocks.9.adaptmlp.down_proj.bias", "module.backbone.blocks.9.adaptmlp.up_proj.weight", "module.backbone.blocks.9.adaptmlp.up_proj.bias", "module.backbone.blocks.10.norm1.weight", "module.backbone.blocks.10.norm1.bias", "module.backbone.blocks.10.attn.q_proj.weight", "module.backbone.blocks.10.attn.q_proj.bias", "module.backbone.blocks.10.attn.v_proj.weight", "module.backbone.blocks.10.attn.v_proj.bias", "module.backbone.blocks.10.attn.k_proj.weight", "module.backbone.blocks.10.attn.k_proj.bias", "module.backbone.blocks.10.attn.proj.weight", "module.backbone.blocks.10.attn.proj.bias", "module.backbone.blocks.10.norm2.weight", "module.backbone.blocks.10.norm2.bias", "module.backbone.blocks.10.fc1.weight", "module.backbone.blocks.10.fc1.bias", "module.backbone.blocks.10.fc2.weight", "module.backbone.blocks.10.fc2.bias", "module.backbone.blocks.10.adaptmlp.down_proj.weight", "module.backbone.blocks.10.adaptmlp.down_proj.bias", "module.backbone.blocks.10.adaptmlp.up_proj.weight", "module.backbone.blocks.10.adaptmlp.up_proj.bias", "module.backbone.blocks.11.norm1.weight", "module.backbone.blocks.11.norm1.bias", "module.backbone.blocks.11.attn.q_proj.weight", "module.backbone.blocks.11.attn.q_proj.bias", "module.backbone.blocks.11.attn.v_proj.weight", "module.backbone.blocks.11.attn.v_proj.bias", "module.backbone.blocks.11.attn.k_proj.weight", "module.backbone.blocks.11.attn.k_proj.bias", "module.backbone.blocks.11.attn.proj.weight", "module.backbone.blocks.11.attn.proj.bias", "module.backbone.blocks.11.norm2.weight", "module.backbone.blocks.11.norm2.bias", "module.backbone.blocks.11.fc1.weight", "module.backbone.blocks.11.fc1.bias", "module.backbone.blocks.11.fc2.weight", "module.backbone.blocks.11.fc2.bias", "module.backbone.blocks.11.adaptmlp.down_proj.weight", "module.backbone.blocks.11.adaptmlp.down_proj.bias", "module.backbone.blocks.11.adaptmlp.up_proj.weight", "module.backbone.blocks.11.adaptmlp.up_proj.bias", "module.backbone.norm.weight", "module.backbone.norm.bias". 
	Unexpected key(s) in state_dict: "backbones.0.cls_token", "backbones.0.pos_embed", "backbones.0.patch_embed.proj.weight", "backbones.0.patch_embed.proj.bias", "backbones.0.blocks.0.norm1.weight", "backbones.0.blocks.0.norm1.bias", "backbones.0.blocks.0.attn.qkv.weight", "backbones.0.blocks.0.attn.qkv.bias", "backbones.0.blocks.0.attn.proj.weight", "backbones.0.blocks.0.attn.proj.bias", "backbones.0.blocks.0.norm2.weight", "backbones.0.blocks.0.norm2.bias", "backbones.0.blocks.0.mlp.fc1.weight", "backbones.0.blocks.0.mlp.fc1.bias", "backbones.0.blocks.0.mlp.fc2.weight", "backbones.0.blocks.0.mlp.fc2.bias", "backbones.0.blocks.1.norm1.weight", "backbones.0.blocks.1.norm1.bias", "backbones.0.blocks.1.attn.qkv.weight", "backbones.0.blocks.1.attn.qkv.bias", "backbones.0.blocks.1.attn.proj.weight", "backbones.0.blocks.1.attn.proj.bias", "backbones.0.blocks.1.norm2.weight", "backbones.0.blocks.1.norm2.bias", "backbones.0.blocks.1.mlp.fc1.weight", "backbones.0.blocks.1.mlp.fc1.bias", "backbones.0.blocks.1.mlp.fc2.weight", "backbones.0.blocks.1.mlp.fc2.bias", "backbones.0.blocks.2.norm1.weight", "backbones.0.blocks.2.norm1.bias", "backbones.0.blocks.2.attn.qkv.weight", "backbones.0.blocks.2.attn.qkv.bias", "backbones.0.blocks.2.attn.proj.weight", "backbones.0.blocks.2.attn.proj.bias", "backbones.0.blocks.2.norm2.weight", "backbones.0.blocks.2.norm2.bias", "backbones.0.blocks.2.mlp.fc1.weight", "backbones.0.blocks.2.mlp.fc1.bias", "backbones.0.blocks.2.mlp.fc2.weight", "backbones.0.blocks.2.mlp.fc2.bias", "backbones.0.blocks.3.norm1.weight", "backbones.0.blocks.3.norm1.bias", "backbones.0.blocks.3.attn.qkv.weight", "backbones.0.blocks.3.attn.qkv.bias", "backbones.0.blocks.3.attn.proj.weight", "backbones.0.blocks.3.attn.proj.bias", "backbones.0.blocks.3.norm2.weight", "backbones.0.blocks.3.norm2.bias", "backbones.0.blocks.3.mlp.fc1.weight", "backbones.0.blocks.3.mlp.fc1.bias", "backbones.0.blocks.3.mlp.fc2.weight", "backbones.0.blocks.3.mlp.fc2.bias", "backbones.0.blocks.4.norm1.weight", "backbones.0.blocks.4.norm1.bias", "backbones.0.blocks.4.attn.qkv.weight", "backbones.0.blocks.4.attn.qkv.bias", "backbones.0.blocks.4.attn.proj.weight", "backbones.0.blocks.4.attn.proj.bias", "backbones.0.blocks.4.norm2.weight", "backbones.0.blocks.4.norm2.bias", "backbones.0.blocks.4.mlp.fc1.weight", "backbones.0.blocks.4.mlp.fc1.bias", "backbones.0.blocks.4.mlp.fc2.weight", "backbones.0.blocks.4.mlp.fc2.bias", "backbones.0.blocks.5.norm1.weight", "backbones.0.blocks.5.norm1.bias", "backbones.0.blocks.5.attn.qkv.weight", "backbones.0.blocks.5.attn.qkv.bias", "backbones.0.blocks.5.attn.proj.weight", "backbones.0.blocks.5.attn.proj.bias", "backbones.0.blocks.5.norm2.weight", "backbones.0.blocks.5.norm2.bias", "backbones.0.blocks.5.mlp.fc1.weight", "backbones.0.blocks.5.mlp.fc1.bias", "backbones.0.blocks.5.mlp.fc2.weight", "backbones.0.blocks.5.mlp.fc2.bias", "backbones.0.blocks.6.norm1.weight", "backbones.0.blocks.6.norm1.bias", "backbones.0.blocks.6.attn.qkv.weight", "backbones.0.blocks.6.attn.qkv.bias", "backbones.0.blocks.6.attn.proj.weight", "backbones.0.blocks.6.attn.proj.bias", "backbones.0.blocks.6.norm2.weight", "backbones.0.blocks.6.norm2.bias", "backbones.0.blocks.6.mlp.fc1.weight", "backbones.0.blocks.6.mlp.fc1.bias", "backbones.0.blocks.6.mlp.fc2.weight", "backbones.0.blocks.6.mlp.fc2.bias", "backbones.0.blocks.7.norm1.weight", "backbones.0.blocks.7.norm1.bias", "backbones.0.blocks.7.attn.qkv.weight", "backbones.0.blocks.7.attn.qkv.bias", "backbones.0.blocks.7.attn.proj.weight", "backbones.0.blocks.7.attn.proj.bias", "backbones.0.blocks.7.norm2.weight", "backbones.0.blocks.7.norm2.bias", "backbones.0.blocks.7.mlp.fc1.weight", "backbones.0.blocks.7.mlp.fc1.bias", "backbones.0.blocks.7.mlp.fc2.weight", "backbones.0.blocks.7.mlp.fc2.bias", "backbones.0.blocks.8.norm1.weight", "backbones.0.blocks.8.norm1.bias", "backbones.0.blocks.8.attn.qkv.weight", "backbones.0.blocks.8.attn.qkv.bias", "backbones.0.blocks.8.attn.proj.weight", "backbones.0.blocks.8.attn.proj.bias", "backbones.0.blocks.8.norm2.weight", "backbones.0.blocks.8.norm2.bias", "backbones.0.blocks.8.mlp.fc1.weight", "backbones.0.blocks.8.mlp.fc1.bias", "backbones.0.blocks.8.mlp.fc2.weight", "backbones.0.blocks.8.mlp.fc2.bias", "backbones.0.blocks.9.norm1.weight", "backbones.0.blocks.9.norm1.bias", "backbones.0.blocks.9.attn.qkv.weight", "backbones.0.blocks.9.attn.qkv.bias", "backbones.0.blocks.9.attn.proj.weight", "backbones.0.blocks.9.attn.proj.bias", "backbones.0.blocks.9.norm2.weight", "backbones.0.blocks.9.norm2.bias", "backbones.0.blocks.9.mlp.fc1.weight", "backbones.0.blocks.9.mlp.fc1.bias", "backbones.0.blocks.9.mlp.fc2.weight", "backbones.0.blocks.9.mlp.fc2.bias", "backbones.0.blocks.10.norm1.weight", "backbones.0.blocks.10.norm1.bias", "backbones.0.blocks.10.attn.qkv.weight", "backbones.0.blocks.10.attn.qkv.bias", "backbones.0.blocks.10.attn.proj.weight", "backbones.0.blocks.10.attn.proj.bias", "backbones.0.blocks.10.norm2.weight", "backbones.0.blocks.10.norm2.bias", "backbones.0.blocks.10.mlp.fc1.weight", "backbones.0.blocks.10.mlp.fc1.bias", "backbones.0.blocks.10.mlp.fc2.weight", "backbones.0.blocks.10.mlp.fc2.bias", "backbones.0.blocks.11.norm1.weight", "backbones.0.blocks.11.norm1.bias", "backbones.0.blocks.11.attn.qkv.weight", "backbones.0.blocks.11.attn.qkv.bias", "backbones.0.blocks.11.attn.proj.weight", "backbones.0.blocks.11.attn.proj.bias", "backbones.0.blocks.11.norm2.weight", "backbones.0.blocks.11.norm2.bias", "backbones.0.blocks.11.mlp.fc1.weight", "backbones.0.blocks.11.mlp.fc1.bias", "backbones.0.blocks.11.mlp.fc2.weight", "backbones.0.blocks.11.mlp.fc2.bias", "backbones.0.norm.weight", "backbones.0.norm.bias", "backbones.1.cls_token", "backbones.1.pos_embed", "backbones.1.patch_embed.proj.weight", "backbones.1.patch_embed.proj.bias", "backbones.1.blocks.0.norm1.weight", "backbones.1.blocks.0.norm1.bias", "backbones.1.blocks.0.attn.q_proj.weight", "backbones.1.blocks.0.attn.q_proj.bias", "backbones.1.blocks.0.attn.v_proj.weight", "backbones.1.blocks.0.attn.v_proj.bias", "backbones.1.blocks.0.attn.k_proj.weight", "backbones.1.blocks.0.attn.k_proj.bias", "backbones.1.blocks.0.attn.proj.weight", "backbones.1.blocks.0.attn.proj.bias", "backbones.1.blocks.0.norm2.weight", "backbones.1.blocks.0.norm2.bias", "backbones.1.blocks.0.fc1.weight", "backbones.1.blocks.0.fc1.bias", "backbones.1.blocks.0.fc2.weight", "backbones.1.blocks.0.fc2.bias", "backbones.1.blocks.0.adaptmlp.down_proj.weight", "backbones.1.blocks.0.adaptmlp.down_proj.bias", "backbones.1.blocks.0.adaptmlp.up_proj.weight", "backbones.1.blocks.0.adaptmlp.up_proj.bias", "backbones.1.blocks.1.norm1.weight", "backbones.1.blocks.1.norm1.bias", "backbones.1.blocks.1.attn.q_proj.weight", "backbones.1.blocks.1.attn.q_proj.bias", "backbones.1.blocks.1.attn.v_proj.weight", "backbones.1.blocks.1.attn.v_proj.bias", "backbones.1.blocks.1.attn.k_proj.weight", "backbones.1.blocks.1.attn.k_proj.bias", "backbones.1.blocks.1.attn.proj.weight", "backbones.1.blocks.1.attn.proj.bias", "backbones.1.blocks.1.norm2.weight", "backbones.1.blocks.1.norm2.bias", "backbones.1.blocks.1.fc1.weight", "backbones.1.blocks.1.fc1.bias", "backbones.1.blocks.1.fc2.weight", "backbones.1.blocks.1.fc2.bias", "backbones.1.blocks.1.adaptmlp.down_proj.weight", "backbones.1.blocks.1.adaptmlp.down_proj.bias", "backbones.1.blocks.1.adaptmlp.up_proj.weight", "backbones.1.blocks.1.adaptmlp.up_proj.bias", "backbones.1.blocks.2.norm1.weight", "backbones.1.blocks.2.norm1.bias", "backbones.1.blocks.2.attn.q_proj.weight", "backbones.1.blocks.2.attn.q_proj.bias", "backbones.1.blocks.2.attn.v_proj.weight", "backbones.1.blocks.2.attn.v_proj.bias", "backbones.1.blocks.2.attn.k_proj.weight", "backbones.1.blocks.2.attn.k_proj.bias", "backbones.1.blocks.2.attn.proj.weight", "backbones.1.blocks.2.attn.proj.bias", "backbones.1.blocks.2.norm2.weight", "backbones.1.blocks.2.norm2.bias", "backbones.1.blocks.2.fc1.weight", "backbones.1.blocks.2.fc1.bias", "backbones.1.blocks.2.fc2.weight", "backbones.1.blocks.2.fc2.bias", "backbones.1.blocks.2.adaptmlp.down_proj.weight", "backbones.1.blocks.2.adaptmlp.down_proj.bias", "backbones.1.blocks.2.adaptmlp.up_proj.weight", "backbones.1.blocks.2.adaptmlp.up_proj.bias", "backbones.1.blocks.3.norm1.weight", "backbones.1.blocks.3.norm1.bias", "backbones.1.blocks.3.attn.q_proj.weight", "backbones.1.blocks.3.attn.q_proj.bias", "backbones.1.blocks.3.attn.v_proj.weight", "backbones.1.blocks.3.attn.v_proj.bias", "backbones.1.blocks.3.attn.k_proj.weight", "backbones.1.blocks.3.attn.k_proj.bias", "backbones.1.blocks.3.attn.proj.weight", "backbones.1.blocks.3.attn.proj.bias", "backbones.1.blocks.3.norm2.weight", "backbones.1.blocks.3.norm2.bias", "backbones.1.blocks.3.fc1.weight", "backbones.1.blocks.3.fc1.bias", "backbones.1.blocks.3.fc2.weight", "backbones.1.blocks.3.fc2.bias", "backbones.1.blocks.3.adaptmlp.down_proj.weight", "backbones.1.blocks.3.adaptmlp.down_proj.bias", "backbones.1.blocks.3.adaptmlp.up_proj.weight", "backbones.1.blocks.3.adaptmlp.up_proj.bias", "backbones.1.blocks.4.norm1.weight", "backbones.1.blocks.4.norm1.bias", "backbones.1.blocks.4.attn.q_proj.weight", "backbones.1.blocks.4.attn.q_proj.bias", "backbones.1.blocks.4.attn.v_proj.weight", "backbones.1.blocks.4.attn.v_proj.bias", "backbones.1.blocks.4.attn.k_proj.weight", "backbones.1.blocks.4.attn.k_proj.bias", "backbones.1.blocks.4.attn.proj.weight", "backbones.1.blocks.4.attn.proj.bias", "backbones.1.blocks.4.norm2.weight", "backbones.1.blocks.4.norm2.bias", "backbones.1.blocks.4.fc1.weight", "backbones.1.blocks.4.fc1.bias", "backbones.1.blocks.4.fc2.weight", "backbones.1.blocks.4.fc2.bias", "backbones.1.blocks.4.adaptmlp.down_proj.weight", "backbones.1.blocks.4.adaptmlp.down_proj.bias", "backbones.1.blocks.4.adaptmlp.up_proj.weight", "backbones.1.blocks.4.adaptmlp.up_proj.bias", "backbones.1.blocks.5.norm1.weight", "backbones.1.blocks.5.norm1.bias", "backbones.1.blocks.5.attn.q_proj.weight", "backbones.1.blocks.5.attn.q_proj.bias", "backbones.1.blocks.5.attn.v_proj.weight", "backbones.1.blocks.5.attn.v_proj.bias", "backbones.1.blocks.5.attn.k_proj.weight", "backbones.1.blocks.5.attn.k_proj.bias", "backbones.1.blocks.5.attn.proj.weight", "backbones.1.blocks.5.attn.proj.bias", "backbones.1.blocks.5.norm2.weight", "backbones.1.blocks.5.norm2.bias", "backbones.1.blocks.5.fc1.weight", "backbones.1.blocks.5.fc1.bias", "backbones.1.blocks.5.fc2.weight", "backbones.1.blocks.5.fc2.bias", "backbones.1.blocks.5.adaptmlp.down_proj.weight", "backbones.1.blocks.5.adaptmlp.down_proj.bias", "backbones.1.blocks.5.adaptmlp.up_proj.weight", "backbones.1.blocks.5.adaptmlp.up_proj.bias", "backbones.1.blocks.6.norm1.weight", "backbones.1.blocks.6.norm1.bias", "backbones.1.blocks.6.attn.q_proj.weight", "backbones.1.blocks.6.attn.q_proj.bias", "backbones.1.blocks.6.attn.v_proj.weight", "backbones.1.blocks.6.attn.v_proj.bias", "backbones.1.blocks.6.attn.k_proj.weight", "backbones.1.blocks.6.attn.k_proj.bias", "backbones.1.blocks.6.attn.proj.weight", "backbones.1.blocks.6.attn.proj.bias", "backbones.1.blocks.6.norm2.weight", "backbones.1.blocks.6.norm2.bias", "backbones.1.blocks.6.fc1.weight", "backbones.1.blocks.6.fc1.bias", "backbones.1.blocks.6.fc2.weight", "backbones.1.blocks.6.fc2.bias", "backbones.1.blocks.6.adaptmlp.down_proj.weight", "backbones.1.blocks.6.adaptmlp.down_proj.bias", "backbones.1.blocks.6.adaptmlp.up_proj.weight", "backbones.1.blocks.6.adaptmlp.up_proj.bias", "backbones.1.blocks.7.norm1.weight", "backbones.1.blocks.7.norm1.bias", "backbones.1.blocks.7.attn.q_proj.weight", "backbones.1.blocks.7.attn.q_proj.bias", "backbones.1.blocks.7.attn.v_proj.weight", "backbones.1.blocks.7.attn.v_proj.bias", "backbones.1.blocks.7.attn.k_proj.weight", "backbones.1.blocks.7.attn.k_proj.bias", "backbones.1.blocks.7.attn.proj.weight", "backbones.1.blocks.7.attn.proj.bias", "backbones.1.blocks.7.norm2.weight", "backbones.1.blocks.7.norm2.bias", "backbones.1.blocks.7.fc1.weight", "backbones.1.blocks.7.fc1.bias", "backbones.1.blocks.7.fc2.weight", "backbones.1.blocks.7.fc2.bias", "backbones.1.blocks.7.adaptmlp.down_proj.weight", "backbones.1.blocks.7.adaptmlp.down_proj.bias", "backbones.1.blocks.7.adaptmlp.up_proj.weight", "backbones.1.blocks.7.adaptmlp.up_proj.bias", "backbones.1.blocks.8.norm1.weight", "backbones.1.blocks.8.norm1.bias", "backbones.1.blocks.8.attn.q_proj.weight", "backbones.1.blocks.8.attn.q_proj.bias", "backbones.1.blocks.8.attn.v_proj.weight", "backbones.1.blocks.8.attn.v_proj.bias", "backbones.1.blocks.8.attn.k_proj.weight", "backbones.1.blocks.8.attn.k_proj.bias", "backbones.1.blocks.8.attn.proj.weight", "backbones.1.blocks.8.attn.proj.bias", "backbones.1.blocks.8.norm2.weight", "backbones.1.blocks.8.norm2.bias", "backbones.1.blocks.8.fc1.weight", "backbones.1.blocks.8.fc1.bias", "backbones.1.blocks.8.fc2.weight", "backbones.1.blocks.8.fc2.bias", "backbones.1.blocks.8.adaptmlp.down_proj.weight", "backbones.1.blocks.8.adaptmlp.down_proj.bias", "backbones.1.blocks.8.adaptmlp.up_proj.weight", "backbones.1.blocks.8.adaptmlp.up_proj.bias", "backbones.1.blocks.9.norm1.weight", "backbones.1.blocks.9.norm1.bias", "backbones.1.blocks.9.attn.q_proj.weight", "backbones.1.blocks.9.attn.q_proj.bias", "backbones.1.blocks.9.attn.v_proj.weight", "backbones.1.blocks.9.attn.v_proj.bias", "backbones.1.blocks.9.attn.k_proj.weight", "backbones.1.blocks.9.attn.k_proj.bias", "backbones.1.blocks.9.attn.proj.weight", "backbones.1.blocks.9.attn.proj.bias", "backbones.1.blocks.9.norm2.weight", "backbones.1.blocks.9.norm2.bias", "backbones.1.blocks.9.fc1.weight", "backbones.1.blocks.9.fc1.bias", "backbones.1.blocks.9.fc2.weight", "backbones.1.blocks.9.fc2.bias", "backbones.1.blocks.9.adaptmlp.down_proj.weight", "backbones.1.blocks.9.adaptmlp.down_proj.bias", "backbones.1.blocks.9.adaptmlp.up_proj.weight", "backbones.1.blocks.9.adaptmlp.up_proj.bias", "backbones.1.blocks.10.norm1.weight", "backbones.1.blocks.10.norm1.bias", "backbones.1.blocks.10.attn.q_proj.weight", "backbones.1.blocks.10.attn.q_proj.bias", "backbones.1.blocks.10.attn.v_proj.weight", "backbones.1.blocks.10.attn.v_proj.bias", "backbones.1.blocks.10.attn.k_proj.weight", "backbones.1.blocks.10.attn.k_proj.bias", "backbones.1.blocks.10.attn.proj.weight", "backbones.1.blocks.10.attn.proj.bias", "backbones.1.blocks.10.norm2.weight", "backbones.1.blocks.10.norm2.bias", "backbones.1.blocks.10.fc1.weight", "backbones.1.blocks.10.fc1.bias", "backbones.1.blocks.10.fc2.weight", "backbones.1.blocks.10.fc2.bias", "backbones.1.blocks.10.adaptmlp.down_proj.weight", "backbones.1.blocks.10.adaptmlp.down_proj.bias", "backbones.1.blocks.10.adaptmlp.up_proj.weight", "backbones.1.blocks.10.adaptmlp.up_proj.bias", "backbones.1.blocks.11.norm1.weight", "backbones.1.blocks.11.norm1.bias", "backbones.1.blocks.11.attn.q_proj.weight", "backbones.1.blocks.11.attn.q_proj.bias", "backbones.1.blocks.11.attn.v_proj.weight", "backbones.1.blocks.11.attn.v_proj.bias", "backbones.1.blocks.11.attn.k_proj.weight", "backbones.1.blocks.11.attn.k_proj.bias", "backbones.1.blocks.11.attn.proj.weight", "backbones.1.blocks.11.attn.proj.bias", "backbones.1.blocks.11.norm2.weight", "backbones.1.blocks.11.norm2.bias", "backbones.1.blocks.11.fc1.weight", "backbones.1.blocks.11.fc1.bias", "backbones.1.blocks.11.fc2.weight", "backbones.1.blocks.11.fc2.bias", "backbones.1.blocks.11.adaptmlp.down_proj.weight", "backbones.1.blocks.11.adaptmlp.down_proj.bias", "backbones.1.blocks.11.adaptmlp.up_proj.weight", "backbones.1.blocks.11.adaptmlp.up_proj.bias", "backbones.1.norm.weight", "backbones.1.norm.bias", "fc.weight", "fc.sigma". 

RuntimeError: Error(s) in loading state_dict for SimpleVitNet:
	Missing key(s) in state_dict: "backbone.cls_token", "backbone.pos_embed", "backbone.patch_embed.proj.weight", "backbone.patch_embed.proj.bias", "backbone.blocks.0.norm1.weight", "backbone.blocks.0.norm1.bias", "backbone.blocks.0.attn.q_proj.weight", "backbone.blocks.0.attn.q_proj.bias", "backbone.blocks.0.attn.v_proj.weight", "backbone.blocks.0.attn.v_proj.bias", "backbone.blocks.0.attn.k_proj.weight", "backbone.blocks.0.attn.k_proj.bias", "backbone.blocks.0.attn.proj.weight", "backbone.blocks.0.attn.proj.bias", "backbone.blocks.0.norm2.weight", "backbone.blocks.0.norm2.bias", "backbone.blocks.0.fc1.weight", "backbone.blocks.0.fc1.bias", "backbone.blocks.0.fc2.weight", "backbone.blocks.0.fc2.bias", "backbone.blocks.0.adaptmlp.down_proj.weight", "backbone.blocks.0.adaptmlp.down_proj.bias", "backbone.blocks.0.adaptmlp.up_proj.weight", "backbone.blocks.0.adaptmlp.up_proj.bias", "backbone.blocks.1.norm1.weight", "backbone.blocks.1.norm1.bias", "backbone.blocks.1.attn.q_proj.weight", "backbone.blocks.1.attn.q_proj.bias", "backbone.blocks.1.attn.v_proj.weight", "backbone.blocks.1.attn.v_proj.bias", "backbone.blocks.1.attn.k_proj.weight", "backbone.blocks.1.attn.k_proj.bias", "backbone.blocks.1.attn.proj.weight", "backbone.blocks.1.attn.proj.bias", "backbone.blocks.1.norm2.weight", "backbone.blocks.1.norm2.bias", "backbone.blocks.1.fc1.weight", "backbone.blocks.1.fc1.bias", "backbone.blocks.1.fc2.weight", "backbone.blocks.1.fc2.bias", "backbone.blocks.1.adaptmlp.down_proj.weight", "backbone.blocks.1.adaptmlp.down_proj.bias", "backbone.blocks.1.adaptmlp.up_proj.weight", "backbone.blocks.1.adaptmlp.up_proj.bias", "backbone.blocks.2.norm1.weight", "backbone.blocks.2.norm1.bias", "backbone.blocks.2.attn.q_proj.weight", "backbone.blocks.2.attn.q_proj.bias", "backbone.blocks.2.attn.v_proj.weight", "backbone.blocks.2.attn.v_proj.bias", "backbone.blocks.2.attn.k_proj.weight", "backbone.blocks.2.attn.k_proj.bias", "backbone.blocks.2.attn.proj.weight", "backbone.blocks.2.attn.proj.bias", "backbone.blocks.2.norm2.weight", "backbone.blocks.2.norm2.bias", "backbone.blocks.2.fc1.weight", "backbone.blocks.2.fc1.bias", "backbone.blocks.2.fc2.weight", "backbone.blocks.2.fc2.bias", "backbone.blocks.2.adaptmlp.down_proj.weight", "backbone.blocks.2.adaptmlp.down_proj.bias", "backbone.blocks.2.adaptmlp.up_proj.weight", "backbone.blocks.2.adaptmlp.up_proj.bias", "backbone.blocks.3.norm1.weight", "backbone.blocks.3.norm1.bias", "backbone.blocks.3.attn.q_proj.weight", "backbone.blocks.3.attn.q_proj.bias", "backbone.blocks.3.attn.v_proj.weight", "backbone.blocks.3.attn.v_proj.bias", "backbone.blocks.3.attn.k_proj.weight", "backbone.blocks.3.attn.k_proj.bias", "backbone.blocks.3.attn.proj.weight", "backbone.blocks.3.attn.proj.bias", "backbone.blocks.3.norm2.weight", "backbone.blocks.3.norm2.bias", "backbone.blocks.3.fc1.weight", "backbone.blocks.3.fc1.bias", "backbone.blocks.3.fc2.weight", "backbone.blocks.3.fc2.bias", "backbone.blocks.3.adaptmlp.down_proj.weight", "backbone.blocks.3.adaptmlp.down_proj.bias", "backbone.blocks.3.adaptmlp.up_proj.weight", "backbone.blocks.3.adaptmlp.up_proj.bias", "backbone.blocks.4.norm1.weight", "backbone.blocks.4.norm1.bias", "backbone.blocks.4.attn.q_proj.weight", "backbone.blocks.4.attn.q_proj.bias", "backbone.blocks.4.attn.v_proj.weight", "backbone.blocks.4.attn.v_proj.bias", "backbone.blocks.4.attn.k_proj.weight", "backbone.blocks.4.attn.k_proj.bias", "backbone.blocks.4.attn.proj.weight", "backbone.blocks.4.attn.proj.bias", "backbone.blocks.4.norm2.weight", "backbone.blocks.4.norm2.bias", "backbone.blocks.4.fc1.weight", "backbone.blocks.4.fc1.bias", "backbone.blocks.4.fc2.weight", "backbone.blocks.4.fc2.bias", "backbone.blocks.4.adaptmlp.down_proj.weight", "backbone.blocks.4.adaptmlp.down_proj.bias", "backbone.blocks.4.adaptmlp.up_proj.weight", "backbone.blocks.4.adaptmlp.up_proj.bias", "backbone.blocks.5.norm1.weight", "backbone.blocks.5.norm1.bias", "backbone.blocks.5.attn.q_proj.weight", "backbone.blocks.5.attn.q_proj.bias", "backbone.blocks.5.attn.v_proj.weight", "backbone.blocks.5.attn.v_proj.bias", "backbone.blocks.5.attn.k_proj.weight", "backbone.blocks.5.attn.k_proj.bias", "backbone.blocks.5.attn.proj.weight", "backbone.blocks.5.attn.proj.bias", "backbone.blocks.5.norm2.weight", "backbone.blocks.5.norm2.bias", "backbone.blocks.5.fc1.weight", "backbone.blocks.5.fc1.bias", "backbone.blocks.5.fc2.weight", "backbone.blocks.5.fc2.bias", "backbone.blocks.5.adaptmlp.down_proj.weight", "backbone.blocks.5.adaptmlp.down_proj.bias", "backbone.blocks.5.adaptmlp.up_proj.weight", "backbone.blocks.5.adaptmlp.up_proj.bias", "backbone.blocks.6.norm1.weight", "backbone.blocks.6.norm1.bias", "backbone.blocks.6.attn.q_proj.weight", "backbone.blocks.6.attn.q_proj.bias", "backbone.blocks.6.attn.v_proj.weight", "backbone.blocks.6.attn.v_proj.bias", "backbone.blocks.6.attn.k_proj.weight", "backbone.blocks.6.attn.k_proj.bias", "backbone.blocks.6.attn.proj.weight", "backbone.blocks.6.attn.proj.bias", "backbone.blocks.6.norm2.weight", "backbone.blocks.6.norm2.bias", "backbone.blocks.6.fc1.weight", "backbone.blocks.6.fc1.bias", "backbone.blocks.6.fc2.weight", "backbone.blocks.6.fc2.bias", "backbone.blocks.6.adaptmlp.down_proj.weight", "backbone.blocks.6.adaptmlp.down_proj.bias", "backbone.blocks.6.adaptmlp.up_proj.weight", "backbone.blocks.6.adaptmlp.up_proj.bias", "backbone.blocks.7.norm1.weight", "backbone.blocks.7.norm1.bias", "backbone.blocks.7.attn.q_proj.weight", "backbone.blocks.7.attn.q_proj.bias", "backbone.blocks.7.attn.v_proj.weight", "backbone.blocks.7.attn.v_proj.bias", "backbone.blocks.7.attn.k_proj.weight", "backbone.blocks.7.attn.k_proj.bias", "backbone.blocks.7.attn.proj.weight", "backbone.blocks.7.attn.proj.bias", "backbone.blocks.7.norm2.weight", "backbone.blocks.7.norm2.bias", "backbone.blocks.7.fc1.weight", "backbone.blocks.7.fc1.bias", "backbone.blocks.7.fc2.weight", "backbone.blocks.7.fc2.bias", "backbone.blocks.7.adaptmlp.down_proj.weight", "backbone.blocks.7.adaptmlp.down_proj.bias", "backbone.blocks.7.adaptmlp.up_proj.weight", "backbone.blocks.7.adaptmlp.up_proj.bias", "backbone.blocks.8.norm1.weight", "backbone.blocks.8.norm1.bias", "backbone.blocks.8.attn.q_proj.weight", "backbone.blocks.8.attn.q_proj.bias", "backbone.blocks.8.attn.v_proj.weight", "backbone.blocks.8.attn.v_proj.bias", "backbone.blocks.8.attn.k_proj.weight", "backbone.blocks.8.attn.k_proj.bias", "backbone.blocks.8.attn.proj.weight", "backbone.blocks.8.attn.proj.bias", "backbone.blocks.8.norm2.weight", "backbone.blocks.8.norm2.bias", "backbone.blocks.8.fc1.weight", "backbone.blocks.8.fc1.bias", "backbone.blocks.8.fc2.weight", "backbone.blocks.8.fc2.bias", "backbone.blocks.8.adaptmlp.down_proj.weight", "backbone.blocks.8.adaptmlp.down_proj.bias", "backbone.blocks.8.adaptmlp.up_proj.weight", "backbone.blocks.8.adaptmlp.up_proj.bias", "backbone.blocks.9.norm1.weight", "backbone.blocks.9.norm1.bias", "backbone.blocks.9.attn.q_proj.weight", "backbone.blocks.9.attn.q_proj.bias", "backbone.blocks.9.attn.v_proj.weight", "backbone.blocks.9.attn.v_proj.bias", "backbone.blocks.9.attn.k_proj.weight", "backbone.blocks.9.attn.k_proj.bias", "backbone.blocks.9.attn.proj.weight", "backbone.blocks.9.attn.proj.bias", "backbone.blocks.9.norm2.weight", "backbone.blocks.9.norm2.bias", "backbone.blocks.9.fc1.weight", "backbone.blocks.9.fc1.bias", "backbone.blocks.9.fc2.weight", "backbone.blocks.9.fc2.bias", "backbone.blocks.9.adaptmlp.down_proj.weight", "backbone.blocks.9.adaptmlp.down_proj.bias", "backbone.blocks.9.adaptmlp.up_proj.weight", "backbone.blocks.9.adaptmlp.up_proj.bias", "backbone.blocks.10.norm1.weight", "backbone.blocks.10.norm1.bias", "backbone.blocks.10.attn.q_proj.weight", "backbone.blocks.10.attn.q_proj.bias", "backbone.blocks.10.attn.v_proj.weight", "backbone.blocks.10.attn.v_proj.bias", "backbone.blocks.10.attn.k_proj.weight", "backbone.blocks.10.attn.k_proj.bias", "backbone.blocks.10.attn.proj.weight", "backbone.blocks.10.attn.proj.bias", "backbone.blocks.10.norm2.weight", "backbone.blocks.10.norm2.bias", "backbone.blocks.10.fc1.weight", "backbone.blocks.10.fc1.bias", "backbone.blocks.10.fc2.weight", "backbone.blocks.10.fc2.bias", "backbone.blocks.10.adaptmlp.down_proj.weight", "backbone.blocks.10.adaptmlp.down_proj.bias", "backbone.blocks.10.adaptmlp.up_proj.weight", "backbone.blocks.10.adaptmlp.up_proj.bias", "backbone.blocks.11.norm1.weight", "backbone.blocks.11.norm1.bias", "backbone.blocks.11.attn.q_proj.weight", "backbone.blocks.11.attn.q_proj.bias", "backbone.blocks.11.attn.v_proj.weight", "backbone.blocks.11.attn.v_proj.bias", "backbone.blocks.11.attn.k_proj.weight", "backbone.blocks.11.attn.k_proj.bias", "backbone.blocks.11.attn.proj.weight", "backbone.blocks.11.attn.proj.bias", "backbone.blocks.11.norm2.weight", "backbone.blocks.11.norm2.bias", "backbone.blocks.11.fc1.weight", "backbone.blocks.11.fc1.bias", "backbone.blocks.11.fc2.weight", "backbone.blocks.11.fc2.bias", "backbone.blocks.11.adaptmlp.down_proj.weight", "backbone.blocks.11.adaptmlp.down_proj.bias", "backbone.blocks.11.adaptmlp.up_proj.weight", "backbone.blocks.11.adaptmlp.up_proj.bias", "backbone.norm.weight", "backbone.norm.bias". 
	Unexpected key(s) in state_dict: "backbones.0.cls_token", "backbones.0.pos_embed", "backbones.0.patch_embed.proj.weight", "backbones.0.patch_embed.proj.bias", "backbones.0.blocks.0.norm1.weight", "backbones.0.blocks.0.norm1.bias", "backbones.0.blocks.0.attn.qkv.weight", "backbones.0.blocks.0.attn.qkv.bias", "backbones.0.blocks.0.attn.proj.weight", "backbones.0.blocks.0.attn.proj.bias", "backbones.0.blocks.0.norm2.weight", "backbones.0.blocks.0.norm2.bias", "backbones.0.blocks.0.mlp.fc1.weight", "backbones.0.blocks.0.mlp.fc1.bias", "backbones.0.blocks.0.mlp.fc2.weight", "backbones.0.blocks.0.mlp.fc2.bias", "backbones.0.blocks.1.norm1.weight", "backbones.0.blocks.1.norm1.bias", "backbones.0.blocks.1.attn.qkv.weight", "backbones.0.blocks.1.attn.qkv.bias", "backbones.0.blocks.1.attn.proj.weight", "backbones.0.blocks.1.attn.proj.bias", "backbones.0.blocks.1.norm2.weight", "backbones.0.blocks.1.norm2.bias", "backbones.0.blocks.1.mlp.fc1.weight", "backbones.0.blocks.1.mlp.fc1.bias", "backbones.0.blocks.1.mlp.fc2.weight", "backbones.0.blocks.1.mlp.fc2.bias", "backbones.0.blocks.2.norm1.weight", "backbones.0.blocks.2.norm1.bias", "backbones.0.blocks.2.attn.qkv.weight", "backbones.0.blocks.2.attn.qkv.bias", "backbones.0.blocks.2.attn.proj.weight", "backbones.0.blocks.2.attn.proj.bias", "backbones.0.blocks.2.norm2.weight", "backbones.0.blocks.2.norm2.bias", "backbones.0.blocks.2.mlp.fc1.weight", "backbones.0.blocks.2.mlp.fc1.bias", "backbones.0.blocks.2.mlp.fc2.weight", "backbones.0.blocks.2.mlp.fc2.bias", "backbones.0.blocks.3.norm1.weight", "backbones.0.blocks.3.norm1.bias", "backbones.0.blocks.3.attn.qkv.weight", "backbones.0.blocks.3.attn.qkv.bias", "backbones.0.blocks.3.attn.proj.weight", "backbones.0.blocks.3.attn.proj.bias", "backbones.0.blocks.3.norm2.weight", "backbones.0.blocks.3.norm2.bias", "backbones.0.blocks.3.mlp.fc1.weight", "backbones.0.blocks.3.mlp.fc1.bias", "backbones.0.blocks.3.mlp.fc2.weight", "backbones.0.blocks.3.mlp.fc2.bias", "backbones.0.blocks.4.norm1.weight", "backbones.0.blocks.4.norm1.bias", "backbones.0.blocks.4.attn.qkv.weight", "backbones.0.blocks.4.attn.qkv.bias", "backbones.0.blocks.4.attn.proj.weight", "backbones.0.blocks.4.attn.proj.bias", "backbones.0.blocks.4.norm2.weight", "backbones.0.blocks.4.norm2.bias", "backbones.0.blocks.4.mlp.fc1.weight", "backbones.0.blocks.4.mlp.fc1.bias", "backbones.0.blocks.4.mlp.fc2.weight", "backbones.0.blocks.4.mlp.fc2.bias", "backbones.0.blocks.5.norm1.weight", "backbones.0.blocks.5.norm1.bias", "backbones.0.blocks.5.attn.qkv.weight", "backbones.0.blocks.5.attn.qkv.bias", "backbones.0.blocks.5.attn.proj.weight", "backbones.0.blocks.5.attn.proj.bias", "backbones.0.blocks.5.norm2.weight", "backbones.0.blocks.5.norm2.bias", "backbones.0.blocks.5.mlp.fc1.weight", "backbones.0.blocks.5.mlp.fc1.bias", "backbones.0.blocks.5.mlp.fc2.weight", "backbones.0.blocks.5.mlp.fc2.bias", "backbones.0.blocks.6.norm1.weight", "backbones.0.blocks.6.norm1.bias", "backbones.0.blocks.6.attn.qkv.weight", "backbones.0.blocks.6.attn.qkv.bias", "backbones.0.blocks.6.attn.proj.weight", "backbones.0.blocks.6.attn.proj.bias", "backbones.0.blocks.6.norm2.weight", "backbones.0.blocks.6.norm2.bias", "backbones.0.blocks.6.mlp.fc1.weight", "backbones.0.blocks.6.mlp.fc1.bias", "backbones.0.blocks.6.mlp.fc2.weight", "backbones.0.blocks.6.mlp.fc2.bias", "backbones.0.blocks.7.norm1.weight", "backbones.0.blocks.7.norm1.bias", "backbones.0.blocks.7.attn.qkv.weight", "backbones.0.blocks.7.attn.qkv.bias", "backbones.0.blocks.7.attn.proj.weight", "backbones.0.blocks.7.attn.proj.bias", "backbones.0.blocks.7.norm2.weight", "backbones.0.blocks.7.norm2.bias", "backbones.0.blocks.7.mlp.fc1.weight", "backbones.0.blocks.7.mlp.fc1.bias", "backbones.0.blocks.7.mlp.fc2.weight", "backbones.0.blocks.7.mlp.fc2.bias", "backbones.0.blocks.8.norm1.weight", "backbones.0.blocks.8.norm1.bias", "backbones.0.blocks.8.attn.qkv.weight", "backbones.0.blocks.8.attn.qkv.bias", "backbones.0.blocks.8.attn.proj.weight", "backbones.0.blocks.8.attn.proj.bias", "backbones.0.blocks.8.norm2.weight", "backbones.0.blocks.8.norm2.bias", "backbones.0.blocks.8.mlp.fc1.weight", "backbones.0.blocks.8.mlp.fc1.bias", "backbones.0.blocks.8.mlp.fc2.weight", "backbones.0.blocks.8.mlp.fc2.bias", "backbones.0.blocks.9.norm1.weight", "backbones.0.blocks.9.norm1.bias", "backbones.0.blocks.9.attn.qkv.weight", "backbones.0.blocks.9.attn.qkv.bias", "backbones.0.blocks.9.attn.proj.weight", "backbones.0.blocks.9.attn.proj.bias", "backbones.0.blocks.9.norm2.weight", "backbones.0.blocks.9.norm2.bias", "backbones.0.blocks.9.mlp.fc1.weight", "backbones.0.blocks.9.mlp.fc1.bias", "backbones.0.blocks.9.mlp.fc2.weight", "backbones.0.blocks.9.mlp.fc2.bias", "backbones.0.blocks.10.norm1.weight", "backbones.0.blocks.10.norm1.bias", "backbones.0.blocks.10.attn.qkv.weight", "backbones.0.blocks.10.attn.qkv.bias", "backbones.0.blocks.10.attn.proj.weight", "backbones.0.blocks.10.attn.proj.bias", "backbones.0.blocks.10.norm2.weight", "backbones.0.blocks.10.norm2.bias", "backbones.0.blocks.10.mlp.fc1.weight", "backbones.0.blocks.10.mlp.fc1.bias", "backbones.0.blocks.10.mlp.fc2.weight", "backbones.0.blocks.10.mlp.fc2.bias", "backbones.0.blocks.11.norm1.weight", "backbones.0.blocks.11.norm1.bias", "backbones.0.blocks.11.attn.qkv.weight", "backbones.0.blocks.11.attn.qkv.bias", "backbones.0.blocks.11.attn.proj.weight", "backbones.0.blocks.11.attn.proj.bias", "backbones.0.blocks.11.norm2.weight", "backbones.0.blocks.11.norm2.bias", "backbones.0.blocks.11.mlp.fc1.weight", "backbones.0.blocks.11.mlp.fc1.bias", "backbones.0.blocks.11.mlp.fc2.weight", "backbones.0.blocks.11.mlp.fc2.bias", "backbones.0.norm.weight", "backbones.0.norm.bias", "backbones.1.cls_token", "backbones.1.pos_embed", "backbones.1.patch_embed.proj.weight", "backbones.1.patch_embed.proj.bias", "backbones.1.blocks.0.norm1.weight", "backbones.1.blocks.0.norm1.bias", "backbones.1.blocks.0.attn.q_proj.weight", "backbones.1.blocks.0.attn.q_proj.bias", "backbones.1.blocks.0.attn.v_proj.weight", "backbones.1.blocks.0.attn.v_proj.bias", "backbones.1.blocks.0.attn.k_proj.weight", "backbones.1.blocks.0.attn.k_proj.bias", "backbones.1.blocks.0.attn.proj.weight", "backbones.1.blocks.0.attn.proj.bias", "backbones.1.blocks.0.norm2.weight", "backbones.1.blocks.0.norm2.bias", "backbones.1.blocks.0.fc1.weight", "backbones.1.blocks.0.fc1.bias", "backbones.1.blocks.0.fc2.weight", "backbones.1.blocks.0.fc2.bias", "backbones.1.blocks.0.adaptmlp.down_proj.weight", "backbones.1.blocks.0.adaptmlp.down_proj.bias", "backbones.1.blocks.0.adaptmlp.up_proj.weight", "backbones.1.blocks.0.adaptmlp.up_proj.bias", "backbones.1.blocks.1.norm1.weight", "backbones.1.blocks.1.norm1.bias", "backbones.1.blocks.1.attn.q_proj.weight", "backbones.1.blocks.1.attn.q_proj.bias", "backbones.1.blocks.1.attn.v_proj.weight", "backbones.1.blocks.1.attn.v_proj.bias", "backbones.1.blocks.1.attn.k_proj.weight", "backbones.1.blocks.1.attn.k_proj.bias", "backbones.1.blocks.1.attn.proj.weight", "backbones.1.blocks.1.attn.proj.bias", "backbones.1.blocks.1.norm2.weight", "backbones.1.blocks.1.norm2.bias", "backbones.1.blocks.1.fc1.weight", "backbones.1.blocks.1.fc1.bias", "backbones.1.blocks.1.fc2.weight", "backbones.1.blocks.1.fc2.bias", "backbones.1.blocks.1.adaptmlp.down_proj.weight", "backbones.1.blocks.1.adaptmlp.down_proj.bias", "backbones.1.blocks.1.adaptmlp.up_proj.weight", "backbones.1.blocks.1.adaptmlp.up_proj.bias", "backbones.1.blocks.2.norm1.weight", "backbones.1.blocks.2.norm1.bias", "backbones.1.blocks.2.attn.q_proj.weight", "backbones.1.blocks.2.attn.q_proj.bias", "backbones.1.blocks.2.attn.v_proj.weight", "backbones.1.blocks.2.attn.v_proj.bias", "backbones.1.blocks.2.attn.k_proj.weight", "backbones.1.blocks.2.attn.k_proj.bias", "backbones.1.blocks.2.attn.proj.weight", "backbones.1.blocks.2.attn.proj.bias", "backbones.1.blocks.2.norm2.weight", "backbones.1.blocks.2.norm2.bias", "backbones.1.blocks.2.fc1.weight", "backbones.1.blocks.2.fc1.bias", "backbones.1.blocks.2.fc2.weight", "backbones.1.blocks.2.fc2.bias", "backbones.1.blocks.2.adaptmlp.down_proj.weight", "backbones.1.blocks.2.adaptmlp.down_proj.bias", "backbones.1.blocks.2.adaptmlp.up_proj.weight", "backbones.1.blocks.2.adaptmlp.up_proj.bias", "backbones.1.blocks.3.norm1.weight", "backbones.1.blocks.3.norm1.bias", "backbones.1.blocks.3.attn.q_proj.weight", "backbones.1.blocks.3.attn.q_proj.bias", "backbones.1.blocks.3.attn.v_proj.weight", "backbones.1.blocks.3.attn.v_proj.bias", "backbones.1.blocks.3.attn.k_proj.weight", "backbones.1.blocks.3.attn.k_proj.bias", "backbones.1.blocks.3.attn.proj.weight", "backbones.1.blocks.3.attn.proj.bias", "backbones.1.blocks.3.norm2.weight", "backbones.1.blocks.3.norm2.bias", "backbones.1.blocks.3.fc1.weight", "backbones.1.blocks.3.fc1.bias", "backbones.1.blocks.3.fc2.weight", "backbones.1.blocks.3.fc2.bias", "backbones.1.blocks.3.adaptmlp.down_proj.weight", "backbones.1.blocks.3.adaptmlp.down_proj.bias", "backbones.1.blocks.3.adaptmlp.up_proj.weight", "backbones.1.blocks.3.adaptmlp.up_proj.bias", "backbones.1.blocks.4.norm1.weight", "backbones.1.blocks.4.norm1.bias", "backbones.1.blocks.4.attn.q_proj.weight", "backbones.1.blocks.4.attn.q_proj.bias", "backbones.1.blocks.4.attn.v_proj.weight", "backbones.1.blocks.4.attn.v_proj.bias", "backbones.1.blocks.4.attn.k_proj.weight", "backbones.1.blocks.4.attn.k_proj.bias", "backbones.1.blocks.4.attn.proj.weight", "backbones.1.blocks.4.attn.proj.bias", "backbones.1.blocks.4.norm2.weight", "backbones.1.blocks.4.norm2.bias", "backbones.1.blocks.4.fc1.weight", "backbones.1.blocks.4.fc1.bias", "backbones.1.blocks.4.fc2.weight", "backbones.1.blocks.4.fc2.bias", "backbones.1.blocks.4.adaptmlp.down_proj.weight", "backbones.1.blocks.4.adaptmlp.down_proj.bias", "backbones.1.blocks.4.adaptmlp.up_proj.weight", "backbones.1.blocks.4.adaptmlp.up_proj.bias", "backbones.1.blocks.5.norm1.weight", "backbones.1.blocks.5.norm1.bias", "backbones.1.blocks.5.attn.q_proj.weight", "backbones.1.blocks.5.attn.q_proj.bias", "backbones.1.blocks.5.attn.v_proj.weight", "backbones.1.blocks.5.attn.v_proj.bias", "backbones.1.blocks.5.attn.k_proj.weight", "backbones.1.blocks.5.attn.k_proj.bias", "backbones.1.blocks.5.attn.proj.weight", "backbones.1.blocks.5.attn.proj.bias", "backbones.1.blocks.5.norm2.weight", "backbones.1.blocks.5.norm2.bias", "backbones.1.blocks.5.fc1.weight", "backbones.1.blocks.5.fc1.bias", "backbones.1.blocks.5.fc2.weight", "backbones.1.blocks.5.fc2.bias", "backbones.1.blocks.5.adaptmlp.down_proj.weight", "backbones.1.blocks.5.adaptmlp.down_proj.bias", "backbones.1.blocks.5.adaptmlp.up_proj.weight", "backbones.1.blocks.5.adaptmlp.up_proj.bias", "backbones.1.blocks.6.norm1.weight", "backbones.1.blocks.6.norm1.bias", "backbones.1.blocks.6.attn.q_proj.weight", "backbones.1.blocks.6.attn.q_proj.bias", "backbones.1.blocks.6.attn.v_proj.weight", "backbones.1.blocks.6.attn.v_proj.bias", "backbones.1.blocks.6.attn.k_proj.weight", "backbones.1.blocks.6.attn.k_proj.bias", "backbones.1.blocks.6.attn.proj.weight", "backbones.1.blocks.6.attn.proj.bias", "backbones.1.blocks.6.norm2.weight", "backbones.1.blocks.6.norm2.bias", "backbones.1.blocks.6.fc1.weight", "backbones.1.blocks.6.fc1.bias", "backbones.1.blocks.6.fc2.weight", "backbones.1.blocks.6.fc2.bias", "backbones.1.blocks.6.adaptmlp.down_proj.weight", "backbones.1.blocks.6.adaptmlp.down_proj.bias", "backbones.1.blocks.6.adaptmlp.up_proj.weight", "backbones.1.blocks.6.adaptmlp.up_proj.bias", "backbones.1.blocks.7.norm1.weight", "backbones.1.blocks.7.norm1.bias", "backbones.1.blocks.7.attn.q_proj.weight", "backbones.1.blocks.7.attn.q_proj.bias", "backbones.1.blocks.7.attn.v_proj.weight", "backbones.1.blocks.7.attn.v_proj.bias", "backbones.1.blocks.7.attn.k_proj.weight", "backbones.1.blocks.7.attn.k_proj.bias", "backbones.1.blocks.7.attn.proj.weight", "backbones.1.blocks.7.attn.proj.bias", "backbones.1.blocks.7.norm2.weight", "backbones.1.blocks.7.norm2.bias", "backbones.1.blocks.7.fc1.weight", "backbones.1.blocks.7.fc1.bias", "backbones.1.blocks.7.fc2.weight", "backbones.1.blocks.7.fc2.bias", "backbones.1.blocks.7.adaptmlp.down_proj.weight", "backbones.1.blocks.7.adaptmlp.down_proj.bias", "backbones.1.blocks.7.adaptmlp.up_proj.weight", "backbones.1.blocks.7.adaptmlp.up_proj.bias", "backbones.1.blocks.8.norm1.weight", "backbones.1.blocks.8.norm1.bias", "backbones.1.blocks.8.attn.q_proj.weight", "backbones.1.blocks.8.attn.q_proj.bias", "backbones.1.blocks.8.attn.v_proj.weight", "backbones.1.blocks.8.attn.v_proj.bias", "backbones.1.blocks.8.attn.k_proj.weight", "backbones.1.blocks.8.attn.k_proj.bias", "backbones.1.blocks.8.attn.proj.weight", "backbones.1.blocks.8.attn.proj.bias", "backbones.1.blocks.8.norm2.weight", "backbones.1.blocks.8.norm2.bias", "backbones.1.blocks.8.fc1.weight", "backbones.1.blocks.8.fc1.bias", "backbones.1.blocks.8.fc2.weight", "backbones.1.blocks.8.fc2.bias", "backbones.1.blocks.8.adaptmlp.down_proj.weight", "backbones.1.blocks.8.adaptmlp.down_proj.bias", "backbones.1.blocks.8.adaptmlp.up_proj.weight", "backbones.1.blocks.8.adaptmlp.up_proj.bias", "backbones.1.blocks.9.norm1.weight", "backbones.1.blocks.9.norm1.bias", "backbones.1.blocks.9.attn.q_proj.weight", "backbones.1.blocks.9.attn.q_proj.bias", "backbones.1.blocks.9.attn.v_proj.weight", "backbones.1.blocks.9.attn.v_proj.bias", "backbones.1.blocks.9.attn.k_proj.weight", "backbones.1.blocks.9.attn.k_proj.bias", "backbones.1.blocks.9.attn.proj.weight", "backbones.1.blocks.9.attn.proj.bias", "backbones.1.blocks.9.norm2.weight", "backbones.1.blocks.9.norm2.bias", "backbones.1.blocks.9.fc1.weight", "backbones.1.blocks.9.fc1.bias", "backbones.1.blocks.9.fc2.weight", "backbones.1.blocks.9.fc2.bias", "backbones.1.blocks.9.adaptmlp.down_proj.weight", "backbones.1.blocks.9.adaptmlp.down_proj.bias", "backbones.1.blocks.9.adaptmlp.up_proj.weight", "backbones.1.blocks.9.adaptmlp.up_proj.bias", "backbones.1.blocks.10.norm1.weight", "backbones.1.blocks.10.norm1.bias", "backbones.1.blocks.10.attn.q_proj.weight", "backbones.1.blocks.10.attn.q_proj.bias", "backbones.1.blocks.10.attn.v_proj.weight", "backbones.1.blocks.10.attn.v_proj.bias", "backbones.1.blocks.10.attn.k_proj.weight", "backbones.1.blocks.10.attn.k_proj.bias", "backbones.1.blocks.10.attn.proj.weight", "backbones.1.blocks.10.attn.proj.bias", "backbones.1.blocks.10.norm2.weight", "backbones.1.blocks.10.norm2.bias", "backbones.1.blocks.10.fc1.weight", "backbones.1.blocks.10.fc1.bias", "backbones.1.blocks.10.fc2.weight", "backbones.1.blocks.10.fc2.bias", "backbones.1.blocks.10.adaptmlp.down_proj.weight", "backbones.1.blocks.10.adaptmlp.down_proj.bias", "backbones.1.blocks.10.adaptmlp.up_proj.weight", "backbones.1.blocks.10.adaptmlp.up_proj.bias", "backbones.1.blocks.11.norm1.weight", "backbones.1.blocks.11.norm1.bias", "backbones.1.blocks.11.attn.q_proj.weight", "backbones.1.blocks.11.attn.q_proj.bias", "backbones.1.blocks.11.attn.v_proj.weight", "backbones.1.blocks.11.attn.v_proj.bias", "backbones.1.blocks.11.attn.k_proj.weight", "backbones.1.blocks.11.attn.k_proj.bias", "backbones.1.blocks.11.attn.proj.weight", "backbones.1.blocks.11.attn.proj.bias", "backbones.1.blocks.11.norm2.weight", "backbones.1.blocks.11.norm2.bias", "backbones.1.blocks.11.fc1.weight", "backbones.1.blocks.11.fc1.bias", "backbones.1.blocks.11.fc2.weight", "backbones.1.blocks.11.fc2.bias", "backbones.1.blocks.11.adaptmlp.down_proj.weight", "backbones.1.blocks.11.adaptmlp.down_proj.bias", "backbones.1.blocks.11.adaptmlp.up_proj.weight", "backbones.1.blocks.11.adaptmlp.up_proj.bias", "backbones.1.norm.weight", "backbones.1.norm.bias", "fc.weight", "fc.sigma". 

In [29]:
model._network.eval()

SimpleVitNet(
  (backbone): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='non

In [14]:
data_manager = DataManager(
        args["dataset"],
        args["shuffle"],
        args["seed"],
        args["init_cls"],
        args["increment"],
        args,
    )

Files already downloaded and verified
Files already downloaded and verified


In [30]:
model._network.to(args["device"][0])

SimpleVitNet(
  (backbone): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='non

In [31]:
model._cur_task += 1


In [32]:
model._total_classes = model._known_classes + data_manager.get_task_size(model._cur_task)

In [33]:
model._cur_task

0

In [34]:
model._total_classes

10

In [35]:
num_workers = 8
#total_classes = args["init_cls"] + args["increment"] * task
test_dataset = data_manager.get_dataset(np.arange(0, model._total_classes), source="test", mode="test" )
model.test_loader = DataLoader(test_dataset, batch_size=args["batch_size"], shuffle=False, num_workers=num_workers)


train_dataset = data_manager.get_dataset(np.arange(0, model._total_classes),source="train", mode="train")
model.train_dataset=train_dataset
model.data_manager=data_manager

In [36]:
train_dataset_for_protonet=data_manager.get_dataset(np.arange(0,model._total_classes),source="train", mode="test")
model.train_loader_for_protonet = DataLoader(train_dataset_for_protonet, batch_size=args["batch_size"], shuffle=True, num_workers=num_workers)


In [37]:
model._known_classes = model._total_classes

In [18]:
model.construct_dual_branch_network()

This is for the BaseNet initialization.
I'm using ViT with adapters.
_IncompatibleKeys(missing_keys=['blocks.0.adaptmlp.down_proj.weight', 'blocks.0.adaptmlp.down_proj.bias', 'blocks.0.adaptmlp.up_proj.weight', 'blocks.0.adaptmlp.up_proj.bias', 'blocks.1.adaptmlp.down_proj.weight', 'blocks.1.adaptmlp.down_proj.bias', 'blocks.1.adaptmlp.up_proj.weight', 'blocks.1.adaptmlp.up_proj.bias', 'blocks.2.adaptmlp.down_proj.weight', 'blocks.2.adaptmlp.down_proj.bias', 'blocks.2.adaptmlp.up_proj.weight', 'blocks.2.adaptmlp.up_proj.bias', 'blocks.3.adaptmlp.down_proj.weight', 'blocks.3.adaptmlp.down_proj.bias', 'blocks.3.adaptmlp.up_proj.weight', 'blocks.3.adaptmlp.up_proj.bias', 'blocks.4.adaptmlp.down_proj.weight', 'blocks.4.adaptmlp.down_proj.bias', 'blocks.4.adaptmlp.up_proj.weight', 'blocks.4.adaptmlp.up_proj.bias', 'blocks.5.adaptmlp.down_proj.weight', 'blocks.5.adaptmlp.down_proj.bias', 'blocks.5.adaptmlp.up_proj.weight', 'blocks.5.adaptmlp.up_proj.bias', 'blocks.6.adaptmlp.down_proj.weight

In [23]:
model.replace_fc(model.train_loader_for_protonet, model._network, None)
# sind die weights die gleichen -> dann fishy 
#wenn nicht dann sollte es passen

MultiBranchCosineIncrementalNet(
  (backbone): Identity()
  (backbones): ModuleList(
    (0): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=768, out_features=768, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(

In [21]:
optimizer=optim.AdamW(model._network.parameters(), lr=model.init_lr, weight_decay=model.weight_decay)
scheduler=optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args['tuned_epoch'], eta_min=model.min_lr)


This is for the BaseNet initialization.
I'm using ViT with adapters.
_IncompatibleKeys(missing_keys=['blocks.0.adaptmlp.down_proj.weight', 'blocks.0.adaptmlp.down_proj.bias', 'blocks.0.adaptmlp.up_proj.weight', 'blocks.0.adaptmlp.up_proj.bias', 'blocks.1.adaptmlp.down_proj.weight', 'blocks.1.adaptmlp.down_proj.bias', 'blocks.1.adaptmlp.up_proj.weight', 'blocks.1.adaptmlp.up_proj.bias', 'blocks.2.adaptmlp.down_proj.weight', 'blocks.2.adaptmlp.down_proj.bias', 'blocks.2.adaptmlp.up_proj.weight', 'blocks.2.adaptmlp.up_proj.bias', 'blocks.3.adaptmlp.down_proj.weight', 'blocks.3.adaptmlp.down_proj.bias', 'blocks.3.adaptmlp.up_proj.weight', 'blocks.3.adaptmlp.up_proj.bias', 'blocks.4.adaptmlp.down_proj.weight', 'blocks.4.adaptmlp.down_proj.bias', 'blocks.4.adaptmlp.up_proj.weight', 'blocks.4.adaptmlp.up_proj.bias', 'blocks.5.adaptmlp.down_proj.weight', 'blocks.5.adaptmlp.down_proj.bias', 'blocks.5.adaptmlp.up_proj.weight', 'blocks.5.adaptmlp.up_proj.bias', 'blocks.6.adaptmlp.down_proj.weight

In [19]:
model._network.update_fc(model._total_classes)

In [20]:
def _compute_accuracy(model, loader):
    model.eval()
    correct, total = 0, 0
    for i, (_, inputs, targets) in enumerate(loader):
        inputs = inputs.to(args["device"][0])
        with torch.no_grad():
            outputs = model(inputs)["logits"]
        predicts = torch.max(outputs, dim=1)[1]
        correct += (predicts.cpu() == targets).sum()
        total += len(targets)

    return np.around(tensor2numpy(correct) * 100 / total, decimals=2)

In [22]:
model._network.load_state_dict(checkpoint['model_state_dict'])

RuntimeError: Error(s) in loading state_dict for MultiBranchCosineIncrementalNet:
	size mismatch for fc.weight: copying a param with shape torch.Size([20, 1536]) from checkpoint, the shape in current model is torch.Size([10, 1536]).

In [17]:
#{'top1': [98.2, 95.75, 94.17, 92.32, 90.5, 88.87, 88.59, 86.16, 85.64, 85.16], 
#'top5': [100.0, 99.35, 99.1, 98.95, 98.94, 98.5, 98.51, 98.21, 97.94, 97.62]}

In [24]:
_compute_accuracy(model._network, model.test_loader)

96.6

In [19]:
model._cur_task = 1

In [26]:
#model.test_loader = test_loader
model.eval_task()

({'grouped': {'total': 96.6, '00-09': 96.6, 'old': 0, 'new': 96.6},
  'top1': 96.6,
  'top5': 99.7},
 None)

In [37]:
from art.attacks.evasion import AutoAttack 
from art.attacks.evasion import FastGradientMethod
from foolbox.attacks import LinfPGD
import eagerpy as ep

In [39]:
epsilons = [0.01]#[0.001, 0.003, 0.005, 0.008, 0.01, 0.1]
clean_acc = 0.0
robust_acc = [0.0] * len(epsilons)
attack_epochs = 5
steps = [1, 5, 10, 30, 40, 50]
attack = LinfPGD(steps=steps[0])

for i, data in enumerate(model.test_loader, 0):

    # Samples (attack_batch_size * attack_epochs) images for adversarial attack.
    if i >= attack_epochs:
        break

    images, labels = data[0].to(args["device"][0]), data[1].to(args["device"][0])

   # clean_acc += (get_acc(model, images, labels)) / args.attack_epochs

    for j in range(len(epsilons)):
        #attack = FastGradientMethod(estimator=model._network, eps=epsilons[j])
        attack = AutoAttack(model._network, norm='Linf', eps=epsilons[j], attacks=["PDG"])
        #_images, _labels = ep.astensors(images, labels)
        #raw_advs, clipped_advs, success = attack(model._network, _images, _labels, epsilons=epsilons)
    
        x_test_adv = attack.generate(x=images)
        with torch.no_grad():
            outputs = model(x_test_adv)["logits"]
        predicts = torch.max(outputs, dim=1)[1]
        
        
        accuracy = np.sum(np.argmax(predicts.cpu(), axis=1) == np.argmax(labels, axis=1)) / len(labels)
        
        robust_acc[j] += accuracy / args.attack_epochs
    


EstimatorError: AutoAttack requires an estimator derived from <class 'art.estimators.estimator.BaseEstimator'> and <class 'art.estimators.classification.classifier.ClassifierMixin'>, the provided classifier is an instance of <class 'utils.inc_net.MultiBranchCosineIncrementalNet'> and is derived from (<class 'utils.inc_net.BaseNet'>,).

In [40]:
epsilons = [0.01]#[0.001, 0.003, 0.005, 0.008, 0.01, 0.1]
clean_acc = 0.0
robust_acc = [0.0] * len(epsilons)
attack_epochs = 5
steps = [1, 5, 10, 30, 40, 50]
attack = LinfPGD(steps=steps[0])

for i, data in enumerate(model.test_loader, 0):

    # Samples (attack_batch_size * attack_epochs) images for adversarial attack.
    if i >= attack_epochs:
        break

    images, labels = data[0].to(args["device"][0]), data[1].to(args["device"][0])

   # clean_acc += (get_acc(model, images, labels)) / args.attack_epochs

    
    _images, _labels = ep.astensors(images, labels)
    raw_advs, clipped_advs, success = attack(model._network, _images, _labels, epsilons=epsilons)

    #x_test_adv = attack.generate(x=images)
    with torch.no_grad():
        outputs = model(raw_advs)["logits"]
    predicts = torch.max(outputs, dim=1)[1]
    
    
    accuracy = np.sum(np.argmax(predicts.cpu(), axis=1) == np.argmax(labels, axis=1)) / len(labels)
    
    robust_acc[j] += accuracy / args.attack_epochs
    


AttributeError: 'MultiBranchCosineIncrementalNet' object has no attribute 'bounds'

In [None]:
robust_acc

In [9]:
args["device"][0]

'cuda:7'

In [10]:
from torchvision import transforms
from art.estimators.classification import PyTorchClassifier
from art.estimators.estimator import BaseEstimator
from art.estimators.classification.classifier import ClassifierMixin

import torch.nn as nn
from torchvision import transforms, datasets
import torchvision
import numpy as np
import os
import random
import argparse
from foolbox.utils import accuracy 

from foolbox import PyTorchModel, accuracy, samples
from foolbox.attacks import LinfPGD, FGSM, L2CarliniWagnerAttack
from autoattack import AutoAttack
import eagerpy as ep
from timm.models import load_checkpoint, create_model


In [11]:
#model = Learner(args)
#model._network.eval().to(args["device"][0])
data_manager = DataManager(
    args["dataset"],
    args["shuffle"],
    args["seed"],
    args["init_cls"],
    args["increment"],
    args,
)
args["nb_classes"] = data_manager.nb_classes # update args
args["nb_tasks"] = data_manager.nb_tasks

Files already downloaded and verified
Files already downloaded and verified


In [12]:
num_workers = 1
batch_size = 64

In [13]:
model._cur_task += 1
model._total_classes = model._known_classes + data_manager.get_task_size(model._cur_task)
model._network.update_fc(model._total_classes)

NameError: name 'model' is not defined

In [15]:
model._cur_task

0

In [16]:
model._total_classes

10

In [None]:
model.after_task()

In [17]:

train_dataset = data_manager.get_dataset(np.arange(model._known_classes, model._total_classes),source="train", mode="train", )
model.train_dataset = train_dataset
model.data_manager = data_manager
model.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_dataset = data_manager.get_dataset(np.arange(0, model._total_classes), source="test", mode="test" )
model.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

train_dataset_for_protonet = data_manager.get_dataset(np.arange(model._known_classes, model._total_classes),source="train", mode="test", )
model.train_loader_for_protonet = DataLoader(train_dataset_for_protonet, batch_size=batch_size, shuffle=True, num_workers=num_workers)

#model._network.to(model._device)
model.replace_fc(model.train_loader_for_protonet, model._network, None)

SimpleVitNet(
  (backbone): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='non

In [18]:
y_pred, y_true = model._eval_cnn(model.test_loader)
cnn_accy = model._evaluate(y_pred, y_true)

In [19]:
cnn_accy

{'grouped': {'total': 93.8, '00-09': 93.8, 'old': 0, 'new': 93.8},
 'top1': 93.8,
 'top5': 99.8}

In [77]:

class PretrainedClassifier(torch.nn.Module, BaseEstimator, ClassifierMixin):
    def __init__(self, model):
        self._model = model._network
        self._model.eval()  # Ensure the model is in evaluation mode
        self.num_classes = model._total_classes
        self.input_size = (3, 224, 224)
        self._model.to(args["device"][0])
        self._model.eval()
    
    def model(self, model):
        self._model = model._network
        
    def input_shape(self):
        return self.input_size

    def fit(self, x, y):
        raise NotImplementedError("fit method not supported for pretrained models")

    def predict(self, x):
        x_tensor = torch.tensor(x)
        inputs = x_tensor.to(model._device)
        with torch.no_grad():
            outputs = self._model(inputs)["logits"]
        _, predicted = torch.max(outputs, 1)
        return predicted.cpu().numpy()

    def predict_proba(self, x):
        x_tensor = torch.tensor(x)
        inputs = x_tensor.to(model._device)
        with torch.no_grad():
            outputs = self._model(inputs)["logits"]
        probabilities = torch.softmax(outputs, dim=1)
        return probabilities.detach().numpy()

classifier = PretrainedClassifier(model)


AttributeError: cannot assign module before Module.__init__() call

In [63]:
# Wrap the classifier in a PyTorchClassifier
# This step is necessary to use ART's attack functionalities
pytorch_classifier = PyTorchClassifier(
    model=classifier,
    input_shape=(3, 224, 224),  # Assuming input shape for your model
    nb_classes=model._total_classes,
    clip_values=(0, 1),  # Adjust according to your model's input range
    loss = nn.CrossEntropyLoss()
)

TypeError: The input model must inherit from `nn.Module`.

In [29]:

from utils.inc_net import IncrementalNet,SimpleCosineIncrementalNet,SimpleVitNet
import foolbox

In [15]:
class WrapperModel(SimpleVitNet):
    def __init__(self,args):
        super().__init__(args,True)
        
        #self._cur_task += 1
        #self._total_classes = model._known_classes + data_manager.get_task_size(model._cur_task)
        self.update_fc(10)
        self.args = args
        self._device = args["device"][0]
    def __call__(self, inputs):
        return super().__call__(inputs)["logits"]
    def replace_fc(self, trainloader, train_dataset):
        model = super().eval()
        model.to(self._device)
        embedding_list = []
        label_list = []
        with torch.no_grad():
            for i, batch in enumerate(trainloader):
                (_,data, label) = batch
                data = data.to(self._device)
                label = label.to(self._device)
                embedding = model.backbone(data)
                embedding_list.append(embedding.cpu())
                label_list.append(label.cpu())
        embedding_list = torch.cat(embedding_list, dim=0)
        label_list = torch.cat(label_list, dim=0)

        class_list = np.unique(train_dataset.labels)
        proto_list = []
        for class_index in class_list:
            # print('Replacing...',class_index)
            data_index = (label_list == class_index).nonzero().squeeze(-1)
            embedding = embedding_list[data_index]
            proto = embedding.mean(0)
            self.fc.weight.data[class_index] = proto


In [17]:
_total_classes = 10
train_dataset = data_manager.get_dataset(np.arange(0, _total_classes),source="train", mode="train", )

model = WrapperModel(args)
_total_classes = 0 + data_manager.get_task_size(0)

train_dataset_for_protonet = data_manager.get_dataset(np.arange(0, _total_classes),source="train", mode="test", )
train_loader_for_protonet = DataLoader(train_dataset_for_protonet, batch_size=batch_size, shuffle=True, num_workers=num_workers)

#model._network.to(model._device)
model.replace_fc(train_loader_for_protonet, train_dataset)

model.update_fc(_total_classes)

This is for the BaseNet initialization.
After BaseNet initialization.


In [None]:
model.feature_dim

In [54]:

torch.device(args["device"][0])
model.to(args["device"][0])
model.eval()
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
fmodel = foolbox.models.PyTorchModel(model, bounds=(0,1), device=args["device"][0], preprocessing=preprocessing)
#fmodel.to(args["device"][0])

In [55]:
model._device

'cuda:7'

In [19]:
def _compute_accuracy(model, loader):
    model.eval()
    correct, total = 0, 0
    for i, (_, inputs, targets) in enumerate(loader):
        inputs = inputs.to(model._device)
        with torch.no_grad():
            outputs = model(inputs)
        predicts = torch.max(outputs, dim=1)[1]
        correct += (predicts.cpu() == targets).sum()
        total += len(targets)

    return np.around(tensor2numpy(correct) * 100 / total, decimals=2)

In [34]:
test_dataset = data_manager.get_dataset(np.arange(0, _total_classes), source="test", mode="test" )
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
model.to(args["device"][0])
print(_compute_accuracy(model, test_loader))

93.8


In [75]:
import gc
torch.device(args["device"][0])
torch.cuda.empty_cache()
gc.collect()

9204

In [86]:

del _images, _labels, model

In [85]:
del test_loader, train_loader, acc, fmodel

In [56]:
fmodel.__dict__.keys()

dict_keys(['_model', '_bounds', '_dummy', '_preprocess_args', 'data_format', 'device'])

In [46]:
torch.device(args["device"][0])

device(type='cuda', index=7)

In [58]:
acc = []
for i, batch in enumerate(test_loader):
    (_,data, label) = batch
    
    images = data.to(args["device"][0])
    labels = label.to(args["device"][0])
    acc.append(accuracy(fmodel, images, labels))

In [59]:
len(acc)

16

In [60]:
sum(acc)/len(acc)

0.8953124992549419

In [81]:
fmodel = fmodel.transform_bounds((0, 1))

In [57]:
fmodel.device

device(type='cuda', index=7)

In [62]:
train_dataset = data_manager.get_dataset(np.arange(0, 10),source="train", mode="train")
train_dataset = train_dataset
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
attack = LinfPGD(steps=1)
epsilons = [0.0,
        0.001,
        #0.003,
        #0.005,
        #0.008,
        #0.01,
        #1.0
           ]
arr = []
for i, batch in enumerate(test_loader):

    (_,data, label) = batch
    images = data.to(args["device"][0])
    labels = label.to(args["device"][0])
    _images, _labels = ep.astensors(images, labels)
    raw_advs, clipped_advs, success = attack(fmodel, _images, _labels, epsilons=epsilons)
    robust_accuracy = 1 - success.float32().mean(axis=-1)
    arr.append(robust_accuracy)
    print("robust accuracy for perturbations with")
    for eps, acc in zip(epsilons, robust_accuracy):
        print(f"  Linf norm ≤ {eps:<6}: {acc.item() * 100:4.1f} %")

    del images, labels, _images, _labels
    #plt.plot(epsilons, robust_accuracy.numpy())


robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 82.8 %
  Linf norm ≤ 0.001 : 79.7 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 78.1 %
  Linf norm ≤ 0.001 : 78.1 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 85.9 %
  Linf norm ≤ 0.001 : 78.1 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 93.8 %
  Linf norm ≤ 0.001 : 93.8 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 95.3 %
  Linf norm ≤ 0.001 : 95.3 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 96.9 %
  Linf norm ≤ 0.001 : 96.9 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 95.3 %
  Linf norm ≤ 0.001 : 93.8 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 98.4 %
  Linf norm ≤ 0.001 : 95.3 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 92.2 %
  Linf norm ≤ 0.001 : 92.2 %
robust accuracy for perturbations with
  Linf norm ≤ 0.0   : 85.9 %
  Linf norm ≤ 0.001 : 84.4 %
robust accuracy for perturbati

In [63]:
arr

[PyTorchTensor(tensor([0.8281, 0.7969], device='cuda:7')),
 PyTorchTensor(tensor([0.7812, 0.7812], device='cuda:7')),
 PyTorchTensor(tensor([0.8594, 0.7812], device='cuda:7')),
 PyTorchTensor(tensor([0.9375, 0.9375], device='cuda:7')),
 PyTorchTensor(tensor([0.9531, 0.9531], device='cuda:7')),
 PyTorchTensor(tensor([0.9688, 0.9688], device='cuda:7')),
 PyTorchTensor(tensor([0.9531, 0.9375], device='cuda:7')),
 PyTorchTensor(tensor([0.9844, 0.9531], device='cuda:7')),
 PyTorchTensor(tensor([0.9219, 0.9219], device='cuda:7')),
 PyTorchTensor(tensor([0.8594, 0.8438], device='cuda:7')),
 PyTorchTensor(tensor([0.9062, 0.9062], device='cuda:7')),
 PyTorchTensor(tensor([0.9375, 0.9219], device='cuda:7')),
 PyTorchTensor(tensor([0.9531, 0.9531], device='cuda:7')),
 PyTorchTensor(tensor([0.8750, 0.8750], device='cuda:7')),
 PyTorchTensor(tensor([0.7812, 0.7812], device='cuda:7')),
 PyTorchTensor(tensor([0.8250, 0.8250], device='cuda:7'))]

In [88]:
import matplotlib.pyplot as plt



In [12]:
epsilons = [0.0,
        0.001,
        0.003,
        0.005,
        0.008,
        0.01,
        1.0
           ]
steps = [1, 5, 10, 30, 40, 50]

In [90]:
robacc_per_step = []
attack_epochs = 10
for step in steps:
    print(f"Step {step}")
    attack = LinfPGD(steps=step)
    
    clean_acc = 0.0
    robust_acc = []
    for i, batch in enumerate(test_loader):
        (_,data, label) = batch
        images = data.to(args["device"][0])
        labels = label.to(args["device"][0])
        
        # Samples (attack_batch_size * attack_epochs) images for adversarial attack.
        if i >= attack_epochs:
            break
    
        #images, labels = data[0].to(device), data[1].to(device)
        #if step == steps[0]:
        #    clean_acc += (get_acc(fmodel, images, labels)) / args.attack_epochs  # accumulate for attack epochs.
    
        
        _images, _labels = ep.astensors(images, labels)
        raw_advs, clipped_advs, success = attack(fmodel, _images, _labels, epsilons=epsilons)
    
        robust_accuracy = 1 - success.float32().mean(axis=-1)
        #print(robust_accuracy)
        robust_acc.append(robust_accuracy)# / attack_epochs
    
        #for eps, acc in zip(epsilons, robust_acc):
        #    print(f"  Step {step}, Linf norm ≤ {eps:<6}: {acc.item() * 100:4.1f} %")
        #print('  -------------------')
    racc_step = 0
    for i in range(len(robust_acc)):
        racc_step += robust_acc[i].numpy()
    racc_step = racc_step/len(robust_acc)
    robacc_per_step.append(racc_step)

In [91]:
np.save('robustaccperstep.npy', robacc_per_step)

In [8]:
arr_loaded = np.load('robustaccperstep.npy')
print(arr_loaded)

[[0.9046875 0.8875    0.875     0.846875  0.8234375 0.8       0.       ]
 [0.9046875 0.8390625 0.725     0.5828125 0.3984375 0.3234375 0.       ]
 [0.9046875 0.7953125 0.528125  0.325     0.1578125 0.0953125 0.       ]
 [0.9046875 0.58125   0.171875  0.040625  0.00625   0.003125  0.       ]
 [0.9046875 0.4984375 0.1109375 0.0203125 0.0015625 0.        0.       ]
 [0.9046875 0.4515625 0.0765625 0.0109375 0.        0.        0.       ]]


In [7]:
import pandas as pd
import numpy as np

In [13]:
df = pd.DataFrame(arr_loaded, index = steps, columns = epsilons)

In [14]:
df

Unnamed: 0,0.000,0.001,0.003,0.005,0.008,0.010,1.000
1,0.904688,0.8875,0.875,0.846875,0.823438,0.8,0.0
5,0.904688,0.839063,0.725,0.582812,0.398438,0.323438,0.0
10,0.904688,0.795313,0.528125,0.325,0.157813,0.095312,0.0
30,0.904688,0.58125,0.171875,0.040625,0.00625,0.003125,0.0
40,0.904688,0.498437,0.110937,0.020312,0.001563,0.0,0.0
50,0.904688,0.451562,0.076563,0.010937,0.0,0.0,0.0
