# White-box Attack on TinyImagenet

In [1]:
import sys
#
import torch
import torch.nn as nn

#sys.path.insert(0, '..')
# import torchattacks

In [2]:
%cd adversarial-attacks-pytorch
import torchattacks
import robustbench
from robustbench.data import load_cifar10
from robustbench.utils import load_model, clean_accuracy
%cd ..

/home/aoezkan/dl_proj/adversarial-attacks-pytorch


  from .autonotebook import tqdm as notebook_tqdm


/home/aoezkan/dl_proj


In [3]:
# imports
import torch
import torch.nn as nn
import torchvision.models as models
from torch.nn import CrossEntropyLoss
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from utils.metrics import topk_acc, real_acc, AverageMeter
from utils.parsers import get_training_parser
from utils.optimizer import get_optimizer, get_scheduler, OPTIMIZERS_DICT, SCHEDULERS

from models.networks import get_model, get_model_bypass
from data_utils.data_stats import *

import matplotlib.pyplot as plt
import argparse
import time

In [4]:
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

torch.manual_seed(0)

<torch._C.Generator at 0x1527e811ee50>

In [5]:
from torchattacks import PGD
from utils_attack import *

In [6]:
device = 'cuda'
print(torch.cuda.is_available())

True


In [7]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")  # Assuming a single-GPU setup
    device_properties = torch.cuda.get_device_properties(device)
    
    total_memory = device_properties.total_memory
    allocated_memory = torch.cuda.memory_allocated(device)
    cached_memory = torch.cuda.memory_reserved(device)

    print(f"Total GPU Memory: {total_memory / 1e9} GB")
    print(f"Allocated Memory: {allocated_memory / 1e9} GB")
    print(f"Cached Memory: {cached_memory / 1e9} GB")
else:
    print("CUDA is not available.")

Total GPU Memory: 11.71488768 GB
Allocated Memory: 0.0 GB
Cached Memory: 0.0 GB


# LOAD MLP

In [8]:
dataset = 'tinyimagenet'                 # One of cifar10, cifar100, stl10, imagenet or imagenet21
architecture = 'B_12-Wi_1024'
data_resolution = 64                # Resolution of data as it is stored
crop_resolution = 64                # Resolution of fine-tuned model (64 for all models we provide)
num_classes = CLASS_DICT[dataset]
eval_batch_size = 1024
checkpoint = 'B_12-Wi_1024_res_64_in21k_tinyimagenet'

In [9]:
torch.backends.cuda.matmul.allow_tf32 = True

In [10]:
num_classes

200

In [71]:
import json

from torch import nn
import torch
import numpy as np

from utils.download import download, default_checkpoints


NORMS = {
    'layer': nn.LayerNorm,
    'batch': nn.BatchNorm1d,
    'none': nn.Identity
}

ACT = {
    'gelu': nn.GELU(),
    'relu': nn.ReLU()
}


class StandardMLP(nn.Module):
    def __init__(self, dim_in, dim_out, widths):
        super(StandardMLP, self).__init__()
        self.dim_in = dim_in
        self.dim_out = dim_out
        self.widths = widths
        self.linear_in = nn.Linear(self.dim_in, self.widths[0])
        self.linear_out = nn.Linear(self.widths[-1], self.dim_out)
        self.layers = []
        self.layer_norms = []
        for i in range(len(self.widths) - 1):
            self.layers.append(nn.Linear(self.widths[i], self.widths[i + 1]))
            self.layer_norms.append(nn.LayerNorm(widths[i + 1]))

        self.layers = nn.ModuleList(self.layers)
        self.layernorms = nn.ModuleList(self.layer_norms)

    def forward(self, x):
        z = self.linear_in(x)
        for layer, norm in zip(self.layers, self.layer_norms):
            z = norm(z)
            z = nn.GELU()(z)
            z = layer(z)

        out = self.linear_out(z)

        return out


class BottleneckMLP(nn.Module):
    def __init__(self, dim_in, dim_out, block_dims, norm='layer', checkpoint=None, name=None, bypass=False):
        super(BottleneckMLP, self).__init__()
        self.dim_in = dim_in
        self.dim_out = dim_out
        self.block_dims = block_dims
        self.norm = NORMS[norm]
        self.checkpoint = checkpoint

        self.name = name
        self.linear_in = nn.Linear(self.dim_in, self.block_dims[0][1])
        self.linear_out = nn.Linear(self.block_dims[-1][1], self.dim_out)
        blocks = []
        layernorms = []

        for block_dim in self.block_dims:
            wide, thin = block_dim
            blocks.append(BottleneckBlock(thin=thin, wide=wide))
            layernorms.append(self.norm(thin))

        self.blocks = nn.ModuleList(blocks)
        self.layernorms = nn.ModuleList(layernorms)

        if self.checkpoint is not None:
            if not bypass:
                self.load(self.checkpoint)
            else:
                self.load_bypass(self.checkpoint)

    def forward(self, x):
        x = self.linear_in(x)

        for block, norm in zip(self.blocks, self.layernorms):
            x = x + block(norm(x))

        out = self.linear_out(x)

        return out

    def load(self, name, checkpoint_path='./checkpoints/'):
        #if name == True:
            # This simply assumes Imagenet21 pre-trained weights at the latest epoch available, no fine-tuning
        #    name = default_checkpoints[self.name]
        #elif name in ['cifar10', 'cifar100', 'imagenet']:
            # This loads the optimal fine-tuned weights for that dataset
        #    name = default_checkpoints[self.name + '_' + name]
        #else:
            # This assumes a full path, e.g. also specifying which epoch etc
        #    name = self.name + '_' + name
        name = self.name + '_' + name
        name = default_checkpoints[name]
        weight_path, config_path = download(name, checkpoint_path)

        with open(config_path, 'r') as f:
            self.config = json.load(f)

        params = {
            k: v
            for k, v in torch.load(weight_path, map_location ='cpu').items()
        }

        # Load pre-trained parameters
        print('Load_state output', self.load_state_dict(params, strict=False))

    def load_bypass(self, name, checkpoint_path='./checkpoints/'):
        #if name == True:
            # This simply assumes Imagenet21 pre-trained weights at the latest epoch available, no fine-tuning
        #    name = default_checkpoints[self.name]
        #elif name in ['cifar10', 'cifar100', 'imagenet']:
            # This loads the optimal fine-tuned weights for that dataset
        #    name = default_checkpoints[self.name + '_' + name]
        #else:
            # This assumes a full path, e.g. also specifying which epoch etc
        #    name = self.name + '_' + name
        weight_path = checkpoint_path + name
        
        print("bypass")

        params = {
            k: v
            for k, v in torch.load(weight_path, map_location ='cpu').items()
        }

        # Load pre-trained parameters
        print('Load_state output', self.load_state_dict(params, strict=True))


class BottleneckBlock(nn.Module):
    def __init__(self, thin, wide, act=nn.GELU()):
        super(BottleneckBlock, self).__init__()

        self.block = nn.Sequential(
            nn.Linear(thin, wide), act, nn.Linear(wide, thin)
        )

    def forward(self, x):
        out = self.block(x)

        return out


def B_12_Wi_1024(dim_in, dim_out, checkpoint=None, bypass=False):
    block_dims = [[4 * 1024, 1024] for _ in range(12)]
    return BottleneckMLP(dim_in=dim_in, dim_out=dim_out, norm='layer', block_dims=block_dims, checkpoint=checkpoint,
                         name='B_' + str(len(block_dims)) + '-Wi_' + str(block_dims[0][1]) + '_res_' + str(int(np.sqrt(dim_in/3))), bypass=bypass)


def B_12_Wi_512(dim_in, dim_out, checkpoint=None, bypass=False):
    block_dims = [[4 * 512, 512] for _ in range(12)]
    return BottleneckMLP(dim_in=dim_in, dim_out=dim_out, norm='layer', block_dims=block_dims, checkpoint=checkpoint,
                         name='B_' + str(len(block_dims)) + '-Wi_' + str(block_dims[0][1]) + '_res_' + str(int(np.sqrt(dim_in/3))), bypass=bypass)


def B_6_Wi_1024(dim_in, dim_out, checkpoint=None, bypass=False):
    block_dims = [[4 * 1024, 1024] for _ in range(6)]
    return BottleneckMLP(dim_in=dim_in, dim_out=dim_out, norm='layer', block_dims=block_dims, checkpoint=checkpoint,
                         name='B_' + str(len(block_dims)) + '-Wi_' + str(block_dims[0][1]) + '_res_' + str(int(np.sqrt(dim_in/3))), bypass=bypass)


def B_6_Wi_512(dim_in, dim_out, checkpoint=None, bypass=False):
    block_dims = [[4 * 512, 512] for _ in range(6)]
    return BottleneckMLP(dim_in=dim_in, dim_out=dim_out, norm='layer', block_dims=block_dims, checkpoint=checkpoint,
                         name='B_' + str(len(block_dims)) + '-Wi_' + str(block_dims[0][1]) + '_res_' + str(int(np.sqrt(dim_in/3))), bypass=bypass)


model_list = {
    'B_12-Wi_1024': B_12_Wi_1024,
    'B_12-Wi_512': B_12_Wi_512,
    'B_6-Wi_1024': B_6_Wi_1024,
    'B_6-Wi_512': B_6_Wi_512
}


def get_model(architecture, checkpoint, resolution, num_classes):
    return model_list[architecture](dim_in=resolution**2 * 3, dim_out=num_classes, checkpoint=checkpoint)

def get_model_bypass(architecture, checkpoint, resolution, num_classes):
    return model_list[architecture](dim_in=resolution**2 * 3, dim_out=num_classes, checkpoint=checkpoint, bypass=True)

In [12]:
class MLP_Wrapper(nn.Module):
    def __init__(self, architecture, resolution, num_classes, checkpoint):
        super(MLP_Wrapper, self).__init__()
        self.model = get_model_bypass(architecture=architecture, resolution=crop_resolution, num_classes=num_classes, checkpoint=checkpoint)
        self.resize = transforms.Resize((resolution, resolution))

    def forward(self, x):
        x = self.resize(x)
        x = x.flatten(1)
        x = self.model(x)
        return x

In [74]:
model_mlp = MLP_Wrapper(architecture, crop_resolution, num_classes, checkpoint)

model_mlp = model_mlp.to(device)
model_mlp.eval()

bypass


RuntimeError: Error(s) in loading state_dict for BottleneckMLP:
	Missing key(s) in state_dict: "linear_in.weight", "linear_in.bias", "linear_out.weight", "linear_out.bias", "blocks.0.block.0.weight", "blocks.0.block.0.bias", "blocks.0.block.2.weight", "blocks.0.block.2.bias", "blocks.1.block.0.weight", "blocks.1.block.0.bias", "blocks.1.block.2.weight", "blocks.1.block.2.bias", "blocks.2.block.0.weight", "blocks.2.block.0.bias", "blocks.2.block.2.weight", "blocks.2.block.2.bias", "blocks.3.block.0.weight", "blocks.3.block.0.bias", "blocks.3.block.2.weight", "blocks.3.block.2.bias", "blocks.4.block.0.weight", "blocks.4.block.0.bias", "blocks.4.block.2.weight", "blocks.4.block.2.bias", "blocks.5.block.0.weight", "blocks.5.block.0.bias", "blocks.5.block.2.weight", "blocks.5.block.2.bias", "blocks.6.block.0.weight", "blocks.6.block.0.bias", "blocks.6.block.2.weight", "blocks.6.block.2.bias", "blocks.7.block.0.weight", "blocks.7.block.0.bias", "blocks.7.block.2.weight", "blocks.7.block.2.bias", "blocks.8.block.0.weight", "blocks.8.block.0.bias", "blocks.8.block.2.weight", "blocks.8.block.2.bias", "blocks.9.block.0.weight", "blocks.9.block.0.bias", "blocks.9.block.2.weight", "blocks.9.block.2.bias", "blocks.10.block.0.weight", "blocks.10.block.0.bias", "blocks.10.block.2.weight", "blocks.10.block.2.bias", "blocks.11.block.0.weight", "blocks.11.block.0.bias", "blocks.11.block.2.weight", "blocks.11.block.2.bias", "layernorms.0.weight", "layernorms.0.bias", "layernorms.1.weight", "layernorms.1.bias", "layernorms.2.weight", "layernorms.2.bias", "layernorms.3.weight", "layernorms.3.bias", "layernorms.4.weight", "layernorms.4.bias", "layernorms.5.weight", "layernorms.5.bias", "layernorms.6.weight", "layernorms.6.bias", "layernorms.7.weight", "layernorms.7.bias", "layernorms.8.weight", "layernorms.8.bias", "layernorms.9.weight", "layernorms.9.bias", "layernorms.10.weight", "layernorms.10.bias", "layernorms.11.weight", "layernorms.11.bias". 
	Unexpected key(s) in state_dict: "module.linear_in.weight", "module.linear_in.bias", "module.linear_out.weight", "module.linear_out.bias", "module.blocks.0.block.0.weight", "module.blocks.0.block.0.bias", "module.blocks.0.block.2.weight", "module.blocks.0.block.2.bias", "module.blocks.1.block.0.weight", "module.blocks.1.block.0.bias", "module.blocks.1.block.2.weight", "module.blocks.1.block.2.bias", "module.blocks.2.block.0.weight", "module.blocks.2.block.0.bias", "module.blocks.2.block.2.weight", "module.blocks.2.block.2.bias", "module.blocks.3.block.0.weight", "module.blocks.3.block.0.bias", "module.blocks.3.block.2.weight", "module.blocks.3.block.2.bias", "module.blocks.4.block.0.weight", "module.blocks.4.block.0.bias", "module.blocks.4.block.2.weight", "module.blocks.4.block.2.bias", "module.blocks.5.block.0.weight", "module.blocks.5.block.0.bias", "module.blocks.5.block.2.weight", "module.blocks.5.block.2.bias", "module.blocks.6.block.0.weight", "module.blocks.6.block.0.bias", "module.blocks.6.block.2.weight", "module.blocks.6.block.2.bias", "module.blocks.7.block.0.weight", "module.blocks.7.block.0.bias", "module.blocks.7.block.2.weight", "module.blocks.7.block.2.bias", "module.blocks.8.block.0.weight", "module.blocks.8.block.0.bias", "module.blocks.8.block.2.weight", "module.blocks.8.block.2.bias", "module.blocks.9.block.0.weight", "module.blocks.9.block.0.bias", "module.blocks.9.block.2.weight", "module.blocks.9.block.2.bias", "module.blocks.10.block.0.weight", "module.blocks.10.block.0.bias", "module.blocks.10.block.2.weight", "module.blocks.10.block.2.bias", "module.blocks.11.block.0.weight", "module.blocks.11.block.0.bias", "module.blocks.11.block.2.weight", "module.blocks.11.block.2.bias", "module.layernorms.0.weight", "module.layernorms.0.bias", "module.layernorms.1.weight", "module.layernorms.1.bias", "module.layernorms.2.weight", "module.layernorms.2.bias", "module.layernorms.3.weight", "module.layernorms.3.bias", "module.layernorms.4.weight", "module.layernorms.4.bias", "module.layernorms.5.weight", "module.layernorms.5.bias", "module.layernorms.6.weight", "module.layernorms.6.bias", "module.layernorms.7.weight", "module.layernorms.7.bias", "module.layernorms.8.weight", "module.layernorms.8.bias", "module.layernorms.9.weight", "module.layernorms.9.bias", "module.layernorms.10.weight", "module.layernorms.10.bias", "module.layernorms.11.weight", "module.layernorms.11.bias". 

In [12]:
num_classes

200

In [13]:
checkpoint

'B_12-Wi_1024_res_64_in21k_tinyimagenet.t7'

In [14]:
checkpoint

'B_12-Wi_1024_res_64_in21k_tinyimagenet.t7'

In [18]:
device = 'cpu'

In [22]:
architecture

'B_12-Wi_1024'

In [11]:
class Model_Wrapper(nn.Module):
    def __init__(self, model, input_size):
        super(Model_Wrapper, self).__init__()
        self.model = model
        self.resize = transforms.Resize(input_size)

    def forward(self, x):
        x = self.resize(x)
        x = self.model(x)
        return x

In [35]:
class MLP_Wrapper(nn.Module):
    def __init__(self, model, input_size):
        super(MLP_Wrapper, self).__init__()
        self.model = model
        self.resize = transforms.Resize(input_size)

    def forward(self, x):
        x = x.flatten(1)
        x = self.model(x)
        return x

In [12]:
import os

In [37]:
dataset = 'cifar10'                 # One of cifar10, cifar100, stl10, imagenet or imagenet21
architecture = 'B_12-Wi_1024'
data_resolution = 32                # Resolution of data as it is stored
crop_resolution = 64                # Resolution of fine-tuned model (64 for all models we provide)
num_classes = CLASS_DICT[dataset]
eval_batch_size = 1024
checkpoint = 'in21k_imagenet'

# bypass ing 
num_classes = 1000
input_size = 64
model_mlp = get_model(architecture=architecture, resolution=input_size, num_classes=num_classes, checkpoint=checkpoint)
model_mlp.linear_out = nn.Linear(1024, 200)

model_full_path = os.path.join("checkpoints", "mlp_b12_wi1024_imagenet_bs128_tinyimagenet.t7")
checkpoint = torch.load(model_full_path, map_location=torch.device(device))

model_mlp = torch.nn.DataParallel(model_mlp)
model_mlp.load_state_dict(checkpoint['model'])
model_mlp.eval()

Weights already downloaded
Load_state output <All keys matched successfully>


DataParallel(
  (module): BottleneckMLP(
    (linear_in): Linear(in_features=12288, out_features=1024, bias=True)
    (linear_out): Linear(in_features=1024, out_features=200, bias=True)
    (blocks): ModuleList(
      (0-11): 12 x BottleneckBlock(
        (block): Sequential(
          (0): Linear(in_features=1024, out_features=4096, bias=True)
          (1): GELU(approximate='none')
          (2): Linear(in_features=4096, out_features=1024, bias=True)
        )
      )
    )
    (layernorms): ModuleList(
      (0-11): 12 x LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    )
  )
)

In [38]:
model_mlp_wrap = MLP_Wrapper(model_mlp, input_size=input_size)

model_mlp_wrap = model_mlp_wrap.to(device)
model_mlp_wrap.eval()

MLP_Wrapper(
  (model): DataParallel(
    (module): BottleneckMLP(
      (linear_in): Linear(in_features=12288, out_features=1024, bias=True)
      (linear_out): Linear(in_features=1024, out_features=200, bias=True)
      (blocks): ModuleList(
        (0-11): 12 x BottleneckBlock(
          (block): Sequential(
            (0): Linear(in_features=1024, out_features=4096, bias=True)
            (1): GELU(approximate='none')
            (2): Linear(in_features=4096, out_features=1024, bias=True)
          )
        )
      )
      (layernorms): ModuleList(
        (0-11): 12 x LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      )
    )
  )
  (resize): Resize(size=64, interpolation=bilinear, max_size=None, antialias=warn)
)

# LOAD CNN

In [17]:
from cifar10_models.resnet import *
model_resnet = resnet18()
model_resnet.conv1 = nn.Conv2d(out_channels=64, in_channels=3, kernel_size=7, bias=False)
model_resnet.fc = nn.Linear(out_features=200, in_features=512)

model_full_path = os.path.join("checkpoints", "ResNet18_TinyImageNet.t7")
checkpoint = torch.load(model_full_path, map_location=torch.device(device))

model_resnet = torch.nn.DataParallel(model_resnet)
model_resnet.load_state_dict(checkpoint['model'])
model_resnet = model_resnet.to(device)
model_resnet.eval()

DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T

In [18]:
if next(model_resnet.parameters()).is_cuda:
    print("Model is on CUDA (GPU)")
else:
    print("Model is on CPU")

Model is on CUDA (GPU)


In [19]:
model_resnet_wrap = Model_Wrapper(model_resnet, input_size=input_size)

model_resnet_wrap = model_resnet_wrap.to(device)
model_resnet_wrap.eval()

Model_Wrapper(
  (model): DataParallel(
    (module): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, e

# LOAD ViT

In [20]:
%cd ..
%cd pytorch-image-models
import timm
%cd ..
%cd dl_proj

/home/aoezkan
/home/aoezkan/pytorch-image-models
/home/aoezkan
/home/aoezkan/dl_proj


In [21]:

model_vit = timm.create_model("vit_tiny_patch16_224", pretrained=True)
model_vit.head = nn.Linear(model_vit.head.in_features, 200) # tinyimagnet


model_full_path = os.path.join("checkpoints", "vit_tiny_patch16_224_tinyimagenet.t7")
checkpoint = torch.load(model_full_path, map_location=torch.device(device))

model_vit = torch.nn.DataParallel(model_vit)
model_vit.load_state_dict(checkpoint['model'])
model_vit = model_vit.to(device)
model_vit.eval()



DataParallel(
  (module): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=192, out_features=576, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=192, out_features=192, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=192, out_features=768, bias=True)
          (act): GELU(approximate='none')


In [22]:
input_size = 224
model_vit_wrap = Model_Wrapper(model_vit, input_size=input_size)

model_vit_wrap = model_vit_wrap.to(device)
model_vit_wrap.eval()

Model_Wrapper(
  (model): DataParallel(
    (module): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=192, out_features=576, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=192, out_features=192, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=19

# LOAD DATA

In [20]:
# %cd ..
# %cd datasets/src/datasets
# from datasets import load_dataset, get_dataset_split_names
# %cd ../../..
# %cd dl_proj



In [23]:
import os
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from PIL import Image
import cv2

# def collate_fn_train(batch):
#     return ((torch.stack([transform_train(Image.fromarray(cv2.cvtColor(np.array(x),cv2.COLOR_GRAY2RGB))) if np.array(x).ndim == 2 else transform_train(x) for x,y in batch])), torch.tensor([x['label'] for x in batch]))
      
    
input_size = 64
bs = 100

mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)


# Set the path to the TinyImageNet dataset
data_dir = '../tiny-imagenet-200/'


# Define data transforms (you can customize these as needed)
transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),  # Resize images to a consistent size
    transforms.ToTensor(),         # Convert images to PyTorch tensors
    transforms.Normalize(mean=mean, std=std)  # Normalize the image data
])


def collate_fn_test(batch):
    inputs = []
    for x,y in batch:
        if np.array(x).ndim == 2: # if grayscale
            xx = transform(Image.fromarray(cv2.cvtColor(np.array(x),cv2.COLOR_GRAY2RGB)))
        else: # if rgb
            xx = transform(x)
        inputs.append(xx)
        
    inputs = torch.stack(inputs)
    labels = torch.tensor([y for x,y in batch])
    return (inputs, labels)

# def collate_fn_test(batch):
    # return ((torch.stack([transform(Image.fromarray(cv2.cvtColor(np.array(x['image']),cv2.COLOR_GRAY2RGB))) if np.array(x['image']).ndim == 2 else transform(x['image']) for x in batch])), torch.tensor([x['label'] for x in batch]))
      
# Create an ImageFolder dataset for training
# train_dataset = ImageFolder(os.path.join(data_dir, 'train'), transform=transform)

# Create a DataLoader for training data
# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)

# Create an ImageFolder dataset for validation
val_dataset = ImageFolder(os.path.join(data_dir, 'val'), transform=transform)

# Create a DataLoader for validation data
val_loader = DataLoader(val_dataset, batch_size=bs, shuffle=False, num_workers=1)

# Optionally, create a DataLoader for the test set
test_dataset = ImageFolder(os.path.join(data_dir, 'test'), transform=transform)
test_loader = DataLoader(test_dataset, batch_size=bs, shuffle=False, num_workers=1)


In [69]:
test_dataset_orig = ImageFolder(os.path.join(data_dir, 'test'))

In [70]:
test_dataset_orig[3]

(<PIL.Image.Image image mode=RGB size=64x64>, 0)

In [71]:
test_dataset[3]

(tensor([[[ 1.8550,  1.7694,  1.6324,  ...,  2.0092,  2.0092,  2.0434],
          [ 2.0605,  1.9064,  1.5810,  ...,  2.0092,  2.0263,  2.0434],
          [ 0.7419,  2.2147,  0.8104,  ...,  2.0263,  2.0434,  2.0434],
          ...,
          [ 1.4612,  1.4783,  1.4783,  ...,  1.4954,  1.4954,  1.4954],
          [ 1.4612,  1.4612,  1.4783,  ...,  1.4954,  1.4783,  1.4783],
          [ 1.4612,  1.4612,  1.4612,  ...,  1.4783,  1.4783,  1.4783]],
 
         [[ 2.0609,  2.0784,  2.1485,  ...,  2.1134,  2.1134,  2.1485],
          [ 1.9034,  1.8508,  1.6933,  ...,  2.1134,  2.1310,  2.1485],
          [-0.2325,  1.3782,  0.1352,  ...,  2.1310,  2.1485,  2.1485],
          ...,
          [ 1.5532,  1.5707,  1.5707,  ...,  1.5882,  1.5882,  1.5882],
          [ 1.5532,  1.5532,  1.5707,  ...,  1.5882,  1.5707,  1.5707],
          [ 1.5532,  1.5532,  1.5532,  ...,  1.5707,  1.5707,  1.5707]],
 
         [[ 1.9080,  1.8905,  1.8905,  ...,  2.1171,  2.1171,  2.1520],
          [ 1.8731,  1.7685,

In [72]:
len(testset['image'])

NameError: name 'testset' is not defined

In [105]:
for batch in test_loader:
    break

In [98]:
model_vit_wrap(x)



tensor([[-2.0163, -0.3998, -2.1935,  ..., -0.0682,  2.7073, -3.9640],
        [ 0.2181, -1.0537, -0.5756,  ..., -1.5039, -0.5743, -1.0677],
        [-1.6491, -4.6537,  2.9332,  ..., -3.2640,  4.2584, -4.4190],
        ...,
        [-1.3811,  4.2886,  0.1140,  ...,  2.8328,  4.5107,  1.2314],
        [-6.6187, -2.2151, -4.1474,  ...,  1.8303, -3.5188, -8.0871],
        [-1.3849, -3.9730,  0.1594,  ..., -1.2767,  2.7958, -5.8903]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [92]:
model_mlp

DataParallel(
  (module): BottleneckMLP(
    (linear_in): Linear(in_features=12288, out_features=1024, bias=True)
    (linear_out): Linear(in_features=1024, out_features=200, bias=True)
    (blocks): ModuleList(
      (0-11): 12 x BottleneckBlock(
        (block): Sequential(
          (0): Linear(in_features=1024, out_features=4096, bias=True)
          (1): GELU(approximate='none')
          (2): Linear(in_features=4096, out_features=1024, bias=True)
        )
      )
    )
    (layernorms): ModuleList(
      (0-11): 12 x LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    )
  )
)

In [82]:
x, y = batch

In [83]:
x.shape

torch.Size([100, 3, 64, 64])

In [84]:
y.shape

torch.Size([100])

In [86]:
x[0]

tensor([[[-0.6965, -0.7137, -0.6794,  ..., -0.1657, -0.1999, -0.1999],
         [-0.6965, -0.6965, -0.6623,  ..., -0.1486, -0.1828, -0.1999],
         [-0.6794, -0.6794, -0.6452,  ..., -0.1314, -0.1486, -0.1657],
         ...,
         [-0.5253, -0.5082, -0.4911,  ..., -0.2171, -0.2513, -0.2684],
         [-0.5253, -0.5082, -0.4911,  ..., -0.1999, -0.2171, -0.2513],
         [-0.5253, -0.5253, -0.4911,  ..., -0.1828, -0.1828, -0.1999]],

        [[-0.4951, -0.5126, -0.5301,  ..., -0.0399, -0.0749, -0.0749],
         [-0.4951, -0.4951, -0.5126,  ..., -0.0224, -0.0574, -0.0749],
         [-0.4776, -0.4776, -0.4951,  ..., -0.0049, -0.0224, -0.0399],
         ...,
         [-0.4076, -0.3901, -0.3725,  ..., -0.0924, -0.1275, -0.1450],
         [-0.4076, -0.3901, -0.3725,  ..., -0.0749, -0.0924, -0.1275],
         [-0.4076, -0.4076, -0.3725,  ..., -0.0574, -0.0574, -0.0749]],

        [[-0.3753, -0.3927, -0.3578,  ...,  0.1825,  0.1476,  0.1476],
         [-0.3753, -0.3753, -0.3404,  ...,  0

In [41]:
model_mlp_wrap(images)

tensor([[-1.5364, -2.3948,  1.2522,  ...,  1.5902,  0.7864, -0.3174],
        [-2.7206, -6.2073, -1.6139,  ..., -3.2325,  0.0199,  2.4659],
        [-1.2435, -1.0192, -0.1396,  ..., -2.3569, -0.9291,  0.3972],
        ...,
        [-2.1531, -1.4292, -1.3181,  ...,  4.1561,  0.8508,  2.7130],
        [-1.8335, -3.8028, -1.1931,  ...,  0.9529, -1.2413, -2.9834],
        [-1.3705, -3.7400, -1.1680,  ..., -1.5639, -1.6479, -0.8802]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [27]:
12288/3

4096.0

In [29]:
64**2

4096

In [30]:
images.shape

torch.Size([100, 3, 64, 64])

In [24]:
#sys.path.insert(0, '..')
import robustbench
from robustbench.data import load_cifar10
from robustbench.utils import load_model, clean_accuracy

from tqdm import tqdm

In [42]:
print('[Data loaded]')

acc_mlp_list = []
acc_cnn_list = []
acc_vit_list = []

for batch in tqdm(test_loader):
    images, labels = batch
    acc_mlp = clean_accuracy(model_mlp_wrap, images.to(device), labels.to(device))
    acc_mlp_list.append(acc_mlp)

    acc_cnn = clean_accuracy(model_resnet, images.to(device), labels.to(device))
    acc_cnn_list.append(acc_cnn)

    acc_vit = clean_accuracy(model_vit_wrap, images.to(device), labels.to(device))
    acc_vit_list.append(acc_vit)

[Data loaded]


100%|██████████| 100/100 [00:42<00:00,  2.36it/s]


In [43]:
acc_mlp = np.array(acc_mlp_list)
acc = acc_mlp.mean()
print('[MLP loaded]')
print('MLP Acc: %2.2f %%'%(acc*100))

acc_cnn = np.array(acc_cnn_list)
acc = acc_cnn.mean()
print('[CNN loaded]')
print('CNN Acc: %2.2f %%'%(acc*100))

acc_vit = np.array(acc_vit_list)
acc = acc_vit.mean()
print('[ViT loaded]')
print('ViT Acc: %2.2f %%'%(acc*100))

[MLP loaded]
MLP Acc: 0.36 %
[CNN loaded]
CNN Acc: 0.14 %
[ViT loaded]
ViT Acc: 0.41 %


In [None]:
def compare_adv_attacks(model1, model2, atk_model1, images, labels, iterations=10, device='cpu'):

    model1.eval()
    model2.eval()

    successful_adv_attacks_model1 = []
    successful_adv_attacks_model2 = []

    for it in range(iterations):
        
        adv_images = atk_model1(images, labels)
        predictions = get_pred(model1, adv_images, device)
        predictions = predictions.cuda()
        
        seccessful_adv_images = adv_images[predictions != labels]
        true_labels_of_succ_adv_images = labels[predictions != labels]
        successful_adv_attacks_model1.append(seccessful_adv_images.shape[0])


        predictions = get_pred(model2, seccessful_adv_images, device)
        predictions = predictions.cuda()
        seccessful_adv_images = seccessful_adv_images[predictions != true_labels_of_succ_adv_images]
        successful_adv_attacks_model2.append(seccessful_adv_images.shape[0])

    return successful_adv_attacks_model1, successful_adv_attacks_model2




In [None]:
N_exp = 20

atk_mlp = PGD(model_mlp, eps=8/255, alpha=2/225, steps=10, random_start=True)
# atk_mlp.set_normalization_used(mean=mean, std=std)


l1_list_mlp = []
l2_list_mlp = []
# 100 batches
for i,batch in enumerate(test_loader):
    print("batch:", i)
    ll1_list = []
    ll2_list = []
    # N_exp times
    for i in range(5):
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)
        
        l1, l2 = compare_adv_attacks(model_mlp, model_resnet, atk_mlp, images, labels, 1, device)
        ll1_list.append(l1)
        ll2_list.append(l2)
    
    l1_list_mlp.append(ll1_list)
    l2_list_mlp.append(ll2_list)

In [None]:
len(l1_list)

In [None]:
ll2_list

In [None]:
mlp_adv.shape

In [None]:
np.squeeze(mlp_adv,2).shape

In [None]:
cnn_adv.shape

In [None]:
# Trained on mlp Tested on resnet

In [None]:
mlp_adv = np.array(l1_list_mlp)
mlp_adv = np.squeeze(mlp_adv,2)
mlp_adv = mlp_adv.sum(axis=0)

mlp_adv_mean = mlp_adv.mean(axis=0)
mlp_adv_std = mlp_adv.std(axis=0)
print('mlp_adv_mean: ', mlp_adv_mean)
print('mlp_adv_std: ', mlp_adv_std)

cnn_adv = np.array(l2_list_mlp) 
cnn_adv = np.squeeze(cnn_adv,2)
cnn_adv = cnn_adv.sum(axis=0)

cnn_adv_mean = cnn_adv.mean(axis=0)
cnn_adv_std = cnn_adv.std(axis=0)
print('cnn_adv_mean: ', cnn_adv_mean)
print('cnn_adv_std: ', cnn_adv_std)



In [None]:
print("ratio: ", (cnn_adv_mean/mlp_adv_mean)*100)

In [None]:
cnn_adv

In [None]:
N_exp = 20

atk_cnn = PGD(model_resnet, eps=8/255, alpha=2/225, steps=10, random_start=True)
# atk_cnn.set_normalization_used(mean=mean, std=std)


l1_list_cnn = []
l2_list_cnn = []
# 100 batches
for i,batch in enumerate(test_loader):
    print("batch:", i)
    ll1_list = []
    ll2_list = []
    # N_exp times
    for i in range(5):
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)
        l1, l2 = compare_adv_attacks(model_resnet, model_mlp, atk_cnn, images, labels, 1, device)
        ll1_list.append(l1)
        ll2_list.append(l2)
    
    l1_list_cnn.append(ll1_list)
    l2_list_cnn.append(ll2_list)

In [None]:
# Trained on resnet Tested on mlp

In [None]:
cnn_adv = np.array(l1_list_cnn)
cnn_adv = np.squeeze(cnn_adv,2)
cnn_adv = cnn_adv.sum(axis=0)

cnn_adv_mean = cnn_adv.mean(axis=0)
cnn_adv_std = cnn_adv.std(axis=0)
print('cnn_adv_mean: ', cnn_adv_mean)
print('cnn_adv_std: ', cnn_adv_std)

mlp_adv = np.array(l2_list_cnn) 
mlp_adv = np.squeeze(mlp_adv,2)
mlp_adv = mlp_adv.sum(axis=0)

mlp_adv_mean = mlp_adv.mean(axis=0)
mlp_adv_std = mlp_adv.std(axis=0)
print('mlp_adv_mean: ', mlp_adv_mean)
print('mlp_adv_std: ', mlp_adv_std)

In [None]:
print("ratio: ", (mlp_adv_mean/cnn_adv_mean)*100)

In [None]:
# PGD: Inside normalization

In [None]:
mean = (0.4914, 0.4822, 0.4465)
std = (0.2471, 0.2435, 0.2616)

transform = transforms.Compose([
    #transforms.Resize((crop_resolution, crop_resolution)),
    transforms.ToTensor(),
    # transforms.Normalize(mean, std),
])

batch_size = 100

dataset = datasets.CIFAR10(root='./data',
                           train=False,
                           transform=transform,
                           download=True)

test_loader = DataLoader(dataset,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=0)

In [None]:
N_exp = 20

atk_mlp = PGD(model_mlp, eps=8/255, alpha=2/225, steps=10, random_start=True)
atk_mlp.set_normalization_used(mean=mean, std=std)


l1_list_mlp = []
l2_list_mlp = []
# 100 batches
for i,batch in enumerate(test_loader):
    print("batch:", i)
    ll1_list = []
    ll2_list = []
    # N_exp times
    for i in range(5):
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)
        
        l1, l2 = compare_adv_attacks(model_mlp, model_resnet, atk_mlp, images, labels, 1, device)
        ll1_list.append(l1)
        ll2_list.append(l2)
    
    l1_list_mlp.append(ll1_list)
    l2_list_mlp.append(ll2_list)



In [None]:
# Trained on cnn Tested on mlp

In [None]:
mlp_adv = np.array(l1_list_mlp)
mlp_adv = np.squeeze(mlp_adv,2)
mlp_adv = mlp_adv.sum(axis=0)

mlp_adv_mean = mlp_adv.mean(axis=0)
mlp_adv_std = mlp_adv.std(axis=0)
print('mlp_adv_mean: ', mlp_adv_mean)
print('mlp_adv_std: ', mlp_adv_std)

cnn_adv = np.array(l2_list_mlp) 
cnn_adv = np.squeeze(cnn_adv,2)
cnn_adv = cnn_adv.sum(axis=0)

cnn_adv_mean = cnn_adv.mean(axis=0)
cnn_adv_std = cnn_adv.std(axis=0)
print('cnn_adv_mean: ', cnn_adv_mean)
print('cnn_adv_std: ', cnn_adv_std)

In [None]:
print("ratio: ", (cnn_adv_mean/mlp_adv_mean)*100)

In [None]:
N_exp = 20

atk_cnn = PGD(model_resnet, eps=8/255, alpha=2/225, steps=10, random_start=True)
atk_cnn.set_normalization_used(mean=mean, std=std)


l1_list_cnn = []
l2_list_cnn = []
# 100 batches
for i,batch in enumerate(test_loader):
    print("batch:", i)
    ll1_list = []
    ll2_list = []
    # N_exp times
    for i in range(5):
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)
        l1, l2 = compare_adv_attacks(model_resnet, model_mlp, atk_cnn, images, labels, 1, device)
        ll1_list.append(l1)
        ll2_list.append(l2)
    
    l1_list_cnn.append(ll1_list)
    l2_list_cnn.append(ll2_list)

In [None]:
# Trained on cnn Tested on mlp

In [None]:
cnn_adv = np.array(l1_list_cnn)
cnn_adv = np.squeeze(cnn_adv,2)
cnn_adv = cnn_adv.sum(axis=0)

cnn_adv_mean = cnn_adv.mean(axis=0)
cnn_adv_std = cnn_adv.std(axis=0)
print('cnn_adv_mean: ', cnn_adv_mean)
print('cnn_adv_std: ', cnn_adv_std)

mlp_adv = np.array(l2_list_cnn) 
mlp_adv = np.squeeze(mlp_adv,2)
mlp_adv = mlp_adv.sum(axis=0)

mlp_adv_mean = mlp_adv.mean(axis=0)
mlp_adv_std = mlp_adv.std(axis=0)
print('mlp_adv_mean: ', mlp_adv_mean)
print('mlp_adv_std: ', mlp_adv_std)

In [None]:
print("ratio: ", (mlp_adv_mean/cnn_adv_mean)*100)

In [None]:
model1 = model_mlp
model2 = model_resnet
atk_model1 = atk
iterations = 1

In [None]:
model1.eval()
model2.eval()

successful_adv_attacks_model1 = []
successful_adv_attacks_model2 = []

for it in range(iterations):
    print("Iteration:", it)
    adv_images = atk_model1(images, labels)
    predictions = get_pred(model1, adv_images, device)

    seccessful_adv_images = adv_images[predictions.cuda() != labels]
    true_labels_of_succ_adv_images = labels[predictions != labels]
    successful_adv_attacks_model1.append(seccessful_adv_images.shape[0])

    predictions = get_pred(model2, seccessful_adv_images, device)
    seccessful_adv_images = seccessful_adv_images[predictions != true_labels_of_succ_adv_images]
    successful_adv_attacks_model2.append(seccessful_adv_images.shape[0])

In [None]:
predictions

In [None]:
labels

In [None]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")  # Assuming a single-GPU setup
    device_properties = torch.cuda.get_device_properties(device)
    
    total_memory = device_properties.total_memory
    allocated_memory = torch.cuda.memory_allocated(device)
    cached_memory = torch.cuda.memory_reserved(device)

    print(f"Total GPU Memory: {total_memory / 1e9} GB")
    print(f"Allocated Memory: {allocated_memory / 1e9} GB")
    print(f"Cached Memory: {cached_memory / 1e9} GB")
else:
    print("CUDA is not available.")

In [None]:
l1_list = []
l2_list = []

In [None]:

    
atk = PGD(model_mlp, eps=8/255, alpha=2/225, steps=10, random_start=True)
atk.set_normalization_used(mean=mean, std=std)
l1, l2 = compare_adv_attcks(model_mlp, model_resnet, atk, images, labels, N_exp, device)


In [None]:
atk = PGD(model_resnet, eps=8/255, alpha=2/225, steps=10, random_start=True)
atk.set_normalization_used(mean=mean, std=std)
l11, l22 = compare_adv_attcks(model_resnet, model_mlp, atk, images, labels, 2)

In [None]:
print(l1, l2)

In [None]:
print(l11, l22)

In [None]:
idx = 0
pre = get_pred(model_resnet, adv_images[idx:idx+1], device)
imshow(adv_images[idx:idx+1], title="True:%d, Pre:%d"%(labels[idx], pre))

In [None]:
idx = 0
pre = get_pred(model, adv_images1[idx:idx+1].flatten(1), device)
imshow(adv_images1[idx:idx+1], title="True:%d, Pre:%d"%(labels[idx], pre))