In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
import gc
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torch.hub import load_state_dict_from_url
from torchvision import datasets, models, transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau, ExponentialLR
from tqdm import tqdm

from sklearn.metrics import accuracy_score, roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

from torch.autograd import Variable

In [None]:
def _weights_init(m):
    classname = m.__class__.__name__
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

class LambdaLayer(nn.Module):
    def __init__(self, lambd):
      super(LambdaLayer, self).__init__()
      self.lambd = lambd

    def forward(self, x):
      return self.lambd(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, option='A'):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            if option == 'A':
                """
                For CIFAR10 ResNet paper uses option A.
                """
                self.shortcut = LambdaLayer(lambda x:
                                            F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
                self.shortcut = nn.Sequential(
                     nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                     nn.BatchNorm2d(self.expansion * planes)
                )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 16

        self.in_channels = 1 # 3 for CIFAR OR 1 for MNIST
        self.conv1 = nn.Conv2d(self.in_channels, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64, num_classes)

        self.apply(self._weights_init)

    def _weights_init(self, m):
        classname = m.__class__.__name__
        if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
          nn.init.kaiming_normal_(m.weight)
    
    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x, return_interm_layer=None):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        if return_interm_layer == 1:
            return out
        out = self.layer2(out)
        if return_interm_layer == 2:
            return out
        out = self.layer3(out)
        if return_interm_layer == 3:
            return out
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        
        return out

def resnet20():
    return ResNet(BasicBlock, [3, 3, 3])

def resnet32():
    return ResNet(BasicBlock, [5, 5, 5])

def resnet44():
    return ResNet(BasicBlock, [7, 7, 7])

def resnet56():
    return ResNet(BasicBlock, [9, 9, 9])

def resnet110():
    return ResNet(BasicBlock, [18, 18, 18])

def resnet1202():
    return ResNet(BasicBlock, [200, 200, 200])

In [None]:
class SubNet(nn.Module):
    def __init__(self, in_channels):
        super(SubNet, self).__init__()
        conv1 = nn.Conv2d(in_channels, 96, kernel_size=3, stride=1, padding=0, bias=False)
        bn1 = nn.BatchNorm2d(96)
        conv2 = nn.Conv2d(96, 192, kernel_size=3, stride=1, padding=0, bias=False)
        bn2 = nn.BatchNorm2d(192)
        conv3 = nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=0, bias=False)
        bn3 = nn.BatchNorm2d(192)
        conv4 = nn.Conv2d(192, 96, kernel_size=3, stride=1, padding=0, bias=False)
        bn4 = nn.BatchNorm2d(96)
        conv5 = nn.Conv2d(96, 48, kernel_size=3, stride=1, padding=0, bias=False)
        bn5 = nn.BatchNorm2d(48)
        relu = nn.ReLU(inplace=True)
        avgpool = nn.AvgPool2d((3, 3))
        flatten = nn.Flatten()
        linear = nn.Linear(48, 1)
        sigmoid = nn.Sigmoid()

        self.layers = nn.Sequential(
            conv1,
            bn1,
            relu,
            conv2,
            bn2,
            relu,
            conv3,
            bn3,
            relu,
            conv4,
            bn4,
            relu,
            conv5,
            bn5,
            relu,
            avgpool,
            flatten,
            linear,
            sigmoid,
        )

        self.layers.apply(self.init_param)

    def forward(self, x):
        for itr, layer in enumerate(self.layers):
          x = layer(x)

        return x

    def init_param(self, param):
        if type(param) in [nn.Linear, nn.Conv2d]:
            nn.init.kaiming_uniform_(param.weight)

In [None]:
def train_subnet(
    resnet_model,
    interm_layer,
    subnet_model,
    subnet_optimizer,
    subnet_criterion,
    subnet_scheduler,
    unattacked_train_data,
    unattacked_test_data,
    attacked_train_data,
    attacked_test_data,
    device,
    epochs=100,
    batch_size=64,
):
    subnet_model.train()
    resnet_model.eval()
    batches = []

    for epoch in range(epochs):

      avg_loss = 0.0
      for batch_itr in tqdm(range(0, len(unattacked_train_data), batch_size)):
          attacked_input = attacked_train_data[batch_itr:batch_itr+batch_size]
          attacked_labels = torch.ones((attacked_input.shape[0], 1), dtype=torch.float32)
          unattacked_input = unattacked_train_data[batch_itr:batch_itr+batch_size]
          unattacked_labels = torch.zeros((unattacked_input.shape[0], 1), dtype=torch.float32)

          input = torch.cat((unattacked_input, attacked_input), axis=0)
          labels = torch.cat((unattacked_labels, attacked_labels), axis=0)

          assert input.shape[0] == labels.shape[0]
          shuffle_indices = np.arange(input.shape[0])
          np.random.shuffle(shuffle_indices)
          input, labels = input[shuffle_indices].squeeze(0), labels[shuffle_indices].squeeze(0)

          input, labels = input.to(device), labels.to(device)

          input = resnet_model(input.float(), return_interm_layer=interm_layer)
          output = subnet_model(input)

          loss = subnet_criterion(output, labels)
          loss.backward()
          avg_loss += loss.item()

          subnet_optimizer.step()

          del input
          del labels
          del loss
          torch.cuda.empty_cache()

      val_loss, val_acc, val_roc = test_subnet(
          resnet_model,
          interm_layer,
          subnet_model,
          subnet_criterion,
          unattacked_test_data,
          attacked_test_data,
          device,
      )

      print('Val Loss: {:.4f} | Val Accuracy: {:.4f} | Val ROC: {:.4f}'.format(val_loss, val_acc, val_roc))
      torch.save({
          'epoch': epoch,
          'model_state_dict': resnet_model.state_dict(),
          'optimizer_state_dict': subnet_optimizer.state_dict(),
      }, './' + str(epoch) + 'model.pt')

      # subnet_scheduler.step(val_loss)

def test_subnet(
    resnet_model,
    interm_layer,
    subnet_model,
    criterion,
    unattacked_test_data,
    attacked_test_data,
    device,
    batch_size=64,
):
    resnet_model.eval()
    subnet_model.eval()
    test_loss = []
    accuracies = []
    roc_scores = []

    for batch_itr in tqdm(range(0, len(unattacked_test_data), batch_size)):
        attacked_input = attacked_test_data[batch_itr:batch_itr+batch_size]
        attacked_labels = torch.ones((attacked_input.shape[0], 1), dtype=torch.float32)
        unattacked_input = unattacked_test_data[batch_itr:batch_itr+batch_size]
        unattacked_labels = torch.zeros((unattacked_input.shape[0], 1), dtype=torch.float32)

        input = torch.cat((unattacked_input, attacked_input), axis=0)
        labels = torch.cat((unattacked_labels, attacked_labels), axis=0)

        assert input.shape[0] == labels.shape[0]
        shuffle_indices = np.arange(input.shape[0])
        np.random.shuffle(shuffle_indices)
        input, labels = input[shuffle_indices].squeeze(0), labels[shuffle_indices].squeeze(0)

        input, labels = input.to(device), labels.to(device)

        with torch.no_grad():
            input = resnet_model(input.float(), return_interm_layer=interm_layer)
            output = subnet_model(input)

        pred_labels = (output > 0.5).float()
        loss = criterion(output, labels)

        # print()
        # print(pred_labels.flatten())
        # print(labels.flatten())
        # print()

        accuracy = accuracy_score(labels.flatten().cpu(), pred_labels.flatten().cpu())
        roc_score = roc_auc_score(labels.flatten().cpu(), pred_labels.flatten().cpu())
        accuracies.append(accuracy)
        roc_scores.append(roc_score)
        test_loss.extend([loss.item()]*input.size()[0])
        
        del input
        del labels
        del loss
        torch.cuda.empty_cache()

    subnet_model.train()

    return np.mean(test_loss), np.mean(accuracies), np.mean(roc_scores)

In [None]:
def Normalize0to1(AA):
    AA[:, :, :, 0] -= AA[:, :, :, 0].min().item()
    AA[:, :, :, 0] /= AA[:, :, :, 0].max().item()

    AA[:, :, :, 1] -= AA[:, :, :, 1].min().item()
    AA[:, :, :, 1] /= AA[:, :, :, 1].max().item()

    AA[:, :, :, 2] -= AA[:, :, :, 2].min().item()
    AA[:, :, :, 2] /= AA[:, :, :, 2].max().item()

    return AA / 225.

In [None]:
!ls '/content/drive/MyDrive/11785 - Project/data'

benign_cifar.npy	      pgd_cifar_default_art.npy
benign_mnist.npy	      pgd_cifar_default_torchattacks_new.npy
cwlinf_cifar_default_art.npy  pgd_cifar_eps0.1_torchattacks.npy
cwlinf_mnist_default_art.npy  pgd_cifar_eps0.3_alpha0.1_steps7.npy
fgsm_cifar_default_art.npy    pgd_mnist_default_art.npy
fgsm_mnist_eps0.5.npy	      pgd_mnist_eps0.3_alpha0.1_steps7.npy


In [None]:
batch_size = 64

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

unattacked_data_path = "/content/drive/MyDrive/11785 - Project/data/benign_mnist.npy"
unattacked_data = np.load(unattacked_data_path, allow_pickle=True).astype(float)

# unattacked_data = torch.from_numpy(unattacked_data.transpose(0, 3, 1, 2))
unattacked_data = torch.from_numpy(unattacked_data)
print(f"unattacked data shape: {unattacked_data.shape}")

# # pre-processing
# unattacked_data = Normalize0to1(unattacked_data)
# unattacked_data = normalize(unattacked_data)

# train-test split
unattacked_train_data = unattacked_data[:9000]
unattacked_test_data = unattacked_data[9000:]



attacked_data_path = "/content/drive/MyDrive/11785 - Project/data/cwlinf_mnist_default_art.npy"
# attacked_data_path = "/content/drive/MyDrive/11785 - Project/data/cwlinf_cifar_default_art.npy"
# attacked_data_path = "/content/drive/MyDrive/11785 - Project/data/fgsm_cifar_default_art.npy"
# attacked_data_path = "/content/drive/MyDrive/11785 - Project/data/pgd_cifar_default_torchattacks_new.npy"
attacked_data = np.load(attacked_data_path, allow_pickle=True).astype(float)
# attacked_data = torch.from_numpy(attacked_data.transpose(1, 0, 2, 3))
attacked_data = torch.from_numpy(attacked_data)
print(f"attacked data shape: {attacked_data.shape}")

# # pre-processing
# attacked_data = Normalize0to1(attacked_data)
# attacked_data = normalize(attacked_data)

# train-test split
attacked_train_data = attacked_data[:9000]
attacked_test_data = attacked_data[9000:]

unattacked data shape: torch.Size([10000, 1, 28, 28])
attacked data shape: torch.Size([10000, 1, 28, 28])


In [None]:
! ls "/content/drive/MyDrive/11785 - Project/"

AdversarialDetection.pdf  data		      mnist_model.pth
cifar10_model.pth	  Experiments.gsheet  Presentation.gslides


In [None]:
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")

resnet_model = resnet32()
resnet_model.to(device)
checkpoint = torch.load("/content/drive/MyDrive/11785 - Project/mnist_model.pth")
# mod_checkpoint = {k.replace("module.", ""): v for k, v in checkpoint['state_dict'].items()}
resnet_model.load_state_dict(checkpoint)
resnet_optimizer = torch.optim.SGD(resnet_model.parameters(), lr=0.1, weight_decay=5e-5, momentum=0.9)
resnet_criterion = nn.CrossEntropyLoss()
resnet_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(resnet_optimizer, T_0=10, T_mult=2, eta_min=0.01, last_epoch=-1)

# output size at diff intermediate layers of resnet
interm_layer2dim = {1: 16, 2: 32, 3: 64}
interm_layer = 2

subnet_model = SubNet(interm_layer2dim[interm_layer])
subnet_model.to(device)
subnet_optimizer = torch.optim.Adam(subnet_model.parameters(), lr=0.01, betas=(0.99, 0.999))
subnet_criterion = nn.BCELoss()
subnet_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(subnet_optimizer, T_0=10, T_mult=2, eta_min=0.01, last_epoch=-1)

In [None]:
train_subnet(
  resnet_model,
  interm_layer,
  subnet_model,
  subnet_optimizer,
  subnet_criterion,
  subnet_scheduler,
  unattacked_train_data,
  unattacked_test_data,
  attacked_train_data,
  attacked_test_data,
  device,
  epochs=5,
)

100%|██████████| 141/141 [00:05<00:00, 24.85it/s]
100%|██████████| 16/16 [00:00<00:00, 96.92it/s]


Val Loss: 0.8361 | Val Accuracy: 0.4995 | Val ROC: 0.4995


100%|██████████| 141/141 [00:05<00:00, 25.01it/s]
100%|██████████| 16/16 [00:00<00:00, 98.14it/s]


Val Loss: 6.7950 | Val Accuracy: 0.4604 | Val ROC: 0.4604


100%|██████████| 141/141 [00:05<00:00, 25.07it/s]
100%|██████████| 16/16 [00:00<00:00, 98.62it/s]


Val Loss: 1.1217 | Val Accuracy: 0.5456 | Val ROC: 0.5456


100%|██████████| 141/141 [00:05<00:00, 25.05it/s]
100%|██████████| 16/16 [00:00<00:00, 98.76it/s]


Val Loss: 1.7202 | Val Accuracy: 0.5457 | Val ROC: 0.5457


100%|██████████| 141/141 [00:05<00:00, 25.06it/s]
100%|██████████| 16/16 [00:00<00:00, 96.38it/s]


Val Loss: 1.3059 | Val Accuracy: 0.5133 | Val ROC: 0.5133


In [None]:
for itr in range(100, 110):
  print(torch.argmax(resnet_model(attacked_data[itr].unsqueeze(0).to(device).float())))

In [None]:
for itr in range(100, 110):
  print(torch.argmax(resnet_model(unattacked_data[1].unsqueeze(0).to(device).float())))

In [None]:
def clean_dataset(
    resnet_model,
    interm_layer,
    subnet_model,
    test_data,
    device,
    batch_size=64,
):
    resnet_model.eval()
    subnet_model.eval()
    clean_idxs = []

    print(test_data.shape)
    for batch_itr in range(0, len(test_data), batch_size):
        input = test_data[batch_itr:batch_itr+batch_size]
        input = input.to(device)

        with torch.no_grad():
            input = resnet_model(input.float(), return_interm_layer=interm_layer)
            output = subnet_model(input)

        pred_labels = (output > 0.5).float()
        pred_labels = pred_labels.flatten().cpu()
        clean_idxs.extend([
            batch_itr+idx for idx in range(len(pred_labels)) \
              if pred_labels[idx] == 0
        ])
        
        del input
        torch.cuda.empty_cache()

    return clean_idxs

In [None]:
np.vstack([unattacked_data, attacked_data]).shape

(20000, 1, 28, 28)

In [None]:
cleaned = clean_dataset(
  resnet_model,
  interm_layer,
  subnet_model,
  torch.from_numpy(np.vstack([unattacked_data, attacked_data])),
  device,
)

print(len(cleaned))
print(cleaned)

torch.Size([20000, 1, 28, 28])
9216
[0, 1, 2, 5, 10, 13, 14, 15, 17, 22, 24, 25, 26, 28, 31, 33, 34, 35, 36, 37, 39, 40, 46, 47, 55, 57, 60, 61, 64, 66, 67, 70, 71, 72, 74, 77, 79, 81, 82, 83, 84, 85, 86, 89, 90, 94, 95, 96, 98, 100, 102, 103, 106, 107, 110, 126, 133, 134, 135, 136, 137, 140, 141, 142, 143, 145, 147, 148, 151, 154, 158, 160, 163, 164, 168, 172, 173, 174, 176, 178, 180, 181, 183, 186, 189, 190, 191, 194, 196, 201, 202, 203, 204, 208, 210, 215, 216, 220, 221, 223, 224, 225, 226, 227, 228, 230, 233, 234, 235, 236, 237, 239, 246, 247, 249, 250, 251, 254, 256, 258, 259, 260, 261, 262, 265, 266, 267, 268, 271, 272, 274, 276, 277, 278, 279, 280, 285, 286, 287, 288, 289, 290, 291, 294, 298, 299, 302, 303, 305, 306, 308, 310, 311, 312, 314, 317, 318, 321, 323, 324, 326, 327, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 341, 342, 343, 345, 346, 348, 349, 350, 352, 353, 354, 357, 362, 363, 365, 367, 368, 369, 370, 372, 377, 378, 380, 381, 383, 385, 388, 390, 391, 393, 3

### Unseen Attack Test

In [None]:
batch_size = 64

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

unattacked_data_path = "/content/drive/MyDrive/11785 - Project/data/benign_cifar.npy"
unattacked_data = np.load(unattacked_data_path, allow_pickle=True).astype(float)

unattacked_data = torch.from_numpy(unattacked_data)
print(f"unattacked data shape: {unattacked_data.shape}")
unattacked_test_data = unattacked_data

attacked_data_path = "/content/drive/MyDrive/11785 - Project/data/fgsm_cifar_default_art.npy"
attacked_data = np.load(attacked_data_path, allow_pickle=True).astype(float)

attacked_data = torch.from_numpy(attacked_data)
print(f"attacked data shape: {attacked_data.shape}")
attacked_test_data = attacked_data

unattacked data shape: torch.Size([10000, 3, 32, 32])
attacked data shape: torch.Size([10000, 3, 32, 32])


In [None]:
val_loss, val_acc, val_roc = test_subnet(
  resnet_model,
  interm_layer,
  subnet_model,
  subnet_criterion,
  unattacked_test_data,
  attacked_test_data,
  device,
)

print('Val Loss: {:.4f} | Val Accuracy: {:.4f} | Val ROC: {:.4f}'.format(val_loss, val_acc, val_roc))

100%|██████████| 157/157 [00:05<00:00, 29.22it/s]

Val Loss: 0.0450 | Val Accuracy: 0.9996 | Val ROC: 0.9996



