<h1> MACER Algorithm to train provably robust models </h1>
This notebook demonstrates how to use the ART library to train a robust model on the CIFAR-10 dataset using the MACER algorithm. <br>
In this example notebook we show the MACER algorithm implementation using PyTorch.



Let's walk through some initial work steps ensuring that the notebook will work smoothly. We will:

1. set up a small configuration cell
2. load data and apply transformations on the data
3. define and load the model (resnet110)
4. define the optimizer (SGD) and the scheduler (MultiStepLR)
 

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, MultiStepLR
from torch.utils.data import DataLoader

from art.utils import load_dataset, random_targets, compute_accuracy,load_cifar10
from art.estimators.certification.randomized_smoothing import (PyTorchRandomizedSmoothing)
from torch.nn import CrossEntropyLoss

import math
import random
import numpy as np
import matplotlib.pyplot as plt
from art.estimators.classification.pytorch import PyTorchClassifier 
from art.data_generators import PyTorchDataGenerator

In [None]:
# Query CUDA once and derive the target device from that single answer:
# the first GPU when one is available, the CPU otherwise.
use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using CUDA")
device = torch.device("cuda:0" if use_gpu else "cpu")

<h1> Load Data </h1>
We are loading CIFAR10 dataset and applying transformations (random cropping, random horizontal flip, convert image to Tensor) 

In [None]:
# CIFAR-10 datasets and loaders. Training images are augmented (random 32x32 crop
# with 4 pixels of padding, random horizontal flip) before conversion to tensors;
# test images are only converted to tensors.
batch_size = 64
train_data = datasets.CIFAR10(
    "./dataset_cache",
    train=True,
    download=True,
    transform=transforms.Compose(
        [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]
    ),
)
test_data = datasets.CIFAR10(
    "./dataset_cache",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
# The training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=1)
test_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
)

In [None]:
# Materialise one pass over the (shuffled, augmented) training loader into dense
# tensors so the data can be handed to the classifier's fit() call in one piece.
num_train_samples = 50000

x_train = torch.zeros((num_train_samples, 3, 32, 32), dtype=torch.float32)
y_train = torch.zeros((num_train_samples,), dtype=torch.uint8)

for i, (data, labels) in enumerate(train_loader):
    start = i * batch_size
    stop = start + batch_size
    # Slicing clips at the end of the tensor, so a shorter final batch lands in
    # a destination slice of matching length.
    x_train[start:stop] = data
    y_train[start:stop] = labels

In [None]:
# Materialise the test loader into dense tensors (loader order is preserved
# because the test loader does not shuffle).
# NOTE(review): the count below holds the *test* set size but keeps the original
# variable name `num_train_samples`, overwriting the training count.
num_train_samples = 10000

x_test = torch.zeros((num_train_samples, 3, 32, 32), dtype=torch.float32)
y_test = torch.zeros((num_train_samples,), dtype=torch.uint8)

for i, (data, labels) in enumerate(test_loader):
    start = i * batch_size
    stop = start + batch_size
    # Slicing clips at the end of the tensor, so a shorter final batch lands in
    # a destination slice of matching length.
    x_test[start:stop] = data
    y_test[start:stop] = labels

<h1> Train Classifiers </h1>
1. Defining and loading the resnet110 model.<br>
2. Defining the optimizer and scheduler

In [None]:
def conv3x3(in_planes, out_planes, stride=1):
  """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
  return nn.Conv2d(
      in_channels=in_planes,
      out_channels=out_planes,
      kernel_size=3,
      stride=stride,
      padding=1,
      bias=False,
  )


class BasicBlock(nn.Module):
  """Residual block with two 3x3 convolutions.

  The main path is conv -> BN -> ReLU -> conv -> BN; the shortcut is added to
  it and a final ReLU is applied. ``expansion`` is the factor between
  ``planes`` and the block's output channels (1 for this block).
  """

  expansion = 1

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    """Create the block.

    Args:
      inplanes: number of input channels.
      planes: number of channels produced by both convolutions.
      stride: stride of the first convolution.
      downsample: optional module applied to the block input to build the
        shortcut; when ``None`` the input itself is the shortcut.
    """
    super().__init__()
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    """Run the main path, add the (optionally downsampled) input, apply ReLU."""
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))

    shortcut = x if self.downsample is None else self.downsample(x)

    out += shortcut
    return self.relu(out)


class ResNet_Cifar(nn.Module):
  """ResNet with a 3x3 stem, three residual stages, 8x8 average pooling and a linear head."""

  def __init__(self, block, layers, width=1, num_classes=10):
    """Build the network.

    Args:
      block: residual block factory, called as
        ``block(inplanes, planes, stride, downsample)`` and exposing an
        ``expansion`` attribute.
      layers: sequence whose first three entries give the number of blocks
        in stages 1-3.
      width: multiplier applied to the stage channel counts (16, 32, 64).
      num_classes: number of outputs of the final linear layer.
    """
    super().__init__()
    # Channel count entering the next stage; _make_layer keeps it up to date.
    self.inplanes = 16
    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    self.relu = nn.ReLU(inplace=True)
    self.layer1 = self._make_layer(block, 16 * width, layers[0])
    self.layer2 = self._make_layer(block, 32 * width, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 64 * width, layers[2], stride=2)
    self.avgpool = nn.AvgPool2d(8, stride=1)
    self.fc = nn.Linear(64 * block.expansion * width, num_classes)

    self._init_weights()

  def _init_weights(self):
    """Draw conv weights from N(0, 2 / (k_h * k_w * out_channels)); set BatchNorm weight to 1 and bias to 0."""
    for module in self.modules():
      if isinstance(module, nn.Conv2d):
        kernel_h, kernel_w = module.kernel_size
        fan_out = kernel_h * kernel_w * module.out_channels
        module.weight.data.normal_(0, math.sqrt(2. / fan_out))
      elif isinstance(module, nn.BatchNorm2d):
        module.weight.data.fill_(1)
        module.bias.data.zero_()

  def _make_layer(self, block, planes, blocks, stride=1):
    """Stack ``blocks`` residual blocks; only the first one gets ``stride`` and the projection shortcut."""
    out_planes = planes * block.expansion

    # A 1x1 conv + BN shortcut is needed whenever the first block changes the
    # spatial resolution or the channel count.
    downsample = None
    if stride != 1 or self.inplanes != out_planes:
      downsample = nn.Sequential(
          nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(out_planes),
      )

    stage = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = out_planes
    stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

    return nn.Sequential(*stage)

  def forward(self, x):
    """Stem -> three stages -> average pool -> flatten -> linear head."""
    x = self.relu(self.bn1(self.conv1(x)))

    for stage in (self.layer1, self.layer2, self.layer3):
      x = stage(x)

    x = self.avgpool(x)
    x = x.view(x.size(0), -1)
    return self.fc(x)


def resnet110(**kwargs):
  """Build a ``ResNet_Cifar`` from ``BasicBlock`` with 18 blocks per stage and width 1.

  Any keyword arguments are forwarded to ``ResNet_Cifar``.
  """
  return ResNet_Cifar(BasicBlock, [18, 18, 18], width=1, **kwargs)

In [None]:
# Instantiate the ResNet-110 with its default arguments (10 output classes).
model = resnet110()

In [None]:
# SGD with momentum and weight decay over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
# Multiply the learning rate by 0.1 at scheduler steps 200 and 400.
scheduler = MultiStepLR(optimizer, milestones=[200,400], gamma=0.1)

In [None]:
# Show the module hierarchy of the network as a quick sanity check.
print(model)

<h3>We are now ready to employ the ART library and train a provably robust smoothed classifier using MACER algorithm.</h3>
We will use PyTorchRandomizedSmoothing class of ART library to define the classifier and finally, fit the classifier using macer train_method. 

In [None]:
# Value handed to the smoothed classifier as `scale`
# (presumably the standard deviation of the Gaussian smoothing noise - confirm
# against the installed ART version).
sigma_1 = 0.25

# Wrap the model, optimizer and LR scheduler in ART's randomized-smoothing estimator.
# NOTE(review): lbd, gamma, beta and gauss_num look like the MACER hyper-parameters;
# verify their exact meaning and names against this ART version's signature.
rs_macer_classifier = PyTorchRandomizedSmoothing(
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    input_shape=(3, 32, 32),
    nb_classes=10,
    scale=sigma_1,
    lbd=12.0,
    gamma=8.0,
    beta=16.0,
    gauss_num=16,
)

In [None]:
# Train with the 'macer' method for 440 epochs in mini-batches of 64.
# NOTE(review): x_train / y_train are torch tensors here - confirm that this ART
# version's fit() accepts tensors rather than requiring NumPy arrays.
rs_macer_classifier.fit(x_train, y_train, nb_epochs=440, batch_size=64, train_method = 'macer')

We can start training at a particular checkpoint by passing the path to checkpoint in the fit function argument.<br>
<br>
For Example,
<br>
rs_macer_classifier.fit(x_train, y_train, nb_epochs=440, batch_size=64, train_method = 'macer', checkpoint = path_to_checkpoint)

<h1> Predictions </h1>
Now we will use the trained provably robust smoothed classifier to predict the test dataset.

We one-hot encode the test labels, predict the first 500 images of the test data, and compute the accuracy and coverage.

In [None]:
# One-hot encode the test labels (F.one_hot needs int64 indices). The class count
# is passed explicitly so the encoding is always 10 columns wide instead of being
# inferred from the largest label that happens to be present.
y_test_encoded = F.one_hot(y_test.to(torch.int64), num_classes=10)

In [None]:
# Predict the first 500 test images with the smoothed classifier and score the
# predictions against the matching one-hot labels; the two values returned by
# compute_accuracy are reported as accuracy and coverage.
x_preds_rs_1 = rs_macer_classifier.predict(x_test[:500])
acc_rs_1, cov_rs_1 = compute_accuracy(x_preds_rs_1, y_test_encoded[:500].numpy())
print(f"\nSmoothed Classifier, sigma={sigma_1}")
print(f"Accuracy: {acc_rs_1}")
print(f"Coverage: {cov_rs_1}")

<h1> Certification </h1>
We will now certify our classifier to prove that our trained model can achieve provable robustness against any possible attack in the certified region.

We define some helpful Python functions for certification

In [None]:
def getCertAcc(radius, pred, y_test, rad_list=None):
    """Compute the certified accuracy at each radius of a radius grid.

    For every grid radius ``r`` the result is the fraction of all certified
    samples whose certified radius is at least ``r`` and whose prediction
    equals the label.

    Args:
        radius: certified radius per sample (array-like).
        pred: predicted class per sample (array-like), aligned with ``radius``.
        y_test: true labels (array-like). It is indexed with the same
            positions as ``radius``/``pred``, so entry ``i`` must be the label
            of sample ``i``.
        rad_list: radii to evaluate; defaults to ``np.linspace(0, 2.25, 201)``.

    Returns:
        A list with one certified-accuracy value per entry of ``rad_list``.
        With no samples every entry is NaN.
    """
    # Accept plain lists as well as arrays: the comparisons and fancy indexing
    # below need ndarray semantics.
    radius = np.asarray(radius)
    pred = np.asarray(pred)
    y_test = np.asarray(y_test)

    if rad_list is None:
        rad_list = np.linspace(0, 2.25, 201)

    num_cert = len(radius)
    if num_cert == 0:
        # No samples: the accuracy is undefined; avoid a division by zero.
        return [float("nan")] * len(rad_list)

    cert_acc = []
    for r in rad_list:
        rad_idx = np.where(radius >= r)[0]
        # Normalise by the total sample count, not by the surviving subset.
        cert_acc.append(np.sum(pred[rad_idx] == y_test[rad_idx]) / num_cert)
    return cert_acc

In [None]:
def calculateACR(target, prediction, radius):
  """Return the average certified radius (ACR).

  The certified radius of every correctly predicted sample is summed and the
  sum is divided by the total number of predictions, so wrong predictions
  contribute a radius of zero.

  Args:
    target: true label per sample (indexable).
    prediction: predicted label per sample (sequence).
    radius: certified radius per sample (indexable).

  Returns:
    The average certified radius, or NaN when ``prediction`` is empty.
  """
  count = len(prediction)
  if count == 0:
    # The average over zero samples is undefined; avoid a ZeroDivisionError.
    return float("nan")

  total = sum(radius[i] for i, pred in enumerate(prediction) if pred == target[i])
  return total / count

<h3> Certified Radius for a single image </h3>

In [None]:
# Certify one randomly chosen test image; certify() is unpacked into the
# predicted class and the certified radius.
index = random.randint(0, 9999)
x_sample = x_test[index].unsqueeze(0)  # add a leading batch axis -> (1, 3, 32, 32)
prediction, radius = rs_macer_classifier.certify(x_sample, n=100000)
print(f"Prediction: {prediction} and Radius: {radius}")

<h3>Certification on test images</h3>

In [None]:
# Selection of test images to certify; the slices that use these values start
# at 0-based index start_img - 1.
start_img = 500
# Number of images to certify.
num_img = 500
# Stride between consecutive selected images (1 = every image).
skip = 1
# Value passed as `n` to certify() (presumably the number of noise samples per
# image - confirm against the ART documentation).
N = 100000

In [None]:
# Certify num_img test images starting at 0-based index start_img - 1, taking
# every skip-th image; the predictions and radii feed the ACR and the plot.
prediction_1, radius_1 = rs_macer_classifier.certify(x_test[(start_img-1):(start_img-1)+(num_img*skip):skip], n=N)

In [None]:
# Average certified radius over the certified images; the label slice is the
# same one that was used to select the images for certification.
acr = calculateACR(
    target=np.array(y_test[(start_img - 1):(start_img - 1) + (num_img * skip):skip]),
    prediction=np.array(prediction_1),
    radius=np.array(radius_1),
)
print("ACR: ", acr)

In [None]:
# Labels of exactly the images that were certified. getCertAcc indexes the labels
# with positions inside the certified subset, so passing the full y_test would
# compare the certified images against the labels of the first images instead.
y_certified = np.array(y_test[(start_img - 1):(start_img - 1) + (num_img * skip):skip])

# x-axis of the curve; must match the radius grid getCertAcc evaluates by default.
rad_list = np.linspace(0, 2.25, 201)
plt.plot(
    rad_list,
    getCertAcc(np.asarray(radius_1), np.asarray(prediction_1), y_certified),
    'r-',
    # Raw string: "\s" is not a valid escape sequence in a normal string literal.
    label=r'smoothed, $\sigma=$' + str(sigma_1),
)
plt.xlabel('l2 radius')
plt.ylabel('certified accuracy')
plt.legend()
plt.title('Radius Accuracy Curves: ACR {}'.format(acr))
plt.show()