In [1]:
import torch
import torchvision

train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                             ])),
  batch_size=5000, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                             ])),
  batch_size=5000, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST/raw/train-images-idx3-ubyte.gz to MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST/raw/train-labels-idx1-ubyte.gz to MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST/raw
Processing...
Done!





  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [2]:
!git clone https://github.com/alecwangcq/KFAC-Pytorch

Cloning into 'KFAC-Pytorch'...
remote: Enumerating objects: 52, done.[K
remote: Total 52 (delta 0), reused 0 (delta 0), pack-reused 52[K
Unpacking objects: 100% (52/52), done.


In [3]:
%cd /content/KFAC-Pytorch

/content/KFAC-Pytorch


In [4]:
import math

import torch
import torch.optim as optim

from utils.kfac_utils import (ComputeCovA, ComputeCovG)
#from utils.kfac_utils import update_running_stat
from IPython.core.debugger import set_trace

def update_running_stat(aa, m_aa, stat_decay):
    # using inplace operation to save memory!
    m_aa = m_aa * stat_decay / (1 - stat_decay)
    m_aa = m_aa + aa
    m_aa = m_aa*(1 - stat_decay)

class KFACOptimizer(optim.Optimizer):
    def __init__(self,
                 model,
                 lr=0.001,
                 momentum=0.9,
                 stat_decay=0.95,
                 damping=0.001,
                 kl_clip=0.001,
                 weight_decay=0,
                 TCov=10,
                 TInv=100,
                 batch_averaged=True):
        if lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if momentum < 0.0:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if weight_decay < 0.0:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        defaults = dict(lr=lr, momentum=momentum, damping=damping,
                        weight_decay=weight_decay)
        # TODO (CW): KFAC optimizer now only support model as input
        super(KFACOptimizer, self).__init__(model.parameters(), defaults)
        self.CovAHandler = ComputeCovA()
        self.CovGHandler = ComputeCovG()
        self.batch_averaged = batch_averaged

        self.known_modules = {'Linear', 'Conv2d'}

        self.modules = []
        self.grad_outputs = {}

        self.model = model
        self._prepare_model()

        self.steps = 0

        self.m_aa, self.m_gg = {}, {}
        self.Q_a, self.Q_g = {}, {}
        self.d_a, self.d_g = {}, {}
        self.stat_decay = stat_decay

        self.kl_clip = kl_clip
        self.TCov = TCov
        self.TInv = TInv

    def _save_input(self, module, input):
        if torch.is_grad_enabled() and self.steps % self.TCov == 0:
            aa = self.CovAHandler(input[0], module)
            # Initialize buffers
            if self.steps == 0:
                self.m_aa[module] = torch.diag(aa.new(aa.size(0)).fill_(1))
            update_running_stat(aa, self.m_aa[module], self.stat_decay)

    def _save_grad_output(self, module, grad_input, grad_output):
        # Accumulate statistics for Fisher matrices
        if self.acc_stats and self.steps % self.TCov == 0:
            gg = self.CovGHandler(grad_output[0], module, self.batch_averaged)
            # Initialize buffers
            if self.steps == 0:
                self.m_gg[module] = torch.diag(gg.new(gg.size(0)).fill_(1))
            update_running_stat(gg, self.m_gg[module], self.stat_decay)

    def _prepare_model(self):
        count = 0
        print(self.model)
        print("=> We keep following layers in KFAC. ")
        for module in self.model.modules():
            classname = module.__class__.__name__
            # print('=> We keep following layers in KFAC. <=')
            if classname in self.known_modules:
                self.modules.append(module)
                module.register_forward_pre_hook(self._save_input)
                module.register_backward_hook(self._save_grad_output)
                print('(%s): %s' % (count, module))
                count += 1

    def _update_inv(self, m):

        """Do eigen decomposition for computing inverse of the ~ fisher.
        :param m: The layer
        :return: no returns.
        """

        eps = 1e-6  # for numerical stability
        scale_a = (self.m_aa[m].max()-self.m_aa[m].min())*eps
        scale_g = (self.m_gg[m].max()-self.m_gg[m].min())*eps

        eps_matrix_a = torch.diag(torch.rand(self.m_aa[m].shape[0])).cuda() * scale_a
        eps_matrix_g = torch.diag(torch.rand(self.m_gg[m].shape[0])).cuda() * scale_g
        self.d_a[m], self.Q_a[m] = torch.symeig(
            self.m_aa[m]+eps_matrix_a, eigenvectors=True)
        self.d_g[m], self.Q_g[m] = torch.symeig(
            self.m_gg[m]+eps_matrix_g, eigenvectors=True)

        self.d_a[m] = self.d_a[m].mul((self.d_a[m] > eps).float())
        self.d_g[m] = self.d_g[m].mul((self.d_g[m] > eps).float())

    @staticmethod
    def _get_matrix_form_grad(m, classname):
        
        """
        :param m: the layer
        :param classname: the class name of the layer
        :return: a matrix form of the gradient. it should be a [output_dim, input_dim] matrix.
        """

        if classname == 'Conv2d':
            p_grad_mat = m.weight.grad.view(m.weight.grad.data.size(0), -1)  # n_filters * (in_c * kw * kh)
        else:
            p_grad_mat = m.weight.grad
        if m.bias is not None:
            p_grad_mat = torch.cat([p_grad_mat, m.bias.grad.view(-1, 1)], 1)
        return p_grad_mat

    def _get_natural_grad(self, m, p_grad_mat, damping):
        """
        :param m:  the layer
        :param p_grad_mat: the gradients in matrix form
        :return: a list of gradients w.r.t to the parameters in `m`
        """
        # p_grad_mat is of output_dim * input_dim
        # inv((ss')) p_grad_mat inv(aa') = [ Q_g (1/R_g) Q_g^T ] @ p_grad_mat @ [Q_a (1/R_a) Q_a^T]
        v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
        v2 = v1 / (self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + damping)
        v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

        if m.bias is not None:
            # we always put gradient w.r.t weight in [0]
            # and w.r.t bias in [1]
            v = [v[:, :-1], v[:, -1:]]
            v[0] = v[0].view(m.weight.grad.data.size())
            v[1] = v[1].view(m.bias.grad.data.size())
        else:
            v = [v.view(m.weight.grad.data.size())]

        return v

    def _kl_clip_and_update_grad(self, updates, lr):
        # do kl clip
        vg_sum = 0
        for m in self.modules:
            v = updates[m]
            vg_sum += (v[0] * m.weight.grad * lr ** 2).sum().item()
            if m.bias is not None:
                vg_sum += (v[1] * m.bias.grad * lr ** 2).sum().item()
        nu = min(1.0, math.sqrt(self.kl_clip / vg_sum))

        for m in self.modules:
            v = updates[m]
            m.weight.grad.data.copy_(v[0])
            m.weight.grad = m.weight.grad.mul(nu)
            if m.bias is not None:
                m.bias.grad.copy_(v[1])
                m.bias.grad = m.bias.grad.mul(nu)

    def _step(self, closure):
        # FIXME (CW): Modified based on SGD (removed nestrov and dampening in momentum.)
        # FIXME (CW): 1. no nesterov, 2. buf.mul_(momentum).add_(1 <del> - dampening </del>, d_p)
        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad
                if weight_decay != 0 and self.steps >= 20 * self.TCov:
                    d_p.add_(weight_decay, p.data)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
                        buf.mul_(momentum).add_(d_p)
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(1, d_p)
                    d_p = buf

                p.data.add_(-group['lr'], d_p)

    def step(self, closure=None):
        # FIXME(CW): temporal fix for compatibility with Official LR scheduler.
        group = self.param_groups[0]
        lr = group['lr']
        damping = group['damping']
        self.updates = {}
        for m in self.modules:
            classname = m.__class__.__name__
            if self.steps % self.TInv == 0:
                self._update_inv(m)
            p_grad_mat = self._get_matrix_form_grad(m, classname)
            v = self._get_natural_grad(m, p_grad_mat, damping)
            self.updates[m] = v
        self._kl_clip_and_update_grad(self.updates, lr)

        self._step(closure)
        self.steps += 1
    def get_fisher_cond(self):
        total_cond = torch.tensor(1., requires_grad=True)
        for matrix in list(self.m_aa.values()):
            cond = torch.norm(matrix)/torch.norm(matrix.inverse())
            total_cond = total_cond+cond
        for diag in list(self.m_gg.values()):
            cond = torch.norm(matrix)/torch.norm(matrix.inverse())
            total_cond = total_cond+cond
        ein_l = total_cond*0.1
        return ein_l

In [23]:
import numpy as np
import torch.nn.functional as F
from IPython.core.debugger import set_trace

kl_loss = torch.nn.KLDivLoss(reduction='batchmean')
def fgsm_attack(image, epsilon, data_grad):
    # Collect the element-wise sign of the data gradient
    sign_data_grad = data_grad.sign()
    # Create the perturbed image by adjusting each pixel of the input image
    perturbed_image = image + epsilon*sign_data_grad
    # Adding clipping to maintain [0,1] range
    perturbed_image = torch.clamp(perturbed_image, 0, 1)
    # Return the perturbed image
    return perturbed_image

import contextlib
import torch
import torch.nn as nn
import torch.nn.functional as F


@contextlib.contextmanager
def _disable_tracking_bn_stats(model):

    def switch_attr(m):
        if hasattr(m, 'track_running_stats'):
            m.track_running_stats ^= True
            
    model.apply(switch_attr)
    yield
    model.apply(switch_attr)


def _l2_normalize(d):
    d_reshaped = d.view(d.shape[0], -1, *(1 for _ in range(d.dim() - 2)))
    d /= torch.norm(d_reshaped, dim=1, keepdim=True) + 1e-8
    return d

In [24]:
import torch
import torch.nn as nn
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        #feed forward layers
        self.linear1 = nn.Linear(28*28,500)
        self.linear2 = nn.Linear(500,250)
        self.linear3 = nn.Linear(250,10)        
        
        #activations
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid() #Use sigmoid to convert the output into range (0,1)
        self.softmax = nn.Softmax()
        
    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        out = self.relu(out)
        out = self.linear3(out)
        return out


torch.manual_seed(0)

net = Net()
net.cuda()

opt_adam = torch.optim.Adam(net.parameters(),lr=1e-3)
opt = KFACOptimizer(net, lr=.01, TCov=1, TInv=1, weight_decay=0.01)
loss = nn.CrossEntropyLoss()

weights = []
for name, param in net.named_parameters():
    if "weight" in name:
        weights.append(param)
print(len(weights))

Net(
  (linear1): Linear(in_features=784, out_features=500, bias=True)
  (linear2): Linear(in_features=500, out_features=250, bias=True)
  (linear3): Linear(in_features=250, out_features=10, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
  (softmax): Softmax(dim=None)
)
=> We keep following layers in KFAC. 
(0): Linear(in_features=784, out_features=500, bias=True)
(1): Linear(in_features=500, out_features=250, bias=True)
(2): Linear(in_features=250, out_features=10, bias=True)
3


In [27]:
import copy

torch.autograd.set_detect_anomaly(True)
loss_list = []
acc_list = []
total_step = len(train_loader)
num_epochs = 10
i = 0

old = 20
alpha = 1

ein_loss_list = []

class EINLoss(nn.Module):
    def __init__(self, alpha):
        super(EINLoss, self).__init__()
        self.alpha = alpha
    def forward(self):
        vs = []
        outs = []
        sizes = {}
        classname = 'Linear'
        with torch.no_grad():
            for m in opt.modules:
                p_grad_mat = opt._get_matrix_form_grad(m, classname)
                size = p_grad_mat.size()
                sizes[m] = size
        for m in opt.modules:
            classname = m.__class__.__name__
            #if self.steps % self.TInv == 0:
            v = torch.rand(sizes[m], requires_grad=True).cuda()
            for x in range(10):
                v1 = opt.Q_g[m].t() @ v @ opt.Q_a[m]
                v2 = v1 / (opt.d_g[m].unsqueeze(1) * opt.d_a[m].unsqueeze(0))
                v = opt.Q_g[m] @ v2 @ opt.Q_a[m].t()
                v = v/torch.norm(v)
            v1 = opt.Q_g[m].t() @ v @ opt.Q_a[m]
            v2 = v1 / (opt.d_g[m].unsqueeze(1) * opt.d_a[m].unsqueeze(0))
            out = opt.Q_g[m] @ v2 @ opt.Q_a[m].t()
            
            v = v.reshape(-1)
            out = out.reshape(-1)

            vs.append(v)
            outs.append(out)

        out = torch.cat(outs)
        v = torch.cat(vs)   
        eig = torch.dot(v.t(), out)
        eig = eig/torch.dot(v.t(), v)
        return eig*self.alpha


class EINLoss(nn.Module):
    def __init__(self, alpha):
        super(EINLoss, self).__init__()
        self.alpha = alpha
    def forward(self, opt):
        total_cond = torch.tensor(1., requires_grad=True)
        for matrix in list(opt.m_aa.values()):
            cond = torch.norm(matrix)/torch.norm(matrix.inverse())
            total_cond = total_cond+cond
        for diag in list(opt.m_gg.values()):
            cond = torch.norm(matrix)/torch.norm(matrix.inverse())
            total_cond = total_cond+cond
        ein_l = total_cond*self.alpha
        return ein_l

ein_loss = EINLoss(alpha)

old_acc = 0
init = False
ein_loss_list = []
for epoch in range(num_epochs):
    print('epoch',epoch)
    for i,(image,labels) in enumerate(train_loader):
        print('batch',i)
        image = image.view(-1,784).cuda()
        outputs = net(image)

        labels = labels.view(-1).long().cuda()

        if opt.steps % opt.TCov == 0:
            # compute true fisher
            opt.acc_stats = True
            with torch.no_grad():
                sampled_y = torch.multinomial(torch.nn.functional.softmax(outputs.cpu().data, dim=1),
                                              1).cuda().view(-1)
            loss_sample = loss(outputs, sampled_y)
            loss_sample.backward(retain_graph=True)
            opt.acc_stats = False
            opt.zero_grad()  # clear the gradient for computing true-fisher.     
        
        if init==True:
            l = loss(outputs,labels)
            ein_l = opt.get_fisher_cond()
            ein_l.retain_grad()
            ein_loss_list.append(ein_l.item())
            #total_loss = l+ein_l
        else:
            total_loss = loss(outputs,labels)  
            init=True
        #opt.zero_grad()
        if init==True:
            ein_l.backward()
        else:
            total_loss.backward()
        #nn.utils.clip_grad_value_(net.parameters(), .0001)
        opt.step()

        loss_list.append(total_loss.item())
        total = labels.size(0)
        _,predicted = torch.max(outputs.data,1)
        correct = (predicted==labels).sum().item()
        acc_list.append(correct/total)
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                .format(epoch + 1, num_epochs, i + 1, total_step, total_loss.item(),
                        correct / total) * 100)
    total_test = 0
    correct_test = 0
    for i,(image,labels) in enumerate(test_loader):
        image = image.view(-1,784).cuda()
        outputs = net(image)
        labels = labels.view(-1).long().cuda()
        _,predicted = torch.max(outputs.data,1)
        correct = (predicted==labels).sum().item()
        correct_test+=correct
        total = labels.size(0)
        total_test+=total
    acc = correct_test/total_test
    if acc<old_acc:
        break
    print('TEST ACC: ',acc, 'CORRECT:', correct_test, 'TOTAL:', total_test)

epoch 0
batch 0


RuntimeError: ignored

In [26]:
ein_l

tensor(7., device='cuda:0', grad_fn=<MulBackward0>)

In [16]:
PATH = '/content/drive/My Drive/KFAC/MLP_KFAC_YAR.pt'
torch.save(net.state_dict(), PATH)

In [12]:
torch.cuda.empty_cache()

In [None]:
for x,i in net.named_parameters():
    print(x,i)

In [32]:
epsilons = [0, 0.02, .05, 0.07, .10, .15, .20, .25, .3, .35, .4]

def test(model, test_loader, epsilon):

    # Accuracy counter
    correct = 0
    total = 0

    adv = []
    # Loop over all examples in test set
    for data, target in test_loader:

        # Set requires_grad attribute of tensor. Important for Attack

        # Forward pass the data through the model
        data = data.view(-1,784).cuda()
        data.requires_grad = True

        output = model(data)
        init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability

        # If the initial prediction is wrong, dont bother attacking, just move on
        """if epsilon==0:
            correct+= (init_pred.view(-1)==target.view(-1)).sum()
            total +=len(init_pred.view(-1))
            continue"""
        # Calculate the loss
        l = loss(output, target.cuda())

        # Zero all existing gradients
        model.zero_grad()

        # Calculate gradients of model in backward pass
        l.backward()

        # Collect datagrad
        data_grad = data.grad.data

        # Call FGSM Attack
        perturbed_data = fgsm_attack(data, epsilon, data_grad)

        # Re-classify the perturbed image
        perturbed_data = perturbed_data.view(-1,784)
        output = model(perturbed_data)

        # Check for success
        final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct+= (final_pred.cpu().view(-1)==target.cpu().view(-1)).sum()
        total +=len(final_pred.view(-1))
        adv.append(perturbed_data[:5])

    # Calculate final accuracy for this epsilon
    final_acc = correct.item()/total
    print("Epsilon: {}\tTest Accuracy = {} Correct = {} Total = {}".format(epsilon, final_acc,correct,total))

    # Return the accuracy and an adversarial example
    return final_acc, adv

In [33]:
accuracies_icm = []
examples_icm = []

# Run test for each epsilon
for eps in epsilons:
    acc, adv = test(net, test_loader, eps)
    accuracies_icm.append(acc)
    examples_icm.append(adv)

Epsilon: 0	Test Accuracy = 0.8266 Correct = 8266 Total = 10000
Epsilon: 0.02	Test Accuracy = 0.7598 Correct = 7598 Total = 10000
Epsilon: 0.05	Test Accuracy = 0.6401 Correct = 6401 Total = 10000
Epsilon: 0.07	Test Accuracy = 0.5484 Correct = 5484 Total = 10000
Epsilon: 0.1	Test Accuracy = 0.3986 Correct = 3986 Total = 10000
Epsilon: 0.15	Test Accuracy = 0.1571 Correct = 1571 Total = 10000
Epsilon: 0.2	Test Accuracy = 0.0504 Correct = 504 Total = 10000
Epsilon: 0.25	Test Accuracy = 0.0101 Correct = 101 Total = 10000
Epsilon: 0.3	Test Accuracy = 0.0015 Correct = 15 Total = 10000
Epsilon: 0.35	Test Accuracy = 0.0 Correct = 0 Total = 10000
Epsilon: 0.4	Test Accuracy = 0.0 Correct = 0 Total = 10000


In [36]:
accuracies_kfac_yar = []
examples_kfac_yar = []

# Run test for each epsilon
for eps in epsilons:
    acc, adv = test(net, test_loader, eps)
    accuracies_kfac_yar.append(acc)
    examples_kfac_yar.append(adv)

Epsilon: 0	Test Accuracy = 0.8029 Correct = 8029 Total = 10000
Epsilon: 0.02	Test Accuracy = 0.7364 Correct = 7364 Total = 10000
Epsilon: 0.05	Test Accuracy = 0.6248 Correct = 6248 Total = 10000
Epsilon: 0.07	Test Accuracy = 0.5388 Correct = 5388 Total = 10000
Epsilon: 0.1	Test Accuracy = 0.3988 Correct = 3988 Total = 10000
Epsilon: 0.15	Test Accuracy = 0.1643 Correct = 1643 Total = 10000
Epsilon: 0.2	Test Accuracy = 0.0521 Correct = 521 Total = 10000
Epsilon: 0.25	Test Accuracy = 0.011 Correct = 110 Total = 10000
Epsilon: 0.3	Test Accuracy = 0.0009 Correct = 9 Total = 10000
Epsilon: 0.35	Test Accuracy = 0.0 Correct = 0 Total = 10000
Epsilon: 0.4	Test Accuracy = 0.0 Correct = 0 Total = 10000


In [11]:
!pip install plotly -U

Requirement already up-to-date: plotly in /usr/local/lib/python3.6/dist-packages (4.12.0)


In [None]:
import plotly.graph_objects as go

fig = go.Figure(go.Scatter(x=epsilons, y=accuracies_icm, name='KFAC'))
fig.add_trace(go.Scatter(x=epsilons, y=accuracies, name='Adam'))
fig.update_layout(dict(title='FGSM Attack at Varying Epsilons'))
fig.update_xaxes(title='Epsilon')
fig.update_yaxes(title='Accuracy')

fig

In [None]:
examples[0]

[tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0',
        grad_fn=<SliceBackward>), tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0',
        grad_fn=<SliceBackward>)]

In [None]:
.data.cpu().numpy().reshape(28,28)

array([[0.02      , 0.        , 0.02      , 0.02      , 0.02      ,
        0.02      , 0.        , 0.        , 0.02      , 0.        ,
        0.        , 0.        , 0.02      , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.02      , 0.        ,
        0.        , 0.        , 0.02      , 0.02      , 0.        ,
        0.02      , 0.        , 0.        ],
       [0.02      , 0.        , 0.02      , 0.        , 0.02      ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.02      ,
        0.02      , 0.        , 0.        , 0.02      , 0.02      ,
        0.02      , 0.        , 0.        , 0.        , 0.02      ,
        0.        , 0.        , 0.        ],
       [0.        , 0.02      , 0.02      , 0.        , 0.        ,
        0.02      , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.02      , 0.02      ,
        0.02      , 0.02  

In [None]:
from PIL import Image
import plotly.graph_objects as go
import plotly.express as px
for idx in range(len(epsilons)):
    x = examples[idx][0][0]
    img= px.imshow(x.data.cpu().numpy().reshape(28,28))
    img.show()

In [None]:
x

tensor(0., device='cuda:0', grad_fn=<UnbindBackward>)

In [None]:
!pip install plotly -U

Collecting plotly
[?25l  Downloading https://files.pythonhosted.org/packages/68/47/cec583df9ffb6142b84698c1be2124ef8f786f1e9a4307fae331377ccddc/plotly-4.11.0-py2.py3-none-any.whl (13.1MB)
[K     |████████████████████████████████| 13.1MB 319kB/s 
Installing collected packages: plotly
  Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-4.11.0


In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=epsilons, y=accuracies_icm[:6], name='Ill-Condition Matrix Loss'))
fig.add_trace(go.Scatter(x=epsilons, y=accuracies[:6], name='Cross Entropy Loss'))
fig.update_xaxes(title="FSGM Perturbment Epsilon")
fig.update_yaxes(title="Adverserial Test Accuracy")
fig