## Native Hv Product in PyTorch Test
Purpose:
- Demonstrate the Hessian calculation for a network's parameters through `pytorchpruner.utils.hessian_fun`.
- Compare it with the recent [hessian_pytorch](https://github.com/antigol/hessian_pytorch) package by [Mario](https://github.com/antigol). The time difference is because of the preloaded data; he has also updated the package and reported that it is now 4 times faster.


In [1]:
import torch
import os
import sys
sys.path.insert(0, '../')

import pytorchpruner

# Whether CUDA is available; a later cell uses this flag to decide whether to
# move tensors to the GPU.
use_cuda = torch.cuda.is_available()

In [2]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import datasets
import random 

# The training "batch" size doubles as the size of the tiny training subset,
# so one batch from `trainloader` covers the whole subset.
test_batch = 128
train_batch = 32

# Shared preprocessing for both splits: convert to a tensor, then normalize
# with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_MNIST_data_full = datasets.MNIST('./data', train=True, download=True,
                                       transform=mnist_transform)

# Index only the first `train_batch` samples. Calling list(...) on the whole
# dataset would load and transform every training image just to keep a few.
train_dataset = [train_MNIST_data_full[i] for i in range(train_batch)]

# Seed Python's RNG and torch's RNG; the latter drives the DataLoader shuffle
# and the weight initialization of models created afterwards.
random.seed(5)
torch.manual_seed(5)
torch.set_default_tensor_type('torch.FloatTensor')

trainloader = DataLoader(train_dataset, batch_size=train_batch, shuffle=True)
testloader = DataLoader(
    datasets.MNIST('./data', train=False, transform=mnist_transform),
    batch_size=test_batch, shuffle=False)

In [18]:
from torch import nn
import torch.nn.functional as F

class Net(nn.Module):
    """Very small convolutional classifier with ten output classes.

    Two 5x5 convolutions, each followed by 2x2 max-pooling and ReLU, feed two
    fully connected layers. For 1x28x28 inputs the second pooling stage yields
    1x4x4 feature maps, i.e. the 16 features `fc1` expects.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Layers are registered in forward order; this fixes the order in
        # which `parameters()` yields them.
        self.conv1 = nn.Conv2d(1, 2, kernel_size=5)
        self.conv2 = nn.Conv2d(2, 1, kernel_size=5)
        self.fc1 = nn.Linear(16, 2)
        self.fc2 = nn.Linear(2, 10)
        # Describes the nonlinearity that follows each layer in `forward`.
        # Not read inside this class -- presumably consumed by pytorchpruner;
        # confirm against the library.
        self.nonlins = {
            'conv1': ('max_relu', (2, 2)),
            'conv2': ('max_relu', (2, 2)),
            'fc1': 'relu',
            'fc2': 'log_softmax',
        }

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        # Stage 1: convolution -> 2x2 max-pool -> ReLU.
        stage1 = F.max_pool2d(self.conv1(x), 2)
        stage1 = F.relu(stage1)
        # Stage 2: same pattern with the second convolution.
        stage2 = F.max_pool2d(self.conv2(stage1), 2)
        stage2 = F.relu(stage2)
        # Flatten to (batch, 16) for the fully connected layers.
        flat = stage2.view(-1, 16)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)


In [4]:
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.autograd import Variable
from pytorchpruner.utils import hessian_fun


x,y = list(trainloader)[0]
if use_cuda: #TODO test it
    x, y = x.cuda(), y.cuda()
x, y = Variable(x), Variable(y)

model = Net()
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-3)
optimizer.zero_grad()
output = model(x)
loss = criterion(output, y)
# print(flat_par)
%time hess = hessian_fun(loss,model.parameters(),flattened=True)
print(hess.size())


CPU times: user 1.82 s, sys: 157 ms, total: 1.98 s
Wall time: 1.07 s
torch.Size([167, 167])


## Mario's lib
https://github.com/antigol/hessian_pytorch/blob/master/examples/mnist_full_hessian.py

In [5]:
from hessian_pytorch import full_hessian
import numpy as np

def compute_hessian(model, dataset, batch_size=1000):
    """Compute the full Hessian of the dataset-averaged NLL loss via `full_hessian`.

    Args:
        model: network whose forward pass returns per-class log-probabilities
            (its output is passed straight to `F.nll_loss`).
        dataset: sized collection of (input, target) samples accepted by
            `torch.utils.data.DataLoader`.
        batch_size: number of samples per loss evaluation; defaults to the
            previously hard-coded value of 1000.

    Returns:
        The value returned by `full_hessian` for the model's trainable
        parameters.
    """
    model.eval()  # disable dropout
    # Follow the model's actual device instead of moving data to the GPU
    # whenever CUDA merely exists, which breaks for a model kept on the CPU.
    model_on_gpu = any(p.is_cuda for p in model.parameters())
    n_samples = len(dataset)

    def loss_function(batch):
        data, target = batch
        if model_on_gpu:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)

        output = model(data)
        # Summed batch loss divided by the dataset size: per-batch values add
        # up to the dataset mean (presumably `full_hessian` accumulates over
        # batches -- confirm against the library).
        return F.nll_loss(output, target, size_average=False) / n_samples

    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=1)
    parameters = [p for p in model.parameters() if p.requires_grad]
    return full_hessian(loss_function, loader, parameters)
    
# Time the hessian_pytorch-based computation on the same model and training subset.
%time hess2 = compute_hessian(model, train_dataset)

CPU times: user 7.51 s, sys: 3.48 s, total: 11 s
Wall time: 7.08 s


In [6]:
# Sum of absolute element-wise differences between the two Hessians; a value
# close to zero means the two implementations agree.
print((hess2-hess).abs().sum())


6.930403878868674e-05
