## Hessian Calculation thorugh hessian-vector Product Example
Purpose: 
- To demonstrate the usage of `hessian_fun` and `gradient_fun` from `pytorchpruner.utils`.
- These functions are also used as tests/


In [1]:
import torch
from torch.autograd import Variable
from torch.nn import Parameter
import sys
sys.path.insert(0,"../")
from pytorchpruner.utils import hessian_fun,gradient_fun

Lets first compare the gradient, as you can see the autograd corrrectly calculates the gradient **G(w)**

In [2]:
# We have 3 random parameters
w = Parameter(torch.rand(3))
print(w)

def L(w):
    #A custom loss function
    return (w[0]**2)*w[1]+4*(w[2]**3)*w[0]

def G(w):
    #Jacobian of the L(w)
    wd = w.data
    return torch.Tensor([2*wd[0]*wd[1]+4*(wd[2]**3),
                         wd[0]**2,
                        12*(wd[2]**2)*wd[0]])

#torch.autograd
loss_val = L(w)
autograd_grad = gradient_fun(loss_val, w).data
#emprical gradient
correct_grad = G(w)
print('The difference is: ',(autograd_grad-correct_grad).abs().sum())


Parameter containing:
 0.6152
 0.2209
 0.9314
[torch.FloatTensor of size 3]

The difference is:  0.0


## Lets try 2d

In [3]:
# We have 4 random parameters
w = Parameter(torch.rand(2,2))
print(w)

def L2d(w):
    '''
    A custom loss function
        w00 w01
        w10 w11
    '''    
    return (w[1,0]**2)*w[1,1]+4*(w[0,0]**3)*w[1,0]

def G2d(w):
    #Jacobian of the L(w)
    wd = w.data
    return torch.Tensor([
                         [12*(wd[0,0]**2)*wd[1,0],
                          0],
                         [2*wd[1,0]*wd[1,1]+4*(wd[0,0]**3),
                          wd[1,0]**2]
                        ])

#torch.autograd
loss_val = L2d(w)
autograd_grad = gradient_fun(loss_val, w).data
print(autograd_grad)
#emprical gradient
correct_grad = G2d(w)
print(correct_grad)
print('The difference is: ',(autograd_grad-correct_grad).abs().sum())


Parameter containing:
 0.9894  0.6487
 0.8904  0.2573
[torch.FloatTensor of size 2x2]


 10.4586   0.0000
  4.3320   0.7928
[torch.FloatTensor of size 2x2]


 10.4586   0.0000
  4.3320   0.7928
[torch.FloatTensor of size 2x2]

The difference is:  0.0


now lets calculates hessian through hessian vector product

In [4]:
w = Parameter(torch.rand(3))

def H(w):
    #Hessian of the L(w)
    wd=w.data
    gw12 = 2*wd[0]
    gw13 = 12*(wd[2]**2)
    gw23 = 0
    gw11 = 2*wd[1]
    gw22 = 0
    gw33 = 24*wd[2]*wd[0]  
    return torch.Tensor([[gw11,gw12,gw13],
                         [gw12,gw22,gw23],
                         [gw13,gw23,gw33]])
    
a = L(w)
hessian = hessian_fun(a,w)
    
print(hessian)
print(torch.sum(torch.abs(hessian-H(w))))



 1.1133  1.8402  0.3115
 1.8402  0.0000  0.0000
 0.3115  0.0000  3.5577
[torch.FloatTensor of size 3x3]

0.0


## Lets try 2d

In [5]:
w = Parameter(torch.rand(2,2))
    
def H2d(w):
    #Hessian of the L(w)
    wd=w.data
    gw12 = 2*wd[1,0]
    gw13 = 12*(wd[0,0]**2)
    gw23 = 0
    gw11 = 2*wd[1,1]
    gw22 = 0
    gw33 = 24*wd[0,0]*wd[1,0] 
    ## x3 0
    ## x1 x2
    return torch.Tensor([[[[gw33,0], #0,0,0,:
                           [gw13,gw23]], #0,0,1,: #x3 with others
                          [[0,0], #0,1,0,:
                           [0,0]]], #0,1,1,: 
                         [[[gw13,0], #1,0,0,:
                           [gw11,gw12]], #1,0,1,: #x1 with others
                          [[gw23,0], #1,1,0,:
                          [gw12,gw22]]]]) #1,1,1,: #x2 with others

a = L2d(w)
hessian = hessian_fun(a,w)

print(torch.stack(hessian))
print(torch.sum(torch.abs(hessian-H2d(w))))


(0 ,0 ,.,.) = 
  0.4570  0.0000
  0.0136  0.0000

(0 ,1 ,.,.) = 
  0.0000  0.0000
  0.0000  0.0000

(1 ,0 ,.,.) = 
  0.0136  0.0000
  1.4941  1.1315

(1 ,1 ,.,.) = 
  0.0000  0.0000
  1.1315  0.0000
[torch.FloatTensor of size 2x2x2x2]

0.0


## Let's try selecting some parameters 

In [7]:
# We have 3 random parameters
genesis = torch.rand(3)
w = Parameter(genesis)
w2 = Parameter(genesis)

def L2(w,w2):
    #A custom loss function
    return (w[0]**2)*w[1]+4*(w[2]**3)*w[0]+(w2[0]**2)*w2[1]+4*(w2[2]**3)*w2[0]

def G2(w,w2):
    #Jacobian of the L(w)
    wd = w.data
    wd2 = w2.data
    return (torch.Tensor([2*wd[0]*wd[1]+4*(wd[2]**3),
                         wd[0]**2,
                        12*(wd[2]**2)*wd[0]]),
            torch.Tensor([2*wd2[0]*wd2[1]+4*(wd2[2]**3),
                         wd2[0]**2,
                        12*(wd2[2]**2)*wd2[0]])
           )

#torch.autograd
loss_val = L2(w,w2)
autograd_grad = gradient_fun(loss_val, w2).data
#emprical gradient
correct_grad = G2(w,w2)[1]
print(autograd_grad)
print('The difference is: ',(autograd_grad-correct_grad).abs().sum())



1.00000e-02 *
  3.2827
  6.8760
  1.9848
[torch.FloatTensor of size 3]

The difference is:  5.587935447692871e-09


## Now for hessian

In [9]:
# We have 3 random parameters
genesis = torch.rand(3)
w = Parameter(genesis)
w2 = Parameter(genesis)

def L2(w,w2):
    #A custom loss function
    return (w[0]**2)*w[1]+4*(w[2]**3)*w[0]+(w2[0]**2)*w2[1]+4*(w2[2]**3)*w2[0]

def H2(w,w2):
    #Hessian of the L(w)
    wd=w.data
    wd2 = w2.data
    
    gw12 = 2*wd[0]
    gw13 = 12*(wd[2]**2)
    gw23 = 0
    gw11 = 2*wd[1]
    gw22 = 0
    gw33 = 24*wd[2]*wd[0]  
    return (torch.Tensor([[gw11,gw12,gw13],
                         [gw12,gw22,gw23],
                         [gw13,gw23,gw33]]),
            torch.Tensor([[gw11,gw12,gw13],
                         [gw12,gw22,gw23],
                         [gw13,gw23,gw33]])
           )
    

losses = L2(w,w2)
hessian2 = hessian_fun(losses,w2)
    
print(hessian)
print(torch.sum(torch.abs(hessian2-H2(w,w2)[1])))



(0 ,0 ,.,.) = 
  0.4570  0.0000
  0.0136  0.0000

(0 ,1 ,.,.) = 
  0.0000  0.0000
  0.0000  0.0000

(1 ,0 ,.,.) = 
  0.0136  0.0000
  1.4941  1.1315

(1 ,1 ,.,.) = 
  0.0000  0.0000
  1.1315  0.0000
[torch.FloatTensor of size 2x2x2x2]

0.0
