In [2]:
import numpy as np
import torch as T
from torch.autograd import grad
from hessian import hessian

In [15]:
class PhysicsLoss(T.nn.Module):

    def __init__(self, k):
        super(PhysicsLoss, self).__init__()
        self.k = k
    def N(self, out, v, dt):
        laplacian = T.diagonal(hessian(out.view(1), model.parameters()).size()) 
        return T.sum(laplacian) -  dt / v / v
        
    def forward(self, fd, out, physics_nn, dt, v):
        boundary = T.mean(T.pow(T.abs(fd - out), 2))
        physics = T.mean(self.N(physics_nn, v, dt))
        return boundary + k * physics

In [16]:
import torch
from torch import Tensor
from torch.autograd import Variable
from torch.autograd import grad
from torch import nn

# some toy data
x = Variable(Tensor([4., 2.]), requires_grad=False)
y = Variable(Tensor([1.]), requires_grad=False)

# linear model and squared difference loss
model = nn.Sequential(
    nn.Linear(2, 4),
    nn.Linear(4, 1),
)

loss = torch.sum((y - model(x))**2)

# instead of using loss.backward(), use torch.autograd.grad() to compute gradients
loss_grads = grad(loss, model.parameters(), create_graph=True)

# compute the second order derivative w.r.t. each parameter

AssertionError: 

In [21]:
import torch
from torch import Tensor
from torch.autograd import Variable
from torch.autograd import grad
from torch import nn

torch.manual_seed(1)
x = Variable(Tensor([2.]), requires_grad=False)

model = nn.Linear(1, 1, bias=False)

x0 = [par.data for par in model.parameters()][0]
print(x0)

loss = torch.sum(model(x)**2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_grads = grad(loss, model.parameters(), create_graph=True)
gn2 = sum([grd.norm()**2 for grd in loss_grads]) / 2 # 2nd derive
print('loss %f grad norm %f' % (loss.data, gn2.data))

for grd in loss_grads:
    grd = grd.retain_grad()

model.zero_grad()
gn2.backward(retain_graph=True)

for grd in loss_grads:
    print(8 * x0, grd.data[0], grd.grad)





tensor([[0.5153]])
loss 1.061985 grad norm 8.495877
tensor([[4.1221]]) tensor([4.1221]) tensor([[4.1221]])


In [32]:
import torch                                                                                          
                                                                                                      
def jacobian(y, x, create_graph=False):                                                               
    jac = []                                                                                          
    flat_y = y.reshape(-1)                                                                            
    grad_y = torch.zeros_like(flat_y)                                                                 
    for i in range(len(flat_y)):                                                                      
        grad_y[i] = 1.                                                                                
        grad_x, = torch.autograd.grad(flat_y, x, grad_y, retain_graph=True, create_graph=create_graph)
        jac.append(grad_x.reshape(x.shape))                                                           
        grad_y[i] = 0.                                                                                
    return torch.stack(jac).reshape(y.shape + x.shape)                                                
                                                                                                      
def hessian(y, x):                                                                                    
    return jacobian(jacobian(y, x, create_graph=True), x)                                             
                                                                                                      
def f(x):                                                                                             
    return x * x * torch.arange(4, dtype=torch.float)                                                 
                                                                                                      
x = torch.ones(4, requires_grad=True)                                                                 
print(jacobian(f(x), x))                                                                              
print(hessian(f(x), x))   

tensor([[0., 0., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 4., 0.],
        [0., 0., 0., 6.]])
tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 2., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 4., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 6.]]])


In [34]:
sample

tensor([[12., 15.,  1.]])

In [52]:
t = 1
x = 12
z = 15

sample = [x, z, t]
sample = torch.Tensor(sample).view(1, -1)
sample.requires_grad = True

model = nn.Sequential(
    nn.Linear(3, 3),
    nn.Linear(3, 1)
)

hessian(model(sample), sample)

RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.

In [43]:
!pip install backpack-for-pytorch

Collecting backpack-for-pytorch
  Using cached backpack_for_pytorch-1.2.0-py3-none-any.whl (160 kB)


ERROR: Could not find a version that satisfies the requirement torchvision<1.0.0,>=0.7.0 (from backpack-for-pytorch) (from versions: 0.1.6, 0.1.7, 0.1.8, 0.1.9, 0.2.0, 0.2.1, 0.2.2, 0.2.2.post2, 0.2.2.post3, 0.3.0, 0.4.1, 0.5.0)
ERROR: No matching distribution found for torchvision<1.0.0,>=0.7.0 (from backpack-for-pytorch)


In [44]:
torch.autograd.functional.hessian(model, sample, create_graph=False, strict=False)

tensor([[[[0., 0., 0.]],

         [[0., 0., 0.]],

         [[0., 0., 0.]]]])

In [45]:
# https://pytorch.org/docs/stable/autograd.html

In [53]:
def laplace(fx: torch.Tensor, x: torch.Tensor):
    """
    Laplacian (= sum of 2nd derivations)
     of (evaluated) nd->1d-function fx w.r.t. nd-tensor x
    :rtype: torch.Tensor
    """
    dfx = fx
    dfx = torch.autograd.grad(dfx, x, create_graph=True)[0]
    ddfx = []
    for i in range(len(x)):
        vec = torch.tensor([(1 if i == j else 0) for j in range(len(dfx))], dtype=torch.float)
        ddfx += [torch.autograd.grad(
            dfx,
            x,
            create_graph=True,
            grad_outputs=vec
        )[0][i]]
    ret = sum(ddfx)
    return ret

In [54]:
laplace(model(sample), sample)

RuntimeError: Mismatch in shape: grad_output[0] has a shape of torch.Size([1]) and output[0] has a shape of torch.Size([1, 3]).