In [1]:
import torch

## `torch.autograd.grad`

In [44]:
def flat_grad(y, x, retain_graph=False, create_graph=False):
    """Return the gradient of ``y`` with respect to ``x`` as one flat 1-D tensor.

    Args:
        y: Tensor forwarded as ``outputs`` to ``torch.autograd.grad``.
        x: Tensor, or sequence of tensors, forwarded as ``inputs``.
        retain_graph: Keep the autograd graph alive after this call.
            Forced to ``True`` whenever ``create_graph`` is set.
        create_graph: Forwarded to ``torch.autograd.grad`` so the returned
            gradient can itself be differentiated.

    Returns:
        A 1-D tensor holding every per-input gradient, each flattened and
        concatenated in the order of ``x``.
    """
    if create_graph:
        retain_graph = True

    grads = torch.autograd.grad(y, x, retain_graph=retain_graph, create_graph=create_graph)
    # reshape rather than view: a gradient can come back non-contiguous
    # (e.g. after flowing back through a transpose), and view(-1) raises on
    # such tensors while reshape copies only when it has to.
    return torch.cat([g.reshape(-1) for g in grads])

In [52]:
def f(x):
    """Scalar test function: the sum of the squared entries of ``x``."""
    squared = x.pow(2)
    return squared.sum()

In [53]:
# Evaluation point [0, 1, 2, 3]; requires_grad=True makes autograd track it.
x = torch.tensor([0., 1., 2., 3.], dtype=torch.float32, requires_grad=True)
x

tensor([0., 1., 2., 3.], requires_grad=True)

In [54]:
# Gradient of f at x, computed functionally (x.grad is left untouched).
flat_grad(y=f(x), x=x)

tensor([0., 2., 4., 6.])

In [55]:
# Same gradient via .backward(), which *accumulates* into x.grad.
# Clear any previously accumulated value first so that re-running this cell
# still displays the gradient of f rather than a running sum.
x.grad = None
f(x).backward()
x.grad

tensor([0., 2., 4., 6.])

## Jacobian

$\mathbf f: R^n\rightarrow R^m, \mathbf J\in R^{m\times n}$

$$
\mathbf J = \begin{bmatrix}
  \dfrac{\partial \mathbf{f}}{\partial x_1} & \cdots & \dfrac{\partial \mathbf{f}}{\partial x_n}
\end{bmatrix}
= \begin{bmatrix}
  \nabla^{\mathrm T} f_1 \\  
  \vdots \\
  \nabla^{\mathrm T} f_m   
\end{bmatrix}
= \begin{bmatrix}
    \dfrac{\partial f_1}{\partial x_1} & \cdots & \dfrac{\partial f_1}{\partial x_n}\\
    \vdots                             & \ddots & \vdots\\
    \dfrac{\partial f_m}{\partial x_1} & \cdots & \dfrac{\partial f_m}{\partial x_n}
\end{bmatrix}
$$

In [2]:
def f(x):
    """Vector-valued test function: element-wise ``x * x`` scaled by ``[0, 1, 2, 3]``."""
    squared = x * x
    coefficients = torch.arange(4, dtype=torch.float32)
    return squared * coefficients

$$
\begin{split}
\mathbf f(\mathbf x)=
\begin{bmatrix}
f_1(x_1)\\
f_2(x_2)\\
f_3(x_3)\\
f_4(x_4)
\end{bmatrix}=\begin{bmatrix}
0\\
x_2^2\\
2 x_3^2\\
3 x_4^2
\end{bmatrix}
\end{split}
$$


$$
\mathbf J=\begin{bmatrix}
\dfrac{\partial f_1}{\partial x_1} & \dfrac{\partial f_1}{\partial x_2} & \dfrac{\partial f_1}{\partial x_3} & \dfrac{\partial f_1}{\partial x_4}\\
\dfrac{\partial f_2}{\partial x_1} & \dfrac{\partial f_2}{\partial x_2} & \dfrac{\partial f_2}{\partial x_3} & \dfrac{\partial f_2}{\partial x_4}\\
\dfrac{\partial f_3}{\partial x_1} & \dfrac{\partial f_3}{\partial x_2} & \dfrac{\partial f_3}{\partial x_3} & \dfrac{\partial f_3}{\partial x_4}\\
\dfrac{\partial f_4}{\partial x_1} & \dfrac{\partial f_4}{\partial x_2} & \dfrac{\partial f_4}{\partial x_3} & \dfrac{\partial f_4}{\partial x_4}\\
\end{bmatrix}=\begin{bmatrix}
0 & 0 & 0 & 0\\
0 & 2x_2 & 0 & 0\\
0 & 0 & 4x_3 & 0\\
0 & 0 & 0 & 6x_4\\
\end{bmatrix}
$$

In [17]:
def jacobian(y, x, create_graph=False):
    """Compute the Jacobian of ``y`` with respect to ``x``, one output element at a time.

    Args:
        y: Tensor computed from ``x`` (any shape, including 0-dim).
        x: A single tensor that ``y`` was computed from.
        create_graph: Forwarded to ``torch.autograd.grad`` so the returned
            Jacobian can itself be differentiated (used to build Hessians).

    Returns:
        Tensor of shape ``y.shape + x.shape`` whose leading indices select an
        element of ``y`` and whose trailing indices select an element of ``x``.
    """
    flat_y = y.reshape(-1)
    n_outputs = flat_y.numel()
    if n_outputs == 0:
        # Nothing to differentiate; torch.stack would raise on an empty list.
        return x.new_zeros(y.shape + x.shape)

    rows = []
    for i in range(n_outputs):
        # Build a fresh one-hot selector on every iteration instead of mutating
        # a shared buffer: for pass-through ops autograd can return a tensor
        # that aliases grad_outputs, so editing the buffer afterwards would
        # corrupt rows already collected (and, with create_graph=True, tensors
        # saved for a later double backward).
        grad_y = torch.zeros_like(flat_y)
        grad_y[i] = 1.
        grad_x, = torch.autograd.grad(flat_y, x, grad_y, retain_graph=True, create_graph=create_graph)
        rows.append(grad_x.reshape(x.shape))
    return torch.stack(rows).reshape(y.shape + x.shape)

In [18]:
# Evaluation point for the Jacobian/Hessian examples: a tracked vector of ones.
x = torch.ones(4).requires_grad_()
x

tensor([1., 1., 1., 1.], requires_grad=True)

In [40]:
# Evaluate f at the current x; the result carries a grad_fn because x is tracked.
f(x)

tensor([0., 1., 2., 3.], grad_fn=<MulBackward0>)

In [43]:
# Shape of the flattened output; this is the same `reshape(-1)` step that
# `jacobian` applies to `y` before looping over its elements.
f(x).reshape(-1).shape

torch.Size([4])

In [22]:
# Reference result from PyTorch's built-in Jacobian (takes the function, not its output).
J = torch.autograd.functional.jacobian(func=f, inputs=x)
J

tensor([[0., 0., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 4., 0.],
        [0., 0., 0., 6.]])

In [20]:
# Hand-rolled Jacobian on the same input.
jacobian(y=f(x), x=x)

tensor([[0., 0., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 4., 0.],
        [0., 0., 0., 6.]])

## Hessian

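$f : R^n \to R$

The `hessian` helper below is applied to the vector-valued $\mathbf f : R^n \to R^m$ from the previous section, so its output stacks one $n \times n$ Hessian per component $f_k$, giving a tensor of shape $(m, n, n)$.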

$$
(\mathbf H_f)_{i,j} = \frac{\partial^2 f}{\partial x_i \, \partial x_j}, \qquad \mathbf H_f = \begin{bmatrix}
  \dfrac{\partial^2 f}{\partial x_1^2} & \dfrac{\partial^2 f}{\partial x_1\,\partial x_2} & \cdots & \dfrac{\partial^2 f}{\partial x_1\,\partial x_n} \\[2.2ex]
  \dfrac{\partial^2 f}{\partial x_2\,\partial x_1} & \dfrac{\partial^2 f}{\partial x_2^2} & \cdots & \dfrac{\partial^2 f}{\partial x_2\,\partial x_n} \\[2.2ex]
  \vdots & \vdots & \ddots & \vdots \\[2.2ex]
  \dfrac{\partial^2 f}{\partial x_n\,\partial x_1} & \dfrac{\partial^2 f}{\partial x_n\,\partial x_2} & \cdots & \dfrac{\partial^2 f}{\partial x_n^2}
\end{bmatrix}
$$

In [23]:
def hessian(y, x):
    """Second derivatives of ``y`` with respect to ``x``, as the Jacobian of the Jacobian.

    The first pass keeps its autograd graph (``create_graph=True``) so that
    the resulting first derivatives can be differentiated a second time.
    """
    first_order = jacobian(y, x, create_graph=True)
    return jacobian(first_order, x)

In [24]:
# f is vector-valued here, so this yields one 4x4 Hessian per output component.
hessian(y=f(x), x=x)

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 2., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 4., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 6.]]])

In [35]:
def _weighted_square_component(x, i):
    """Return element ``i`` of ``x * x * [0, 1, 2, 3]``."""
    return (x * x * torch.arange(4, dtype=torch.float32))[i]


def f_0(x):
    """Scalar component 0 of the weighted-square map."""
    return _weighted_square_component(x, 0)


def f_1(x):
    """Scalar component 1 of the weighted-square map."""
    return _weighted_square_component(x, 1)


def f_2(x):
    """Scalar component 2 of the weighted-square map."""
    return _weighted_square_component(x, 2)


def f_3(x):
    """Scalar component 3 of the weighted-square map."""
    return _weighted_square_component(x, 3)

In [37]:
# Built-in Hessian of each scalar component, printed in order f_0 .. f_3.
for component in (f_0, f_1, f_2, f_3):
    print(torch.autograd.functional.hessian(component, x))

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 4., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 6.]])


In [39]:
from functools import partial
def f_i(x, i):
    """Return element ``i`` of ``x * x * [0, 1, 2, 3]``."""
    coefficients = torch.arange(4, dtype=torch.float)
    scaled = x * x * coefficients
    return scaled[i]

# Bind the component index with partial so the built-in Hessian sees a function of x only.
torch.autograd.functional.hessian(func=partial(f_i, i=3), inputs=x)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 6.]])