In [1]:
import numpy as np
import matplotlib.pyplot as plt
plt.style.use("ggplot")

import torch
from torch.autograd import grad

I don't know how to deal with PyTorch gradients.

Suppose I have a function $f(x) = x^2$ and a function $g(x) = x + \nabla_x f(x)$. How can I get $\nabla_x g(x)$?

In [2]:
def f(x):
    """Return the element-wise square of ``x``."""
    squared = x ** 2
    return squared


# Differentiate f at x = 2: back-propagating from the output fills `x.grad`
# with df/dx evaluated at the leaf tensor.
x = torch.tensor([2.0], requires_grad=True)
y = f(x)

y.backward()
x.grad

tensor([4.])

For reference, in this case $g(x) = 3x$ and $\nabla_x g(x)=3$.

In [3]:
### torch.autograd.grad test
x = torch.tensor([2.0], requires_grad=True)

def h(x):
    """Return the element-wise cube of ``x``."""
    return x**3

def h_prime(x):
    """
    Return dh/dx evaluated at ``x`` as a tensor that is itself differentiable.

    ``create_graph=True`` records the gradient computation in the autograd
    graph, so the returned derivative can be back-propagated through again.
    ``h(x)`` must contain a single element because no ``grad_outputs`` is
    supplied.
    """
    # `only_inputs` is deprecated and ignored by torch.autograd.grad, so it is
    # not passed here.
    return grad(h(x), x, create_graph=True)[0]

# Back-propagating through the first derivative leaves the second derivative
# of h in `x.grad`.
z = h_prime(x)
z.backward()

x.grad

tensor([12.])

Yeah, this library is amazing. I should try to implement this myself at some point.

In [4]:
def g(x):
    """
    Evaluate ``x + df/dx`` at ``x``, using the notebook-level ``f``.

    The derivative is taken with ``create_graph=True`` so that the returned
    value can itself be differentiated with respect to ``x``.
    """
    (df_dx,) = grad(f(x), x, create_graph=True)
    return x + df_dx


# Back-propagate through g to obtain dg/dx at x = -4.
x = torch.tensor([-4.0], requires_grad=True)
y = g(x)

y.backward()
x.grad

tensor([3.])

**langevin-stein**

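Now to test this on the Langevin-Stein operator: $$\mathcal{A}_p\phi(x) = \phi(x)\,\nabla_x \log p(x)^\top + \nabla_x\phi(x)$$ For a one-dimensional $x$, as in the test below, this reduces to $\phi(x)\,\frac{d}{dx}\log p(x) + \phi'(x)$.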

In [5]:
def langevin_stein(p, phi, x):
    """
    Trace of the Langevin-Stein operator applied to ``phi`` at ``x``:

        phi(x) . grad_x log p(x)  +  div_x phi(x)

    For a one-element ``x`` this is ``phi(x) * d/dx log p(x) + phi'(x)``.

    Parameters
    ----------
    p : callable
        Density function; ``p(x)`` must be a positive tensor holding a single
        element so that ``log p(x)`` can be differentiated without
        ``grad_outputs``.
    phi : callable
        Test function; ``phi(x)`` must be a 1-D tensor with the same length
        as ``x`` (required by ``torch.dot``).
    x : torch.Tensor
        1-D tensor with ``requires_grad=True``.

    Returns
    -------
    torch.Tensor
        Tensor of shape ``(1,)``. Every gradient is taken with
        ``create_graph=True``, so the result stays attached to the autograd
        graph and can be back-propagated through.
    """
    def logp(x):
        return torch.log(p(x))

    # Evaluate phi once; both terms of the operator reuse the same graph.
    phi_x = phi(x)
    nabla_logp = grad(logp(x), x, create_graph=True)[0]
    inner = torch.dot(phi_x, nabla_logp)

    # `grad` without `grad_outputs` only accepts single-element outputs, so
    # the divergence sum_i d phi_i / d x_i is accumulated one component at a
    # time. For a one-element x this is just phi'(x).
    divergence = sum(
        grad(phi_x[i], x, create_graph=True)[0][i]
        for i in range(phi_x.numel())
    )
    return (inner + divergence).reshape(1)

In [6]:
def phi(x):
    """Test function for the operator check: the element-wise cube of ``x``."""
    cubed = x ** 3
    return cubed

def p(x):
    """Density used for the operator check: the element-wise ``exp`` of ``x``."""
    density = torch.exp(x)
    return density

# Evaluate the operator at x = 1 and differentiate the result w.r.t. x.
x = torch.tensor([1.0]).requires_grad_()
y = langevin_stein(p, phi, x)

# With p(x) = exp(x) and phi(x) = x**3:
#   langevin_stein(p, phi, x) = x**3 + 3*x**2
#   d/dx of that              = 3*x**2 + 6*x
y.backward()
x.grad

tensor([9.])

**vectorizing**

In [7]:
def f(x):
    """Return the element-wise square of ``x``."""
    squared = x ** 2
    return squared


# 2x2 leaf tensor used by the vectorised gradient experiments.
x = torch.tensor([[1.0, 2.0], [3.0, 4.0]]).requires_grad_()
print(x)

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)


In [8]:
# Element-wise values of f at every entry of x.
print(f(x))

# f(x) is not a scalar, so `grad` needs an explicit upstream gradient; an
# all-ones tensor of the same shape as x is supplied as `grad_outputs`.
upstream = torch.ones_like(x)
(df_dx,) = grad(f(x), x, grad_outputs=upstream, create_graph=True)
df_dx

tensor([[ 1.,  4.],
        [ 9., 16.]], grad_fn=<PowBackward0>)


tensor([[2., 4.],
        [6., 8.]], grad_fn=<MulBackward0>)

In [16]:
# Multiply a 1x2 row vector by a 2x2 matrix; both operands track gradients,
# so the product carries a grad_fn.
x = torch.tensor([[1.0, 1.0]], requires_grad=True)
ker = torch.tensor(
    [[1.0, 2.0],
     [-1.0, 1.0]],
    requires_grad=True,
)
x.mm(ker)

tensor([[0., 3.]], grad_fn=<MmBackward>)

In [28]:
def f(x):
    """
    Square the entry at index 0 of ``x`` and cube the entry at index 1.

    The result starts as zeros shaped like ``x``; only indices 0 and 1 along
    the first dimension are written, so any further entries stay zero.
    """
    out = torch.zeros_like(x)
    for index, power in enumerate((2, 3)):
        out[index] = x[index] ** power
    return out


# Gradient of the non-scalar f(x) w.r.t. x, with an all-ones upstream gradient.
x = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
upstream = torch.ones_like(x)
grad(f(x), x, grad_outputs=upstream)

(tensor([[ 2.,  4.],
         [27., 48.]]),)