In [10]:
# goal: calculate the derivative \del_\theta a(\theta) v, where a(\theta) is a
# vector field and v is a vector (below, a batch of vectors that is averaged over).
import torch
import random

# Seed torch's RNG, then create the parameter vector theta = (1, 1, 1) as a
# leaf tensor that records gradients.
torch.manual_seed(4232)
theta = torch.ones(3).requires_grad_()
def get_a_v():
  """Build the vector field a(theta) and the batch of vectors v.

  Reads the module-level tensor `theta` (entries 0, 1 and 2 are used).

  Returns:
    a: tensor of shape (2,) with
       a[0] = 1*theta[0]^2 + 2*theta[1]^2 + 3*theta[2]^2
       a[1] = 4*theta[0]^2 + 5*theta[1]^2 + 6*theta[2]^2
    v: constant tensor of shape (5, 2), one 2-component vector per row.
  """
  t0, t1, t2 = theta[0], theta[1], theta[2]
  field = torch.zeros(2)
  # Entries are written in place so `field` stays attached to theta's graph.
  field[0] = 1 * t0 ** 2 + 2 * t1 ** 2 + 3 * t2 ** 2
  field[1] = 4 * t0 ** 2 + 5 * t1 ** 2 + 6 * t2 ** 2
  rows = [[10.1, 20.], [9.9, 20]] + [[10, 20]] * 3
  batch = torch.Tensor(rows)
  return field, batch
"""
\del_\theta a(\theta) = \partial_\theta_i a_j(\theta)
= \partial_{\theta_1} a_1, \partial_{\theta_1} a_2
...

\del_\theta a(\theta) v = [ \partial_{\theta_1} a_1 * v_1 + \partial_{\theta_1} a_2 * v_2,
                            \partial_{\theta_2} a_1 * v_1 + ...,
                            \partial_{\theta_3} a_1 * v_1 + ...]
= [2 * \theta_1 * v_1 +  8 * \theta_1 * v_2,
   4 * \theta_2 * v_1 + 10 * \theta_2 * v_2,
   6 * \theta_3 * v_1 + 12 * \theta_3 * v_2]

\del_\theta sum a(\theta) = [10 * \theta_1, 14 * \theta_2, 18 * \theta_3]
"""
def get_true_grad(theta, v):
  """Hand-derived value of the derivative, averaged over the rows of v.

  Args:
    theta: tensor indexed at positions 0, 1 and 2.
    v: 2-D tensor; columns 0 and 1 are read and the mean is taken over rows.

  Returns:
    Tensor of shape (3,). Entry i is the row-mean of
    c1 * theta[i] * v[:, 0] + c2 * theta[i] * v[:, 1], with (c1, c2) equal to
    (2, 8), (4, 10) and (6, 12) for i = 0, 1, 2.
  """
  coefficients = ((2, 8), (4, 10), (6, 12))
  v_first, v_second = v[:, 0], v[:, 1]
  expected = torch.zeros(3)
  for i, (c_first, c_second) in enumerate(coefficients):
    expected[i] = (c_first * theta[i] * v_first + c_second * theta[i] * v_second).mean()
  return expected

# Check 1: reduce a * v to a scalar and backpropagate. Broadcasting pairs `a`
# with every row of v, so theta.grad ends up as the derivative summed over rows.
a, v = get_a_v()
true_grad = get_true_grad(theta, v)
theta.grad = None  # discard any gradient left over from an earlier run
(a * v).sum().backward()
# get_true_grad averages over the rows of v, so divide the summed gradient by
# the number of rows instead of a hard-coded 5.
autograd_grad = theta.grad / v.shape[0]
# Fail loudly if autograd and the hand-derived formula disagree.
assert torch.allclose(autograd_grad, true_grad), (autograd_grad, true_grad)
print(autograd_grad, true_grad)

tensor([180., 240., 300.]) tensor([180., 240., 300.], grad_fn=<CopySlices>)


In [14]:
# v stacks its 2-component vectors row-wise; show the (rows, components) size.
v.size()

torch.Size([5, 2])

In [15]:
# a is the 1-D vector-field value a(theta); show its size.
a.size()

torch.Size([2])

In [13]:
# following https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
# Check 2: hand the vector of the vector-Jacobian product straight to backward().
theta.grad = None  # same reset as the first check; also works when no gradient exists yet
a, v = get_a_v()
# The gradient passed to backward() must have the same shape as `a` (2,), but v
# is (5, 2); current PyTorch releases raise a shape-mismatch error for
# a.backward(v). The product is linear in v, so summing v over its rows first
# gives the derivative summed over all rows.
a.backward(v.sum(dim=0))
# Divide by the number of rows (not a hard-coded 5) to match the row-averaged
# true_grad computed in the earlier cell.
autograd_grad = theta.grad / v.shape[0]
assert torch.allclose(autograd_grad, true_grad), (autograd_grad, true_grad)
print(autograd_grad, true_grad)

tensor([180., 240., 300.]) tensor([180., 240., 300.], grad_fn=<CopySlices>)


In [21]:
# Gradient of the plain sum a[0] + a[1] with respect to theta.
theta.grad.zero_()  # clear the gradient accumulated by the previous backward()
a, v = get_a_v()
torch.sum(a).backward()

In [22]:
# Gradient of sum(a) w.r.t. theta: [10*theta[0], 14*theta[1], 18*theta[2]],
# which is [10, 14, 18] at theta = (1, 1, 1).
print(theta.grad)

tensor([10., 14., 18.])
