## Differentiation

`torch.autograd` is PyTorch's automatic differentiation engine.

The derivative is the instantaneous rate of change.

Partial derivatives: the gradient of $f(x_1, ..., x_n)$ is a vector of $n$ partial derivatives.



In [1]:
import torch

In [2]:
def f(x):
    """Evaluate the quadratic 3x^2 - 4x at ``x``."""
    quadratic_part = 3 * x**2
    linear_part = 4 * x
    return quadratic_part - linear_part


def numerical_lim(f, x, h):
    """Return the forward difference quotient of ``f`` at ``x`` for step ``h``.

    This is (f(x + h) - f(x)) / h, the expression whose limit as h -> 0
    defines the derivative of ``f`` at ``x``.
    """
    rise = f(x + h) - f(x)
    return rise / h


# Shrink the step tenfold six times and print the difference quotient of f at x = 1.
h = 0.1
remaining = 6
while remaining > 0:
    print(f"h={h:.6f}, numerical limit={numerical_lim(f, 1, h):.6f}")
    h *= 0.1
    remaining -= 1

h=0.100000, numerical limit=2.300000
h=0.010000, numerical limit=2.030000
h=0.001000, numerical limit=2.003000
h=0.000100, numerical limit=2.000300
h=0.000010, numerical limit=2.000030
h=0.000001, numerical limit=2.000003


In [3]:
# input vector [0., 1., 2., 3.]; requires_grad=True asks autograd to track operations on it
x = torch.arange(4.0, requires_grad=True)
x

tensor([0., 1., 2., 3.], requires_grad=True)

In [4]:
print(x.grad)  # None here: no backward pass has run yet

None


In [5]:
# define y = f(x) = x . x (dot product of x with itself), a scalar
y = torch.dot(x, x)
y

tensor(14., grad_fn=<DotBackward0>)

In [6]:
# to find the gradient we need the partial derivatives dy/dx_i;
# backward() computes them and stores the result in x.grad
y.backward()

In [7]:
# gradient of y = x . x with respect to x
print(x.grad)

tensor([0., 2., 4., 6.])


In [8]:
# verify element-wise against the analytic gradient of x . x, which is 2x
x.grad == 2 * x

tensor([True, True, True, True])

In [9]:
# Second example:
#   f(x) = x1 + x2 + ... + xn
#   so the gradient should be [1, 1, ..., 1]

# reset the stored gradient in place; backward() adds to x.grad instead of overwriting it
x.grad.zero_()
# f(x): the sum of all components, a scalar
y = torch.sum(x)
# backpropagate to get the partial derivatives
y.backward()
# inspect the gradient
x.grad

tensor([1., 1., 1., 1.])

In [10]:
# Non-scalar output: backward() must be given a vector v with the shape of y,
# and it computes the vector-Jacobian product J^T . v.
x = torch.arange(3.0, requires_grad=True)
# f(x): element-wise square
y = torch.mul(x, x)
# v = [1., 1., 1.] weights every output element equally
y.backward(gradient=torch.ones_like(y))
# inspect the gradient (2x)
x.grad

tensor([0., 2., 4.])

In [11]:
x = torch.arange(3.0, requires_grad=True)
y = x * x

# The third weight is 0, so the third output contributes nothing
# and the third entry of the gradient comes out as 0.
weights = torch.tensor([1.0, 1.0, 0.0])
y.backward(gradient=weights)
x.grad

tensor([0., 2., 0.])

In [12]:
# Another example, now with two input tensors:
# Say Q = 3a^3 - b^2 (element-wise); with a = [a1, a2].T and b = [b1, b2].T
# With the inputs ordered (a1, b1, a2, b2), the Jacobian matrix is
#   J = [ 9a1^2   -2b1   0      0   ]
#       [ 0       0      9a2^2  -2b2]
#
#  given v = [1 1]^T
#
#   J^T . v = [9a1^2   -2b1   9a2^2   -2b2].T
#
# with pytorch:

a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)

Q = 3 * a**3 - b**2

# v: one weight of 1 per element of Q, built with Q's shape and dtype
# so the gradient argument matches Q without an implicit int -> float cast
external_grad = torch.ones_like(Q)

# Q is not a scalar, so backward() needs v; the result J^T . v lands in a.grad and b.grad
Q.backward(gradient=external_grad)
print(a.grad, b.grad)

tensor([36., 81.]) tensor([-12.,  -8.])



with $a = [2., 3.]$

$\nabla a = [9 a_1^2, 9 a_2^2] = [9*2^2, 9*3^2] = [36, 81]$

with $b = [6., 4.]$

$\nabla b = [-2b_1, -2b_2] = [-2*6, -2*4] = [-12, -8]$