In [1]:
import torch, torchvision

In [2]:
## Autograd
# torch.autograd is PyTorch's automatic differentiation engine, used in neural-network training (backpropagation with gradient descent, etc.)

In [15]:
## Demonstration of a single training step
# Load a ResNet-18 with the pretrained weights that torchvision provides.
# NOTE(review): newer torchvision releases reportedly deprecate `pretrained=` in favour of `weights=` — confirm against the installed version.
model = torchvision.models.resnet18(pretrained=True)
data = torch.rand(1, 3, 64, 64)   # one random image: 3 channels, 64x64 pixels
labels = torch.rand(1, 1000)      # random initial labels, shape (1, 1000)

# Forward pass: run the input through the model to obtain its prediction
pred = model(data)

# Backward pass
# Summing (pred - labels) reduces the error to a single scalar, e.g. tensor(-520.9473, grad_fn=<SumBackward0>)
loss = torch.sum(pred - labels)
# The loss carries the gradient function; backward() has autograd compute the gradient
# for every model parameter and store it in that parameter's .grad attribute
loss.backward()

# Optimization
# Create an optimizer over the model's parameters and pick the optimization approach:
# here Stochastic Gradient Descent with learning rate 0.01 and momentum 0.9
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer.step()  # adjust each parameter using the gradient that .backward() stored in its .grad

In [27]:
## How Autograd works under the hood
# requires_grad=True tells autograd to track every operation performed on the tensor
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

## Imagine a and b are neural network parameters, and define the error as Q = 3a^3 - b^2
# Gradient with respect to a: dQ/da = 9a^2
# Gradient with respect to b: dQ/db = -2b
Q = 3 * a**3 - b**2

## Calling .backward() on Q computes the gradients above and stores them in a.grad and b.grad.
# Q is a vector rather than a scalar, so .backward() needs an explicit `gradient` argument
# with the same shape as Q. Passing all ones (i.e. dQ/dQ = 1) is equivalent to calling
# Q.sum().backward(). ones_like also matches Q's dtype and device.
Q.backward(gradient=torch.ones_like(Q))

# Check that autograd agrees with the hand-derived formulas.
# torch.allclose compares whole tensors with a floating-point tolerance and returns a plain
# bool; .detach() keeps the expected values out of the autograd graph.
assert torch.allclose(a.grad, 9 * a.detach() ** 2), "dQ/da should equal 9a^2"
assert torch.allclose(b.grad, -2 * b.detach()), "dQ/db should equal -2b"

In [29]:
## Vector Calculus using Autograd
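# Let y = f(x), where both x = (x1, ..., xn) and y = (y1, ..., ym) are vectors.
#
# The gradient of y with respect to x is a Jacobian matrix J = (dy/dx1, dy/dx2, ..., dy/dxn).
# Assuming y is an m-dimensional vector, each dy/dxj is a column of m partial derivatives,
# so J is the m x n matrix whose entry in row i, column j is dyi/dxj:
#
#     J = ( dy1/dx1  ...  dy1/dxn )
#         (    .      .      .    )
#         ( dym/dx1  ...  dym/dxn )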

In [28]:
# Display the gradient that Q.backward() stored on `a` (expected: 9a^2)
a.grad

tensor([36., 81.])

In [26]:
# NOTE(review): `t` is not defined in any cell visible here — presumably a boolean tensor
# left over from an earlier or deleted cell. Define it explicitly before this cell or
# remove the cell so the notebook survives Restart & Run All. TODO confirm
all(t)

True