In [1]:
# Library imports
import pyforest
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from tqdm import tqdm

from turtle import forward
import torch.nn as nn
import torch.nn.functional as F
import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

In [2]:
# The primary purpose of this notebook is to understand deeply how gradients
# are computed in PyTorch

#### **Gradients w.r.t a simple matrix**

In [3]:
# Leaf tensor tracked by autograd; gradients below are taken w.r.t. its entries.
temp = torch.tensor(
    [[1.0, 2.0],
     [3.0, 4.0]],
    requires_grad=True,
)
temp

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [4]:
# Scalar objective that only touches the two diagonal entries of temp.
loss = (
    temp[0][0] * 10
    + temp[1][1] * 30
)
loss

tensor(130., grad_fn=<AddBackward0>)

In [5]:
# Returns a 1-tuple holding d(loss)/d(temp), with the same shape as temp.
torch.autograd.grad(outputs=loss, inputs=temp)

(tensor([[10.,  0.],
         [ 0., 30.]]),)

#### **Gradients w.r.t a simple matrix and other inner model parameters**

In [6]:
# Re-create the leaf tensor so this section builds its own graph.
temp = torch.tensor(
    [[1.0, 2.0],
     [3.0, 4.0]],
    requires_grad=True,
)
temp

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [7]:
# Second leaf tensor, standing in for the model's own parameters.
theta = torch.tensor([10.0, 30.0], requires_grad=True)
theta

tensor([10., 30.], requires_grad=True)

In [8]:
# Same diagonal objective, but the coefficients now come from theta, so the
# loss depends on both leaf tensors.
loss = (
    temp[0][0] * theta[0]
    + temp[1][1] * theta[1]
)
loss

tensor(130., grad_fn=<AddBackward0>)

In [9]:
# create_graph=True records the gradient computation itself, which is why the
# result carries a grad_fn and could be differentiated again. It also makes
# retain_graph default to True, so loss's graph survives this call.
# retain_graph=True on its own would only keep the graph alive, without making
# the returned gradient differentiable.
theta_grad = torch.autograd.grad(outputs=loss, inputs=theta, create_graph=True)
theta_grad

(tensor([1., 4.], grad_fn=<AddBackward0>),)

In [10]:
# Neither retain_graph nor create_graph is set here, so autograd frees the
# saved intermediates of loss's graph once this call returns; loss cannot be
# differentiated again afterwards.
temp_grad = torch.autograd.grad(outputs=loss, inputs=temp)
temp_grad

(tensor([[10.,  0.],
         [ 0., 30.]]),)

#### **Updating weights in PyTorch**

In [11]:
# One gradient-descent step on theta with step size 0.1.
# The step is applied in place under torch.no_grad() rather than by assigning
# through `.data`: the write is still kept out of the graph, but autograd keeps
# tracking the tensor, so a later backward that needs the old values raises
# instead of silently using the updated ones.
with torch.no_grad():
    theta -= 0.1 * theta_grad[0]
theta

tensor([ 9.9000, 29.6000], requires_grad=True)

In [12]:
# One gradient-descent step on temp with step size 0.1.
# Applied in place under torch.no_grad() rather than by assigning through
# `.data`, so the update stays out of the graph while autograd can still
# detect that the tensor was modified.
with torch.no_grad():
    temp -= 0.1 * temp_grad[0]
temp

tensor([[0., 2.],
        [3., 1.]], requires_grad=True)

In [13]:
# Second gradient-descent step on theta; it reuses the previously computed
# theta_grad, so theta moves by the same amount again.
# Applied in place under torch.no_grad() rather than by assigning through
# `.data`, so the update stays out of the graph while autograd can still
# detect that the tensor was modified.
with torch.no_grad():
    theta -= 0.1 * theta_grad[0]
theta

tensor([ 9.8000, 29.2000], requires_grad=True)

In [14]:
# Expected to raise RuntimeError. create_graph=True on *this* call does not
# help: the earlier autograd.grad call w.r.t. temp ran without
# retain_graph=True and already freed the saved intermediates of loss's graph.
# To differentiate again, either rebuild loss first or pass retain_graph=True
# to every earlier grad call on it.
theta_grad = torch.autograd.grad(outputs=loss, inputs=theta, create_graph=True)
theta_grad

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

#### **Gradients w.r.t sparse matrices**

In [39]:
# Sparse COO version of diag(1, 4): column k of `indices` is the (row, col)
# position of values[k].
indices = torch.tensor([[0, 1],
                        [0, 1]])
values = torch.tensor([1.0, 4.0])
sm = torch.sparse_coo_tensor(indices, values, size=(2, 2), requires_grad=True)

# Show the raw pieces, the sparse tensor and its dense equivalent, one per line.
print(indices, values, sm, sm.to_dense(), sep="\n")

tensor([[0, 1],
        [0, 1]])
tensor([1., 4.])
tensor(indices=tensor([[0, 1],
                       [0, 1]]),
       values=tensor([1., 4.]),
       size=(2, 2), nnz=2, layout=torch.sparse_coo, requires_grad=True)
tensor([[1., 0.],
        [0., 4.]], grad_fn=<ToDenseBackward0>)


In [40]:
# Same diagonal objective as in the dense case, built from indexed entries of
# the sparse tensor; the forward pass evaluates without complaint.
loss = (
    sm[0][0] * 10
    + sm[1][1] * 30
)
loss

tensor(130., grad_fn=<AddBackward0>)

In [41]:
# Expected to raise RuntimeError: the backward of the indexing op hands back a
# dense (strided) gradient where autograd expects a sparse one for sm.
torch.autograd.grad(outputs=loss, inputs=sm)

RuntimeError: Function SelectBackward0 returned an invalid gradient at index 0 - expected type TensorOptions(dtype=float, device=cpu, layout=Sparse, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)) but got TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt))

In [38]:
# Expected to raise RuntimeError: the sm[0][0] written here is a newly created
# tensor, not one that loss was computed from, so autograd reports it as unused.
torch.autograd.grad(outputs=loss, inputs=sm[0][0])

RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.

In [35]:
# Dense control experiment: the same values and the same objective as the
# sparse case differentiate without any error.
temp = torch.tensor(
    [[1.0, 0.0],
     [0.0, 4.0]],
    requires_grad=True,
)
loss = (
    temp[0][0] * 10
    + temp[1][1] * 30
)
torch.autograd.grad(outputs=loss, inputs=temp)

(tensor([[10.,  0.],
         [ 0., 30.]]),)

In [29]:
# Rebuild the sparse objective first: in notebook order `loss` was last bound
# to the dense control experiment, whose graph does not involve sm at all, so
# reusing it would not exercise the sparse backward pass.
loss = sm[0][0] * 10 + sm[1][1] * 30

# Expected to raise RuntimeError (a dense gradient is produced for the sparse
# input), so sm_grad is never actually assigned.
sm_grad = torch.autograd.grad(loss, sm)
sm_grad

RuntimeError: Function SelectBackward0 returned an invalid gradient at index 0 - expected type TensorOptions(dtype=float, device=cpu, layout=Sparse, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)) but got TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt))

In [42]:
# It seems sparse matrices don't support autograd on many of the regular operations
# the supported operations list can be found here: https://pytorch.org/docs/stable/sparse.html
# for now, it seems simpler to stick to regular matrices

#### **Try computing gradients w.r.t. different entries of a matrix separately**

In [46]:
# Dense leaf matrix used for the per-entry gradient experiments.
m = torch.tensor(
    [[1.0, 2.0],
     [3.0, 4.0]],
    requires_grad=True,
)
m

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [47]:
# Objective that depends only on the diagonal entries of m.
loss = (
    10 * m[0][0]
    + 30 * m[1][1]
)
loss

tensor(130., grad_fn=<AddBackward0>)

In [52]:
# allow_unused=True turns the "not used in the graph" error into a None
# gradient: the m[0, 0] written here is a new tensor that loss was not built
# from, so there is nothing to differentiate with respect to.
torch.autograd.grad(outputs=loss, inputs=m[0, 0], allow_unused=True)

(None,)

In [61]:
# Fresh leaf matrix for the experiment where every entry feeds into the loss.
m = torch.tensor(
    [[1.0, 2.0],
     [3.0, 4.0]],
    requires_grad=True,
)
m

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [62]:
# Every entry of m gets its own coefficient, so each entry of the gradient is
# distinguishable in the result.
loss = (
    10 * m[0][0]
    + 30 * m[1][1]
    + 50 * m[0][1]
    + 70 * m[1][0]
)
loss

tensor(440., grad_fn=<AddBackward0>)

In [63]:
# grad() returns a 1-tuple, so the first [0] picks the gradient matrix and the
# second [0] picks its first row (not the single entry at position (0, 0)).
# retain_graph is not set, so this call also frees loss's graph.
torch.autograd.grad(outputs=loss, inputs=m)[0][0]

tensor([10., 50.])

In [64]:
# Expected to raise RuntimeError: the previous grad() call on this loss ran
# without retain_graph=True and already freed its graph.
# NOTE(review): even with the graph retained, [1] would be out of range on the
# 1-tuple that grad() returns; [0][1, 1] is what addresses entry (1, 1).
torch.autograd.grad(outputs=loss, inputs=m)[1][1]

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [55]:
# Rebuild loss before differentiating: the earlier grad() calls on it ran
# without retain_graph=True and freed its graph, so reusing the old loss here
# would raise instead of producing the gradient.
loss = 10 * m[0][0] + 30 * m[1][1] + 50 * m[0][1] + 70 * m[1][0]

# Full gradient matrix in one call; single entries can be read from it by
# ordinary indexing.
torch.autograd.grad(loss, m)

(tensor([[10., 50.],
         [70., 30.]]),)

In [None]:
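# It seems difficult to compute gradients w.r.t. slices of a tensor: every
# indexing expression creates a separate tensor in the computational graph, so
# a slice written after the loss was built is not part of that graph.
# Computing the full gradient once and indexing the result works instead.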