In [1]:
import numpy as np
import torch
from torch.autograd import grad

## 1st method using grad()

In [None]:
# Differentiate y = cos(x1) + sin(x1) at x1 = 2 with torch.autograd.grad().
x1 = torch.tensor(2.0, requires_grad=True)

y1 = torch.cos(x1)
y2 = torch.sin(x1)
y = y1 + y2

print('--------------')
# y is tracked by autograd because it was built from x1 (requires_grad=True);
# x1.grad is still None because nothing has been backpropagated yet.
print(y.requires_grad, x1.grad)

# grad() hands the result back as a tuple with one entry per input
# (here a single entry, dy/dx1) rather than storing it in x1.grad.
gradients = grad(outputs=y, inputs=x1)
print(gradients)

--------------
True None
(tensor(-1.3254),)


In [None]:
# Hand-derived check of what autograd computes for y = cos(x1) + sin(x1):
# dy/dx1 = -sin(x1) + cos(x1), evaluated at the current x1.
y1_grad = torch.sin(x1).neg()
y2_grad = x1.cos()
torch.add(y1_grad, y2_grad)

tensor(-1.3254, grad_fn=<AddBackward0>)

In [13]:
# Second pass: check which results are tracked by autograd.

x1 = torch.tensor(3.0, requires_grad=True)
y = x1.pow(2)  # 3**2 = 9
print(y, y.requires_grad)

# Any op applied to a tracked tensor records a grad_fn on its result, which is
# what backprop later walks through.
y2 = x1.sin()
print(y2, y2.requires_grad)

y3 = y2.mean()
print(y3, y3.requires_grad)

# Input created without requires_grad=True: the result carries no grad_fn.
x2 = torch.tensor(3.0)
y4 = x2.sin()
print(y4, y4.requires_grad)


# Gradient computation on a fresh leaf tensor
x1 = torch.tensor(3.0, requires_grad=True)
y1 = x1.sin()

gradients = grad(y1, x1)  # tuple holding dy1/dx1
print(gradients)

# For a second derivative, keep the graph of the first one and differentiate it again:
# (dy1_dx1,) = grad(outputs=y1, inputs=x1, create_graph=True)
# (d2y1_dx1,) = grad(outputs=dy1_dx1, inputs=x1)

tensor(9., grad_fn=<PowBackward0>) True
tensor(0.1411, grad_fn=<SinBackward0>) True
tensor(0.1411, grad_fn=<MeanBackward0>) True
tensor(0.1411) False
(tensor(-0.9900),)


## 2nd Method using backward()

In [None]:
# Same function as before, y = cos(x1) + sin(x1), differentiated with backward(),
# which stores dy/dx1 in x1.grad instead of returning it.
x1 = torch.tensor(2.0, requires_grad=True)

y1 = torch.cos(x1)
# Call y1.retain_grad() here to also keep dy/dy1 after backward().
y2 = torch.sin(x1)
# Call y2.retain_grad() here to also keep dy/dy2 after backward().

y = y1 + y2
# With retain_grad(): y1.grad = d(y1 + y2)/dy1 = 1 and y2.grad = d(y1 + y2)/dy2 = 1.

# Autograd starts from y's grad_fn and walks the graph back to x1.
# retain_graph=True keeps the graph so backward() can be called again below.
y.backward(retain_graph=True)
print(f"Gradient single derivative: {x1.grad}")  # first derivative after one backward() call

# Does calling backward() again give the second derivative d2y/dx1^2?
# No: every call ADDS another dy/dx1 to x1.grad, so the loop prints
# 2*dy/dx1, 3*dy/dx1, ..., 11*dy/dx1. The repeated calls are only possible
# because each pass uses retain_graph=True, which keeps the graph from being freed.
for _ in range(10):
  y.backward(retain_graph=True)
  print(x1.grad)

Gradient single derivative: -1.325444221496582
tensor(-2.6509)
tensor(-3.9763)
tensor(-5.3018)
tensor(-6.6272)
tensor(-7.9527)
tensor(-9.2781)
tensor(-10.6036)
tensor(-11.9290)
tensor(-13.2544)
tensor(-14.5799)


In [23]:
# Repeated backward() calls accumulate into x1.grad; they do NOT produce
# higher-order derivatives.
x1 = torch.tensor(2.0, requires_grad=True)

y1 = torch.cos(x1)
# Call y1.retain_grad() here to also keep dy/dy1 after backward().
y2 = torch.sin(x1)
# Call y2.retain_grad() here to also keep dy/dy2 after backward().

y = y1 + y2

# Alternative that returns the gradient: gradients = grad(outputs=y, inputs=[x1])

# 1st call: x1.grad = dy/dx1
y.backward(retain_graph=True)
print(x1.grad)

# 2nd call: x1.grad = 2 * dy/dx1 (accumulated, not d2y/dx1^2)
y.backward(retain_graph=True)
print(x1.grad)

# 3rd call: x1.grad = 3 * dy/dx1; last pass, so the graph no longer has to be retained
y.backward()
print(x1.grad)

tensor(-1.3254)
tensor(-2.6509)
tensor(-3.9763)


tensor(-1.3254)


In [18]:
# Analytic check of dy/dx1 (left commented out; uncomment to reproduce the recorded output below):
# -torch.sin(x1) + torch.cos(x1)

tensor(-1.3254, grad_fn=<AddBackward0>)

## Multi-dimensional inputs and outputs

In [24]:
# Scalar input -> scalar output (R^1 -> R^1)
x = torch.tensor(2.0, requires_grad=True)
y = torch.mul(x, 2)
print(y)

# y is a scalar, so backward() is called without arguments; dy/dx = 2 lands in x.grad.
y.backward()
print(x.grad)



tensor(4., grad_fn=<MulBackward0>)
tensor(2.)


In [25]:
# Vector input -> scalar output (R^N -> R^1)
def operation(x):
  """Return the sum of 2*x, read off as the last entry of its running total."""
  running_total = (2*x).cumsum(dim=0)
  return running_total[-1]


x = torch.tensor([2.0, 2.0, 2.0], requires_grad=True)
y = operation(x)
print(y)

# Every x[i] enters y as 2*x[i], so dy/dx[i] = 2 for each i.
y.backward()
print(x.grad)

tensor(12., grad_fn=<SelectBackward0>)
tensor([2., 2., 2.])


In [26]:
def operation(x):
  """Return the linear combination 2*x[0] + 3*x[1] - 3*x[2]."""
  return 2*x[0] + 3*x[1] - 3*x[2]


x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = operation(x)
print(y)

# The gradient of a linear combination is its coefficient vector.
y.backward()
print(x.grad)

tensor(-1., grad_fn=<SubBackward0>)
tensor([ 2.,  3., -3.])


## Issue: things get more involved as soon as the output is a vector

In [None]:
# Rn -> Rm (n, m > 1)

x = torch.tensor([1.0, 2.0, -2.0], requires_grad=True)
print(x.grad)
y1 = x**2
print(y1)
print(x.grad)

# y1 is a vector, so backward() is given a `gradient` tensor v with the same
# shape as y1. The value stored in x.grad is then the vector-Jacobian product
# v^T J, where J[i][j] = dy1[i]/dx[j].
#
# 1st method: a one-hot v, one backward(v, retain_graph=True) call per output;
# e.g. v = torch.tensor([1.0, 0.0, 0.0]) selects the row belonging to y1[0].
# Clear x.grad between calls, otherwise the rows are added together.
#
# 2nd method: an all-ones v, which sums the rows of J in a single call.
y1.backward(torch.tensor([1.0, 1.0, 1.0]))
print(x.grad)
# tensor([ 2.,  4., -4.])

# Other choices of v (note that torch.tensor takes a list):
#   torch.tensor([1.0, 0.0, 1.0]) -> only the rows for y1[0] and y1[2] contribute.
#   torch.tensor([1.0, 0.8, 1.8]) -> rows weighted by 100%, 80% and 180%.



None
tensor([1., 4., 4.], grad_fn=<PowBackward0>)
None
tensor([ 2.,  4., -4.])


In [37]:
# 2nd attempt
# Rn -> Rm (n, m > 1): build the full Jacobian of y1 = x**2 one row at a time.

x = torch.tensor([1.0, 2.0, -2.0], requires_grad=True)
print(x.grad)
y1 = x**2
print(y1)
print(x.grad)

# Row i of input_key is the one-hot vector that selects output y1[i].
input_key = torch.tensor([[1.0, 0.0, 0.0],
                          [0.0, 1.0, 0.0],
                          [0.0, 0.0, 1.0]])

final_jacobian = []
for key in input_key:
  # backward() accumulates into x.grad, so clear it before computing each row.
  x.grad = None
  # backward() returns None; the row [dy1[i]/dx[0], dy1[i]/dx[1], dy1[i]/dx[2]]
  # is read from x.grad. retain_graph=True keeps the graph for the next rows.
  y1.backward(key, retain_graph=True)
  final_jacobian.append(x.grad)

print(final_jacobian)

None
tensor([1., 4., 4.], grad_fn=<PowBackward0>)
None
[tensor([2., 0., -0.]), tensor([0., 4., -0.]), tensor([ 0.,  0., -4.])]


In [None]:

# Create two plain tensors. requires_grad is not set here, so autograd does
# not track them (the printed tensors show no requires_grad flag).
x, y = torch.tensor(3.0), torch.tensor(4.0)

# print the tensors
print("x:", x)
print("y:", y)

# define a function z of the tensors created above; built from untracked
# inputs, z carries no grad_fn.
z = torch.mul(x, y)
print("z:", z)

# With requires_grad=True on x and y one could continue with:
#   z.backward(retain_graph=True)   # writes dz/dx into x.grad and dz/dy into y.grad
#   print("x.grad :", x.grad)
#   print("y.grad :", y.grad)
#   z.backward()                    # a second call adds to x.grad / y.grad (gradients accumulate)
# Without retain_graph=True on the first call, the graph of z = x*y is freed
# after that call and the second backward() fails.


x: tensor(3.)
y: tensor(4.)
z: tensor(12.)


In [None]:
# --- Scalar case: z = x * y ---
x = torch.tensor(3.0, requires_grad=True)
y = torch.tensor(4.0, requires_grad=True)

z = torch.mul(x, y)
print(x.grad, y.grad)  # nothing has been backpropagated yet
h = torch.mean(z)
print(z)
print(h)
z.backward()
print(x.grad, y.grad)  # dz/dx = y and dz/dy = x


# --- Vector case: f = x1*x2 + sin(x2) + log(x1), elementwise ---
x1 = torch.tensor([1.2, 2.4, 3.3], requires_grad=True)
x2 = torch.tensor([4.4, 5.2, 5.5], requires_grad=True)
f = torch.mul(x1, x2) + x2.sin() + x1.log()
g = torch.mean(f)
print(f, g)
# f is a vector, so backward() receives an upstream gradient; all ones gives
# the plain elementwise derivatives. (g.backward() would scale each entry by 1/3.)
f.backward(gradient=torch.ones(3))
print(x1.grad, x2.grad)

None None
tensor(12., grad_fn=<MulBackward0>)
tensor(12., grad_fn=<MeanBackward0>)
tensor(4.) tensor(3.)
tensor([ 4.5107, 12.4720, 18.6384], grad_fn=<AddBackward0>) tensor(11.8737, grad_fn=<MeanBackward0>)
tensor([5.2333, 5.6167, 5.8030]) tensor([0.8927, 2.8685, 4.0087])


In [None]:
# Two leaf tensors that autograd tracks
x = torch.tensor(3.0, requires_grad=True)
y = torch.tensor(4.0, requires_grad=True)

# show the inputs
print("x:", x)
print("y:", y)

# z = x * y is the function to differentiate
z = torch.mul(x, y)
print("z:", z)

# grad() returns each derivative as a one-element tuple.
# The first call keeps the graph (retain_graph=True) so the second call can reuse it.
dx2 = grad(outputs=z, inputs=x, retain_graph=True)
dy2 = grad(outputs=z, inputs=y)

# The values printed are the tuples returned by grad(): dz/dx = y and dz/dy = x.
print("x.grad :", dx2)
print("y.grad :", dy2)

x: tensor(3., requires_grad=True)
y: tensor(4., requires_grad=True)
z: tensor(12., grad_fn=<MulBackward0>)
x.grad : (tensor(4.),)
y.grad : (tensor(3.),)
