Taken from "Python Engineer" @ https://www.youtube.com/watch?v=DbeIqrwb_dE|

In [1]:
import torch

In [29]:
# requires_grad = True 
# This tells that we will require to calculate gradient of some function wrt x
#  x
#   \
#    [+] -> y
#   /
#  2
torch.manual_seed(144)
x = torch.randn(3, requires_grad = True)

In [30]:
print(x)

tensor([ 0.7102,  1.2660, -0.5788], requires_grad=True)


In [31]:
y = x + 2
print(y)

tensor([2.7102, 3.2660, 1.4212], grad_fn=<AddBackward0>)


We see that AddBackward0 function there

In [32]:
z = y*y *2
print(z)

tensor([14.6908, 21.3330,  4.0396], grad_fn=<MulBackward0>)


In [33]:
# This gives error as we still dont have a scalar output z.  z is vector as of now
#z.backward()  # will give dz/dx
#print(x.grad) 

In [34]:
z = z.mean()
print(z)

tensor(13.3545, grad_fn=<MeanBackward0>)


In [35]:
z.backward()  # will give dz/dx
print(x.grad)        # attribute which stores gradients

tensor([3.6137, 4.3546, 1.8949])


In [37]:
# By calculus - I calculate derivative of => [dz/dx1, dz/dx2, dz/dx3]
(4/3)*(x + 2.0)

tensor([3.6137, 4.3546, 1.8949], grad_fn=<MulBackward0>)

In [9]:
# If the objective is a vector valued function
# [y1, y2, .., ym]
# ---------------------------------------------------------
# Then derivative comes from chain rule  J.v 
# J is jacobian of y wrt x 
#  ie rows are 
#   (dy1/dx1 dy2/dx1.. dym/dx1)
#   (dy1/dx2 dy2/dx2.. dym/dx2)
#   (dy1/dx3 dy2/dx3.. dym/dx3)
#   (dy1/dx4 dy2/dx4.. dym/dx4)
# And v is column of df/dy1, df/dy2
# So that ith row of J.v is  (df/dy1)(dy1/dxi) + (df/dy2)(dy2/dxi) + ..+ (df/dym)(dym/dxi)
#
#

In [28]:
torch.manual_seed(144)
x = torch.randn(3, requires_grad = True)
print("x:" + str(x))
y = x + 2
z = y * y * 2
# z is vector valued
print("z:" + str(z))

x:tensor([ 0.7102,  1.2660, -0.5788], requires_grad=True)
z:tensor([14.6908, 21.3330,  4.0396], grad_fn=<MulBackward0>)


In [23]:
#z.backward() will fail. Since gradient is vector valued 
#we need to pass  the argument
v = torch.tensor([0.1, 1.0, 0.001], dtype = torch.float32)
z.backward(v)
print(x.grad)

tensor([1.0841e+00, 1.3064e+01, 5.6848e-03])


In [None]:
x

In [39]:
weights = torch.ones(4, requires_grad = True)
for epoch in range(1):
    model_output = ( weights * 3 ).sum()
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])


In [41]:
# If we use repeatedly then gradients accumulate
weights = torch.ones(4, requires_grad = True)
for epoch in range(3):
    model_output = ( weights * 3 ).sum()
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [43]:
# If we use repeatedly then gradients accumulate
weights = torch.ones(4, requires_grad = True)
for epoch in range(3):
    model_output = ( weights * 3 ).sum()
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_() #This resets  gradient

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [47]:
### optimization

weights = torch.ones(4, requires_grad = True)
optimizer = torch.optim.SGD(weights, lr = 0.01)
optimizer.step()
# we will do in next tut - we have error as of now

TypeError: params argument given to the optimizer should be an iterable of Tensors or dicts, but got torch.FloatTensor

In [48]:
!pwd


/Users/jvsingh/work/github/python_codes/ml-python/pytorch_tut
