# Gradient Descent



In [1]:
import torch

# we invoke requires_grad=True when we want to find this weight
x = torch.randn(3,requires_grad=True)
y = x + 2
z = y*y*2 #<-- not scalar
#z = z.mean()
z

tensor([9.0473, 8.1674, 5.2391], grad_fn=<MulBackward0>)

In [2]:
# if z is not scaler, we need to create a v vector that is the same size as z
# https://medium.com/unit8-machine-learning-publication/computing-the-jacobian-matrix-of-a-neural-network-in-python-4f162e5db180
v = torch.randn(3)
v

tensor([-0.3002,  0.5990, -0.2228])

In [3]:
z.backward(v,retain_graph=True) #ok after doing this
x.grad

tensor([-2.5543,  4.8422, -1.4426])

Calculate weights without computation

In [4]:
x = torch.randn(3,requires_grad=True)
x

# x.requires_grad_(False)
# x.detach()
# with torch.no_grad():

tensor([ 1.2138, -1.2162,  0.7640], requires_grad=True)

In [5]:
# option 1
x.requires_grad_(False)
x

tensor([ 1.2138, -1.2162,  0.7640])

In [6]:
# option 2
y=x.detach()
y

tensor([ 1.2138, -1.2162,  0.7640])

In [7]:
# option 3
with torch.no_grad():
  y = x + 2
  print(y)


tensor([3.2138, 0.7838, 2.7640])


Failed Example on how NOT to reset the grad ( fail to reset )

In [8]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
  model_output = (weights *3).sum()
  model_output.backward()

  print(weights.grad) # the gradients are accumulated in every loop
  #weights.grad.zero_() #reset zero

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


We managed to reset the grad here

In [9]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
  model_output = (weights *3).sum()
  model_output.backward()

  print(weights.grad) # the gradients are NOT summed in every loop
  weights.grad.zero_() #reset zero

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


Simple Backprop

In [10]:
import torch

x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0,requires_grad=True)

# forward pass and compute the loss and local gradient
y_hat = w*x
loss = (y_hat - y)**2

print(loss)

# backward pass
loss.backward()
print(w.grad)

## update weights
## next forward backward pass

w.sub(w.grad)
w

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


tensor(1., requires_grad=True)