In [21]:
import torch

In [22]:
# Report whether PyTorch can use a CUDA device; the bare expression is this cell's displayed output.
torch.cuda.is_available()

True

In [23]:
# With requires_grad=True, autograd records every operation applied to the tensor.
x = torch.randn(3, requires_grad=True)
# y is the result of an operation on x, so it carries a grad_fn linking back to x.
y = x + 2

In [24]:
# Show the leaf tensor, then its dtype, one per line.
print(x, x.dtype, sep="\n")

tensor([-0.6531, -0.5831,  0.7873], requires_grad=True)
torch.float32


In [25]:
# Print the derived tensor, its dtype, and its grad_fn, one per line.
# grad_fn references the Function that created the tensor.
for shown in (y, y.dtype, y.grad_fn):
    print(shown)

tensor([1.3469, 1.4169, 2.7873], grad_fn=<AddBackward0>)
torch.float32
<AddBackward0 object at 0x7f8cc75a1490>


In [26]:
# Element-wise 3 * y**2; the result is still a non-scalar tensor tracked by autograd.
y_squared = y * y
z = y_squared * 3
print(z)

tensor([ 5.4428,  6.0232, 23.3074], grad_fn=<MulBackward0>)


In [27]:
# Reduce z to a scalar so that backward() can be called on it without a gradient argument.
z = torch.mean(z)
print(z)

tensor(11.5911, grad_fn=<MeanBackward0>)


In [28]:
# Once the computation is finished, call .backward() on the result to have all gradients computed automatically.
# The gradients are accumulated into the .grad attribute of the tensors created with requires_grad=True.

In [29]:
# z is a scalar (the mean taken above), so backward() is called without a gradient argument.
z.backward()

In [30]:
# dz/dx: z is the mean of 3 * y**2 over 3 elements and y = x + 2, so each entry equals 2 * y_i.
print(x.grad)

tensor([2.6939, 2.8339, 5.5746])


In [31]:
# Model with non-scalar output:
# If a tensor is non-scalar (has more than one element), backward() must be given
# a `gradient` argument: a tensor whose shape matches the output.
# That tensor is the vector used in the vector-Jacobian product.

x = torch.randn(3, requires_grad=True)

In [35]:
# Start from x * 2, then double ten more times: y ends up as x * 2**11.
y = x * 2

for _doubling in range(10):
    y = y * 2

# Print the tensor and then its shape; y is non-scalar (same shape as x).
print(y, y.shape, sep="\n")

tensor([-2245.4829,  -544.1718,    21.6308], grad_fn=<MulBackward0>)
torch.Size([3])


In [None]:
# v is the vector of the vector-Jacobian product; it has one entry per element of y.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradient=v)
# x.grad now holds the product of v with the Jacobian dy/dx.
print(x.grad)

In [41]:
# A tensor created without requires_grad=True is not tracked by autograd;
# .requires_grad_(...) changes that flag on an existing tensor in place.
a = torch.randn(2, 2)
print(a.requires_grad)

False


In [42]:
# a does not require grad at this point, so the result is untracked and has no grad_fn.
numerator = a * 3
denominator = a - 1
b = numerator / denominator
print(b.grad_fn)

None


In [43]:
# Trailing underscore = in-place: switch gradient tracking on for the existing tensor `a`.
a.requires_grad_(True)
print(a.requires_grad)

True


In [44]:
# a now requires grad, so the result is tracked and carries a grad_fn.
b = torch.sum(a * a)
print(b.grad_fn)

<SumBackward0 object at 0x7f8ca6d0b400>


In [47]:
# .detach() returns a new tensor with the same content that takes no part in gradient computation.
a = torch.randn(2, 2, requires_grad=True)
b = a.detach()
# Print the flag of the original first, then of the detached tensor.
for tensor in (a, b):
    print(tensor.requires_grad)

True
False


In [48]:
# Operations executed inside `with torch.no_grad():` are not tracked by autograd,
# so their results have requires_grad=False even when the inputs require grad.
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    # Demonstrate on `a`, the tensor created in this cell, so the cell does not
    # depend on an `x` left over from an earlier cell.
    print((a ** 2).requires_grad)

True
False


In [49]:
# Gradients accumulate across backward() calls, so clear them with .zero_()
# before the next optimization step.
weights = torch.ones(4, requires_grad=True)
learning_rate = 0.1

for epoch in range(3):
    # Dummy forward pass: the gradient w.r.t. every weight is 3.
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

    # Adjust the weights in place without recording the update in the autograd graph.
    with torch.no_grad():
        weights.sub_(learning_rate * weights.grad)

    # Reset the accumulated gradient so the next epoch starts from zero.
    weights.grad.zero_()

print(weights)
print(model_output)

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)
tensor(4.8000, grad_fn=<SumBackward0>)
