In [47]:
# Thanks, Python engineer!

################# 1,2 - In pytorch everything is a tensor ########################
import torch
import numpy as np

# --- Creating tensors --------------------------------------------------------
x = torch.empty((2, 2, 3))                  # storage is allocated but not initialised
zeros = torch.zeros((2, 3))
ones = torch.ones((2, 5), dtype=torch.int)  # dtype is selectable, e.g. torch.float16
# Inspect with print(zeros) or print(ones.size()).

r2 = torch.rand((1, 2))
y = torch.tensor([2.4, 3.0])
# torch.add(r2, y) adds the two tensors;
# r2[0][0].item() extracts a single element as a Python float.

# Reshaping with view (the total number of elements must stay the same):
#   torch.rand(5, 5).view(25)     -> 1-D tensor with 25 elements
#   torch.rand(4, 4).view(-1, 8)  -> 2x8, the -1 dimension is inferred (16 = 4x4)

# --- Tensor <-> NumPy --------------------------------------------------------
a = torch.ones(10)
b = a.numpy()               # numpy.ndarray obtained from the tensor
a += 1                      # in-place add, same as a.add_(1)
b_t = torch.from_numpy(b)   # and back from ndarray to tensor

# All three print as 2s: the in-place update is visible through the ndarray
# because it points to the same place in memory as the tensor.
print(a, b, b_t, sep="\n")

# --- Gradient tracking -------------------------------------------------------
# requires_grad is False by default; switch it on for tensors whose gradient
# will be needed later.
z = torch.ones(5, requires_grad=True)
print(z)

tensor([2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])
tensor([1., 1., 1., 1., 1.], requires_grad=True)


In [99]:
# 3 - Fun with gradients using Autograd!

# Leaf tensor whose operations autograd will record.
x = torch.randn(3, requires_grad=True)
print(x)

# Every operation on a tracked tensor attaches the matching backward function.
y = torch.add(x, 2)
print(y)   # shows grad_fn=<AddBackward0>

z = torch.mul(y * y, 2)
print(z)   # shows grad_fn=<MulBackward0>

# Reduce to a scalar (grad_fn=<MeanBackward0>). Calling backward() with no
# argument on a non-scalar fails with
# "grad can be implicitly created only for scalar outputs".
z = torch.mean(z)
print(z)

# For a non-scalar z, a vector of matching shape would be passed instead:
#   z.backward(v)
v = torch.tensor((0.1, 1.0, 0.001), dtype=torch.float32)
z.backward()
print(x.grad)   # dz/dx


# Disabling gradient calculation (practical for inference). Alternatives:
#   x.requires_grad_(False)   # trailing underscore -> modifies the tensor in-place
#   y = x.detach()
#   with torch.no_grad(): ...

with torch.no_grad():
    y = x + 10
print(f"y={y}")   # no grad_fn in the output

# backward() accumulates into .grad: without zeroing, the values from each
# iteration are summed up. Example:

weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = torch.sum(weights * 10)
    model_output.backward()
    print(f"weights.grad={weights.grad}")
    weights.grad.zero_()   # empty the gradients, as a training loop should

# Optimizer:

#optimizer = torch.optim.SGD([weights], lr=0.1)   # expects an iterable of parameters
#optimizer.step()
#optimizer.zero_grad()

# Remember to empty gradients:

#z.backward()
#weights.grad.zero_()


tensor([ 1.1070, -1.2723,  0.2982], requires_grad=True)
tensor([3.1070, 0.7277, 2.2982], grad_fn=<AddBackward0>)
tensor([19.3073,  1.0592, 10.5633], grad_fn=<MulBackward0>)
tensor(10.3099, grad_fn=<MeanBackward0>)
tensor([4.1427, 0.9703, 3.0643])
y=tensor([11.1070,  8.7277, 10.2982])
weights.grad=tensor([10., 10., 10., 10.])
weights.grad=tensor([10., 10., 10., 10.])
weights.grad=tensor([10., 10., 10., 10.])


In [109]:
# 4 - Backpropagation (THEORY)

# x -> a(x) -> y -> b(y) -> z
# dz/dx = dz/dy * dy/dx  (chain rule to get the derivative of interest)

# Computational graph: x,y -> f(x,y) = x*y = z -> prediction
# Partials of interest
# dz/dy = d(x*y)/dy = x * dy/dy = x*1 = x
# dz/dx = d(x*y)/dx = y * dx/dx = y*1 = y

# Need this because of final loss function minimization wrt. inputs: dLoss/dx
# Given dLoss/dz, we can easily get the gradient of the loss wrt. x via the chain rule:

# dLoss/dx = dLoss/dz * dz/dx

# STEPS in pipeline:

# 1 - Forward pass: Compute loss
# 2 - Compute local gradients
# 3 - Backwards pass: dLoss/dWeights

# Example: linear regression: y_hat = w*x, loss = (y_hat - y)^2 = (w*x - y)^2
# Network x,w -> y_hat -> y_hat - y -> s -> s^2 -> loss    (forward pass)
# dy_hat/dw <- ds/dy_hat <- dLoss/ds <- loss (backward pass)
# dLoss/dw <- dLoss/dy_hat <- dLoss/ds

# Example: x=1, y=2, w=1

# 1 - Forward:  x=1, w=1 -> y_hat = w*x = 1*1 = 1, s = y_hat - y = 1 - 2 = -1, loss = s^2 = (-1)^2 = 1

# 2 - Calculate local gradients:

# dLoss/ds = d(s^2)/ds = 2s
# ds/dy_hat = d(y_hat - y)/dy_hat = 1
# dy_hat/dw = d(w*x)/dw = x

# 3 - Backward pass
# dLoss/ds = 2s = 2(-1) = -2
# ds/dy_hat = 1

# dLoss/dy_hat = dLoss/ds * ds/dy_hat
# dLoss/dy_hat =    -2    *    1           = -2

# dLoss/dw = dLoss/dy_hat * dy_hat/dw
#          =     -2       *    x
#          =     -2       *    1       = -2

# dLoss / dw = -2 

# PYTORCH VERIFICATION OF CALCULATION ABOVE

# Fixed training sample (input, target); neither needs a gradient.
x, y = torch.tensor(1.0), torch.tensor(2.0)

# The weight is the only tensor we differentiate with respect to.
w = torch.tensor(1.0, requires_grad=True)

# Forward pass: prediction, then squared error.
y_hat = torch.mul(w, x)
loss = (y_hat - y).pow(2)

print(f"loss={loss}")

# Backward pass: fills w.grad with dLoss/dw.
loss.backward()
print(f"weight gradient={w.grad}")    # matches the hand-derived value of -2


loss=1.0
weight gradient=-2.0
