In [1]:
import torch

In [2]:
def cal_z(a,b,c):
    """Compute z = 2*(a-b) + c.

    a, b and c can be scalar tensors, 1st-order tensors or 2nd-order
    tensors. In the last two cases the operation is elementwise.
    """
    # Double the difference first, then shift it by c.
    doubled_diff = torch.mul(torch.sub(a, b), 2)
    return torch.add(doubled_diff, c)

In [3]:
# Try inputs of increasing rank: scalar (rank 0), vector (rank 1), matrix (rank 2).
# Every case evaluates 2*(1-2)+3 = 1; only the shape of the result changes.
a = torch.tensor(1)
b = torch.tensor(2)
c = torch.tensor(3)
print('Scalar Inputs:', cal_z(a,b,c))

a = torch.tensor([1])
b = torch.tensor([2])
c = torch.tensor([3])
print('Rank 1 Inputs:', cal_z(a,b,c))

# Nested brackets make these 2nd-order (rank 2) tensors of shape (1, 1)
a = torch.tensor([[1]])
b = torch.tensor([[2]])
c = torch.tensor([[3]])
print('Rank 2 Inputs:', cal_z(a,b,c))


Scalar Inputs: tensor(1)
Rank 1 Inputs: tensor([1])
Rank 1 Inputs: tensor([[1]])


## Generate tensors that require gradient

In [4]:
# Set requires_grad=True so that autograd tracks operations on this tensor
# and can compute gradients with respect to it.
# The printed representation shows the flag.
a = torch.tensor(3.14, requires_grad=True)
print(a)

tensor(3.1400, requires_grad=True)


In [5]:
# The same flag works for a 1st-order tensor (vector) of floats
b = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
print(b)

tensor([1., 2., 3.], requires_grad=True)


In [6]:
# Without requires_grad=True the flag is False,
# so no gradient will be stored for this tensor
w = torch.tensor([1.0, 2.0, 3.0])
print(w.requires_grad)

False


In [7]:
# Manually turn on the option: requires_grad_() switches gradient tracking on
# for the existing tensor w (the trailing underscore marks an in-place method).
# The cell output shows w with requires_grad=True.
w.requires_grad_()

tensor([1., 2., 3.], requires_grad=True)

## Generate random tensors

In [8]:
# Fix the random seed so the values drawn below are reproducible
torch.manual_seed(1)
# This creates a tensor without initializing its values.
# It only allocates memory to store this tensor.
# If we print w, we will see highly irregular values
w = torch.empty(2,3)
# This is a special initialization method
# The input w is usually a weight matrix
# The initial entries of w follow a normal distribution with zero mean
# However, the variance is calculated using the Xavier initialization formula
# (std = gain * sqrt(2 / (fan_in + fan_out)) according to the PyTorch docs).
# This initialization method is designed to ensure that
# the variances of the activations and gradients of the layer
# are approximately the same for all layers in the network,
# which can improve the training performance of deep neural networks.
# xavier_normal_ fills w in place; the cell output shows the filled tensor.
torch.nn.init.xavier_normal_(w)

tensor([[ 0.4183,  0.1688,  0.0390],
        [ 0.3930, -0.2858, -0.1051]])

In [9]:
# TODO: this module is not used further in this notebook.
# torch.nn.Module is the base class for all neural network modules.
# Subclasses normally implement a forward method; this example only
# shows how to create and initialize the weights.
class MyModule(torch.nn.Module):
    """Weights of a small two-layer network: 3 inputs -> 2 hidden units -> 1 output.

    Attributes:
        w1: weight matrix of shape (2, 3) for the first layer.
        w2: weight matrix of shape (1, 2) for the second layer.

    Both weights are wrapped in torch.nn.Parameter so that the module
    registers them: they then show up in parameters() and state_dict()
    and follow the module in .to(device). A Parameter has
    requires_grad=True by default, so gradients are still tracked.
    """

    def __init__(self):
        super().__init__()
        # Recall that the second dimension corresponds to input
        # For w1, the input is a 3D vector
        # The output is a 2D vector
        self.w1 = torch.nn.Parameter(torch.empty(2, 3))
        torch.nn.init.xavier_normal_(self.w1)
        # The input for w2 comes from the output of the first layer, which is a 2D vector.
        # Thus the 2nd dimension of w2 has two entries
        # The output of w2 is a scalar
        self.w2 = torch.nn.Parameter(torch.empty(1, 2))
        torch.nn.init.xavier_normal_(self.w2)

## Automatic differentiation

An example   
$$z = wx + b$$
$$Loss = \sum_{i=1}^{N} (y_i - z_i)^2$$
$x$, $y$, and $z$ are vectors.

In [10]:
# Goal: the gradient of the loss with respect to w and b.
# Only these two tensors are created with requires_grad=True;
# x and y are plain data.
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.5, requires_grad=True)

x = torch.tensor([1.4])
y = torch.tensor([2.1])

# Forward pass: linear model z = w*x + b, then the sum of squared residuals
z = w * x + b
loss = torch.sum((y - z) ** 2)

In [11]:
# Show parameters, data, prediction and loss, one tensor per line
print(w, b, x, y, z, loss, sep='\n')

tensor(1., requires_grad=True)
tensor(0.5000, requires_grad=True)
tensor([1.4000])
tensor([2.1000])
tensor([1.9000], grad_fn=<AddBackward0>)
tensor(0.0400, grad_fn=<SumBackward0>)


In [12]:
# backward() differentiates loss and stores the results in the .grad
# attribute of w and b, the tensors created with requires_grad=True
loss.backward()
print('dL/dw', w.grad)
print('dL/db', b.grad)

dL/dw tensor(-0.5600)
dL/db tensor(-0.4000)


Manual verification:   
$$\frac{\partial L}{\partial w} = 2(wx+b-y)x = 2(1 \cdot 1.4 + 0.5 - 2.1) \cdot 1.4=-0.56$$
$$\frac{\partial L}{\partial b} = 2(wx+b-y) = 2(1 \cdot 1.4 + 0.5 - 2.1)=-0.4$$

In [13]:
# Evaluate the closed form dL/dw = 2*x*(w*x+b-y) with the tensors
# themselves, as a cross-check against w.grad
print(2 * x * (w*x+b-y))

tensor([-0.5600], grad_fn=<MulBackward0>)


The above result contains `grad_fn=<MulBackward0>`, which means that the tensor was created by an operation that was tracked by PyTorch's autograd system.