# PyTorch Basics

What's covered:

* PyTorch Tensors and tensor operations
* Working with Numpy
* PyTorch Gradients

In [1]:
# Import PyTorch
import torch
import torch.nn as nn

## Tensor Basics

A tensor is a multi-dimensional array that stores numeric data of a single type. It can be a scalar, a vector, a matrix, or an array with any number of dimensions.

In [6]:
# Scalar: a 0-dimensional tensor built from a Python float.
scalar_tensor = torch.tensor(10.0)
# The second printed line is the dtype: floats default to 32-bit
# (single precision).
print(scalar_tensor, scalar_tensor.dtype, sep="\n")

tensor(10.)
torch.float32


In [7]:
# Vector: a 1-D tensor. The list mixes one float with plain ints; the
# resulting tensor holds floats throughout.
vector_values = [10., 32, 28, 12, 0]
vector_tensor = torch.tensor(vector_values)
vector_tensor

tensor([10., 32., 28., 12.,  0.])

In [12]:
# Matrix: a 2-D tensor built from a list of rows.
matrix_rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
matrix_tensor = torch.tensor(matrix_rows)
print(matrix_tensor)

tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]])


What do you think the data type (`dtype`) of `matrix_tensor` is, given the values above?

In [16]:
# Print the datatype of matrix_tensor below:
# YOUR CODE HERE

In [17]:
# Multi-dimensional array: a list of matrices, each of which is itself a
# list of rows.
first_matrix = [[1, 2, 3], [4, 5, 6]]
second_matrix = [[10, 11, 12], [13, 14, 15]]
matrix_3d = torch.tensor([first_matrix, second_matrix])
matrix_3d

tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[10, 11, 12],
         [13, 14, 15]]])

Guess the `shape` of this object

In [18]:
# Hint: `shape` is a property of tensors
# YOUR CODE HERE

## Basic Tensor Operations

### Common initialization methods

**Exercise**: Fill in the blanks "___" where appropriate

In [31]:
# A nested Python list describing a 2x2x2 block of floats...
T_data = [
    [[1., 2.], [3., 4.]],
    [[5., 6.], [7., 8.]],
]
# ...converted into a 3-D tensor with the same nesting.
T = torch.tensor(T_data)
print(T)

tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]])


Creating a full tensor

In [32]:
# Exercise: replace ___ with the factory function that takes a shape and a
# fill value and returns a tensor holding that value in every position.
full_tensor = torch.___((2, 4, 3), 10.)
full_tensor

tensor([[[10., 10., 10.],
         [10., 10., 10.],
         [10., 10., 10.],
         [10., 10., 10.]],

        [[10., 10., 10.],
         [10., 10., 10.],
         [10., 10., 10.],
         [10., 10., 10.]]])

A tensor of ones, and a tensor of zeros...

In [39]:
# Exercise: replace ___ with the factory function that returns a tensor of
# the given shape filled with ones.
ones_tensor = torch.___((1,4))
ones_tensor # what type should this be? What if we add `, dtype=torch.int8`?

tensor([[1., 1., 1., 1.]])

In [41]:
# Exercise: replace ___ with the factory function that returns a tensor of
# the given shape filled with zeros.
zeros_tensor = torch.___((1,4))
zeros_tensor

tensor([[0., 0., 0., 0.]])

Generating a random matrix using `randn`

In [44]:
# Exercise: replace ___ with `randn` to fill a tensor of shape (1, 3, 2)
# with normally distributed random values. The values differ on every run.
normal_random_tensor = torch.___((1,3,2))
normal_random_tensor

tensor([[[-1.8238, -1.0401],
         [ 0.6759, -0.5347],
         [ 0.1863,  0.9090]]])

In [47]:
# Exercise: replace ___ with `cat` to concatenate the two (1, 4) tensors.
# The tensors are passed together as one tuple; the result stacks them as
# rows of a (2, 4) tensor.
ones_zeros_stack_t = torch.___((ones_tensor, zeros_tensor))
ones_zeros_stack_t

tensor([[1., 1., 1., 1.],
        [0., 0., 0., 0.]])

### Other common methods

**Exercise**: Fill in the blanks using `empty`, `zeros`, `rand`, `ones`, `eye`, `arange`, `linspace`, and `diag`.

In [52]:
# Exercise: replace each ___ with the torch function named at the start of
# the comment above it.

# empty: creates a 3x3 uninitialized matrix (contents are arbitrary)
print(torch.___(size = (3,3)))

# zeros: all zeros; the size is the first argument
print(torch.___((3,3)))

# rand: values drawn from the uniform distribution on [0, 1)
print(torch.___((3,3)))

# ones: all ones
print(torch.___((3,3)))

# eye: identity matrix, ones on the diagonal and zeros elsewhere
print(torch.___(5,5))

# arange: evenly stepped integers from start up to (not including) end,
# similar to MATLAB ranges
print(torch.___(start=0, end=5, step=1))

# linspace: `steps` evenly spaced values from start to end, both included,
# similar to MATLAB's linspace
print(torch.___(start=0.1, end=1, steps=10))

# empty + normal_: allocate an uninitialized tensor, then fill it in place
# with samples from a normal distribution (mean 0, std 1)
print(torch.___(size=(1,5)).normal_(mean=0, std=1))

# empty + uniform_: allocate an uninitialized tensor, then fill it in place
# with uniform samples between the given lower and upper bounds
print(torch.___(size=(1,5)).uniform_(0,1))

# diag: a vector of ones on the diagonal gives the identity matrix of size 3
print(torch.___(torch.ones(3)))

# Solution: guarded by `if False:` so it never executes. Change the
# condition to True to run it.
if False:
    print(torch.empty(size = (3,3))) # creates a 3x3 uninitialized matrix
    print(torch.zeros((3,3))) # size is the first arg
    print(torch.rand((3,3))) # uniform distribution on [0, 1)
    print(torch.ones((3,3))) # all ones
    print(torch.eye(5,5)) # identity matrix, ones on diag & rest zeros
    print(torch.arange(start=0, end=5, step=1)) # end is exclusive
    print(torch.linspace(start=0.1, end=1, steps=10)) # both ends included
    print(torch.empty(size=(1,5)).normal_(mean=0, std=1)) # normally distributed
    print(torch.empty(size=(1,5)).uniform_(0,1)) # uniform with lower & upper bounds
    print(torch.diag(torch.ones(3))) # same as identity matrix of size 3

tensor([[1.0441e+21, 6.5919e-10, 3.1369e+27],
        [7.0800e+31, 3.1095e-18, 1.8590e+34],
        [7.7767e+31, 7.1536e+22, 3.3803e-18]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0.0694, 0.2902, 0.4055],
        [0.1760, 0.4771, 0.5593],
        [0.0193, 0.1418, 0.8826]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
tensor([0, 1, 2, 3, 4])
tensor([0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
        1.0000])
tensor([[-1.2154, -0.5002,  0.8052,  1.4983,  0.1938]])
tensor([[0.4528, 0.5057, 0.2126, 0.9652, 0.2280]])
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])


### Converting types

In [50]:
# Type conversion with PyTorch tensors:
tensor = torch.arange(2)
# Exercise: replace each ___ with the conversion method named at the start
# of the trailing comment (bool, short, long, half, float, double).
print(tensor.___()) # bool: boolean
print(tensor.___()) # short: creates int16
print(tensor.___()) # long: int64, commonly used
print(tensor.___()) # half: float16 for newer GPU based training
print(tensor.___()) # float: float32, often used
print(tensor.___()) # double: float64

tensor([False,  True])
tensor([0, 1], dtype=torch.int16)
tensor([0, 1])
tensor([0., 1.], dtype=torch.float16)
tensor([0., 1.])
tensor([0., 1.], dtype=torch.float64)


### Matrix Multiplication

In [21]:
# Left operand: 2 rows x 3 columns.
x = torch.tensor([
    [1.0, 2.0, 3.0],
    [1.5, 0.0, 3.5],
])
# Right operand: 3 rows x 3 columns.
y = torch.tensor([
    [4.0, 5.0, 6.0],
    [1.0, 1.0, 1.0],
    [-4.0, 1.0, 0.0],
])
# Matrix product: the inner dimensions (3 and 3) must match, and the
# result takes x's row count and y's column count.
z = torch.mm(x, y)

print(x.shape, y.shape, z.shape)

print(z)

torch.Size([2, 3]) torch.Size([3, 3]) torch.Size([2, 3])
tensor([[-6., 10.,  8.],
        [-8., 11.,  9.]])


### Reshaping

In [22]:
# Start from a random tensor holding 2 * 3 * 4 = 24 values.
x = torch.randn(2, 3, 4)
print(x)
# Reshape to 2 rows, merging the last two dimensions into 3 * 4 = 12 columns.
print(x.view(2, 3 * 4))
# Same result: a dimension given as -1 is inferred from the remaining sizes.
print(x.view(2, -1))

tensor([[[ 0.5429,  0.4677, -0.2958, -0.3620],
         [-1.3519, -0.1799, -0.4973,  0.7497],
         [-0.6381, -0.2324, -1.3742, -1.2345]],

        [[ 2.1427,  1.7413, -0.6981, -1.0045],
         [-2.2387,  0.3771, -1.6122, -0.3116],
         [-1.2749, -0.3846,  0.8106, -0.4629]]])
tensor([[ 0.5429,  0.4677, -0.2958, -0.3620, -1.3519, -0.1799, -0.4973,  0.7497,
         -0.6381, -0.2324, -1.3742, -1.2345],
        [ 2.1427,  1.7413, -0.6981, -1.0045, -2.2387,  0.3771, -1.6122, -0.3116,
         -1.2749, -0.3846,  0.8106, -0.4629]])
tensor([[ 0.5429,  0.4677, -0.2958, -0.3620, -1.3519, -0.1799, -0.4973,  0.7497,
         -0.6381, -0.2324, -1.3742, -1.2345],
        [ 2.1427,  1.7413, -0.6981, -1.0045, -2.2387,  0.3771, -1.6122, -0.3116,
         -1.2749, -0.3846,  0.8106, -0.4629]])


## Computational graphs & autodifferentiation

In [23]:
# Factory functions such as torch.tensor accept a ``requires_grad`` flag.
x = torch.tensor([-1.0, 22.0, 15.0], requires_grad=True)
y = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)

# Ordinary arithmetic keeps working on tensors created this way...
z = x + y
print(z)

# ...but the result carries something extra: a grad_fn recording the
# operation that produced it.
print(z.grad_fn)

tensor([ 3., 27., 21.], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7f3488754880>


In [24]:
# Collapse z to a single scalar by adding up all of its entries; the
# result records the sum operation as its grad_fn.
s = torch.sum(z)
print(s)
print(s.grad_fn)

tensor(51., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x7f34b86154f0>


In [25]:
# Calling .backward() on a tensor runs backprop, starting from it.
# Before the backward pass, nothing has been accumulated into x.grad yet.
print("x.grad: ", x.grad)
s.backward()
# s is the sum of the entries of x + y, so ds/dx_i = 1 for every entry of x.
print("x.grad: ", x.grad)

x.grad:  None
x.grad:  tensor([1., 1., 1.])


In [26]:
# A result computed from a tensor that requires grad is tracked as well.
print(x.requires_grad)
print((x ** 2).requires_grad)

# Inside torch.no_grad() there is NO need to calculate gradients, so
# results computed in this block are not tracked.
with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


## PyTorch Autograd - Example 1

In [27]:
# Three scalar tensors, each tracked by autograd.
x = torch.tensor(1.0, requires_grad=True)
w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(3.0, requires_grad=True)

# Forward pass: evaluating the expression also records the graph.
y = w * x + b    # y = 2 * 1 + 3

# Backward pass: fills in .grad on x, w and b.
y.backward()

# Each gradient next to its analytic value.
print(x.grad)    # dy/dx = w = 2
print(w.grad)    # dy/dw = x = 1
print(b.grad)    # dy/db = 1

tensor(2.)
tensor(1.)
tensor(1.)


## PyTorch Autograd - Exercise 1

Build a computational graph for $y = ( w_1 * x + b_1 ) * w_2 + b_2$

Compute gradients for $w_1$, $w_2$, $b_1$, and $b_2$


In [28]:
# Create tensors.
# x is created with requires_grad=False, so autograd does not track it and
# x.grad is still None after the backward pass.
x = torch.tensor(1., requires_grad=False)
w1 = torch.tensor(2., requires_grad=True)
b1 = torch.tensor(3., requires_grad=True)
w2 = torch.tensor(4., requires_grad=True)
b2 = torch.tensor(5., requires_grad=True)

# Build a computational graph.
y = ( w1 * x + b1 ) * w2 + b2

# Compute gradients.
# Exercise: replace the blank with the method that runs backprop from y.
y._____()

# Print out the gradients.
print(x.grad)     # None: x does not require grad
print(w1.grad)    # dy/dw1 = x * w2
print(b1.grad)    # dy/db1 = w2
print(w2.grad)    # dy/dw2 = w1 * x + b1
print(b2.grad)    # dy/db2 = 1

None
tensor(4.)
tensor(4.)
tensor(5.)
tensor(1.)


## PyTorch Autograd - Example 2

In [30]:
# One step of gradient descent on a small linear regression problem.
# `nn` (torch.nn) is imported in the import cell at the top of the notebook.

# Create tensors of shape (10, 3) and (10, 2): the inputs and the targets.
x = torch.randn(10, 3)
y = torch.randn(10, 2)

# Build a fully connected layer mapping 3 input features to 2 outputs.
linear = nn.Linear(3, 2)
print ('w: ', linear.weight)
print ('b: ', linear.bias)

# Build loss function and optimizer.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)

# Forward pass.
pred = linear(x)

# Compute loss.
loss = criterion(pred, y)
print('loss: ', loss.item())

# Clear any previously accumulated gradients. backward() adds into .grad
# rather than overwriting it, so the usual order for a training step is
# zero_grad -> backward -> step. This keeps the cell correct if it is later
# extended into a loop.
optimizer.zero_grad()

# Backward pass.
loss.backward()

# Print out the gradients.
print ('dL/dw: ', linear.weight.grad)
print ('dL/db: ', linear.bias.grad)

# 1-step gradient descent.
optimizer.step()

# You can also perform gradient descent at the low level. The in-place
# update must happen under torch.no_grad() so autograd does not record it:
# with torch.no_grad():
#     linear.weight.sub_(0.01 * linear.weight.grad)
#     linear.bias.sub_(0.01 * linear.bias.grad)

# Print out the loss after 1-step gradient descent.
pred = linear(x)
loss = criterion(pred, y)
print('loss after 1 step optimization: ', loss.item())

w:  Parameter containing:
tensor([[ 0.4288, -0.4523,  0.2576],
        [-0.2160, -0.1148,  0.3806]], requires_grad=True)
b:  Parameter containing:
tensor([ 0.0357, -0.4274], requires_grad=True)
loss:  1.5952345132827759
dL/dw:  tensor([[ 0.2396, -0.0549,  0.4369],
        [-0.6651, -0.1090,  0.7836]])
dL/db:  tensor([ 0.0146, -0.4515])
loss after 1 step optimization:  1.58008873462677
