# Intro to PyTorch

In [1]:
import torch

In [2]:
# Create a 6x2 float tensor from nested Python lists.
# `torch.tensor` is the recommended factory for existing data. The dtype is
# given explicitly because integer literals would otherwise produce an int64
# tensor, and the floating-point examples below (mean, std, randn_like) need
# a floating-point input.
example_tensor = torch.tensor(
    [
        [1, 2], [3, 4],
        [5, 6], [7, 8],
        [9, 10], [11, 12],
    ],
    dtype=torch.float32,
)

example_tensor

tensor([[ 1.,  2.],
        [ 3.,  4.],
        [ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.],
        [11., 12.]])

## Tensor properties

In [3]:
# Check which device (CPU or GPU) the tensor is stored on:
example_tensor.device

device(type='cpu')

In [4]:
# Move a tensor to another device (`.to` returns the result; it does not modify the tensor in place):
# new_tensor = example_tensor.to(device)  # where device is 'cpu' or 'cuda'

In [5]:
# Print the full shape, then the size of each dimension on one line:
print(example_tensor.shape)
print(*example_tensor.shape)

torch.Size([6, 2])
6 2


In [6]:
# Rank (number of dimensions):
example_tensor.dim()

2

In [7]:
# Number of elements (the product of all dimension sizes, here 6 * 2):
example_tensor.numel()

12

In [8]:
# Index a tensor with zero-based [row, column]; the result is a 0-dim tensor:
example_tensor[1,1]

tensor(4.)

In [9]:
# Slice: all rows of column 0 (the first column).
example_tensor[:,0]

tensor([ 1.,  3.,  5.,  7.,  9., 11.])

In [10]:
# Slice: all columns of row 0 (the first row).
example_tensor[0,:]

tensor([1., 2.])

## Initialize tensors

In [11]:
# Tensor of ones with the same shape as example_tensor.
torch.ones_like(example_tensor)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])

In [12]:
# Tensor of zeros with the same shape as example_tensor.
torch.zeros_like(example_tensor)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])

In [13]:
# Standard-normal random values with the same shape as example_tensor
# (the values differ on every run unless a seed is set).
torch.randn_like(example_tensor)

tensor([[-0.8430, -1.4126],
        [-0.6290,  0.2915],
        [ 0.4129, -1.0184],
        [-1.7985,  1.0083],
        [-0.2738, -0.3596],
        [ 0.2478, -0.9492]])

In [14]:
# Standard-normal random values of an explicit shape (2x2), created on the given device.
torch.randn(2, 2, device='cpu')

tensor([[-1.5614, -0.8446],
        [ 0.5287,  0.3602]])

## Basic functions

In [16]:
# Arithmetic with a scalar is applied to every element of the tensor.
(example_tensor-5) * 2

tensor([[-8., -6.],
        [-4., -2.],
        [ 0.,  2.],
        [ 4.,  6.],
        [ 8., 10.],
        [12., 14.]])

In [17]:
# Mean over all elements; the result is a 0-dim tensor.
example_tensor.mean()

tensor(6.5000)

In [18]:
# Standard deviation over all elements. By default this is the sample
# standard deviation (divides by N - 1), which is why the values 1..12
# give sqrt(13) ~= 3.6056.
example_tensor.std()

tensor(3.6056)

In [20]:
# Mean along a particular dimension: reducing over dimension 0 (the rows)
# leaves one mean per column.
example_tensor.mean(0)  # 0th dimension

tensor([6., 7.])

## Neural network module

In [24]:
import torch.nn as nn

# Fully connected layer mapping 10 input features to 2 output features.
linear = nn.Linear(in_features=10, out_features=2)

# A batch of 3 samples with 10 features each.
linear_input = torch.randn(3, 10)

# Calling the layer applies it to every row of the batch.
linear_output = linear(linear_input)
linear_output

tensor([[ 0.7766,  0.7931],
        [-0.6373, -0.2534],
        [ 0.1682, -0.7006]], grad_fn=<AddmmBackward0>)

In [26]:
# ReLU leaves positive entries unchanged and replaces negative ones with 0.
relu = nn.ReLU()
relu_output = relu(linear_output)
relu_output

tensor([[0.7766, 0.7931],
        [0.0000, 0.0000],
        [0.1682, 0.0000]], grad_fn=<ReluBackward0>)

In [28]:
# BatchNorm1d takes the number of features (columns) of its input. A newly
# created module is in training mode, so each column is normalized with the
# mean and variance of the current batch.
batchnorm = nn.BatchNorm1d(2)  # number of input features
batchnorm_output = batchnorm(relu_output)
batchnorm_output

tensor([[ 1.3839,  1.4142],
        [-0.9441, -0.7071],
        [-0.4398, -0.7071]], grad_fn=<NativeBatchNormBackward0>)

In [29]:
# A small MLP block: linear layer -> batch normalization -> ReLU.
mlp_layer = nn.Sequential(
    nn.Linear(in_features=5, out_features=2),
    nn.BatchNorm1d(num_features=2),
    nn.ReLU(),
)

# 5 samples with 5 features each; adding 1 shifts the standard-normal draws.
mlp_input = torch.randn(5, 5) + 1
mlp_layer(mlp_input)

tensor([[0.0000, 0.0668],
        [0.0000, 0.1340],
        [1.1970, 1.5653],
        [0.8587, 0.0000],
        [0.2530, 0.0000]], grad_fn=<ReluBackward0>)

## Optimizers

In [30]:
import torch.optim as optim
# Adam optimizer over all parameters of mlp_layer, with a learning rate of 0.1.
adam_opt = optim.Adam(mlp_layer.parameters(), lr=1e-1)

## Training loop

1. Set gradients to zero: `opt.zero_grad()`
2. Compute loss.
3. Back propagate: `loss.backward()`
4. Update parameters: `opt.step()`

In [37]:
# One optimization step on a fresh random batch of 100 samples.
train_input = torch.randn(100, 5) + 1

# 1. Clear gradients left over from any previous step.
adam_opt.zero_grad()

# 2. Loss: mean absolute difference between the network outputs and 1.
cur_loss = (1 - mlp_layer(train_input)).abs().mean()

# 3. Back-propagate, then 4. let the optimizer update the parameters.
cur_loss.backward()
adam_opt.step()
cur_loss

tensor(0.4742, grad_fn=<MeanBackward0>)

## Gradients

In [38]:
# Calculate the gradient with respect to a particular tensor for which PyTorch
# would not normally store the gradient. (The tensor is changed in place.)

# example_tensor.requires_grad_()

In [39]:
# To prevent gradients from being calculated for a block of code:
# with torch.no_grad():
#     ...

In [40]:
# Use a tensor's value without calculating its gradients:
# detached = some_tensor.detach()

## Extending the `nn` module

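Any `nn.Parameter`, or any sub-module such as `nn.Linear`, that is assigned as an attribute of the class is registered automatically, so its values show up in `parameters()`.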

In [41]:
class MyNet(nn.Module):
    """Linear layer followed by an element-wise power with a learnable exponent.

    The weight and bias of the linear layer and the scalar exponent are all
    registered on the module, so every one of them appears in ``parameters()``.
    """

    def __init__(self, input_dims, output_dims):
        """Create a linear map from ``input_dims`` to ``output_dims`` features
        and a scalar exponent initialized to 1."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_dims, out_features=output_dims)
        self.exponent = nn.Parameter(torch.tensor(1.0))

    def forward(self, x):
        """Apply the linear layer, then raise each output to ``self.exponent``."""
        projected = self.linear(x)
        return torch.pow(projected, self.exponent)

In [42]:
# parameters() lists the module's own parameters first (exponent), followed by
# those of its sub-modules (the weight and bias of linear).
example_mynet = MyNet(10,2)
list(example_mynet.parameters())

[Parameter containing:
 tensor(1., requires_grad=True),
 Parameter containing:
 tensor([[-0.1318, -0.0488,  0.1173, -0.0171, -0.0805,  0.2370, -0.0581, -0.0773,
          -0.1309, -0.3126],
         [ 0.0907, -0.0775,  0.1297, -0.0939,  0.2540,  0.2680, -0.2161,  0.1853,
          -0.1456, -0.1704]], requires_grad=True),
 Parameter containing:
 tensor([-0.0323,  0.1736], requires_grad=True)]

In [44]:
# The same parameters, each paired with its dotted attribute name.
list(example_mynet.named_parameters())

[('exponent',
  Parameter containing:
  tensor(1., requires_grad=True)),
 ('linear.weight',
  Parameter containing:
  tensor([[-0.1318, -0.0488,  0.1173, -0.0171, -0.0805,  0.2370, -0.0581, -0.0773,
           -0.1309, -0.3126],
          [ 0.0907, -0.0775,  0.1297, -0.0939,  0.2540,  0.2680, -0.2161,  0.1853,
           -0.1456, -0.1704]], requires_grad=True)),
 ('linear.bias',
  Parameter containing:
  tensor([-0.0323,  0.1736], requires_grad=True))]

In [45]:
# Calling the module runs forward() on a batch of 2 samples with 10 features each.
example_mynet(torch.randn(2,10))

tensor([[ 0.2511,  0.5484],
        [-0.3156, -0.7913]], grad_fn=<PowBackward1>)

## 2D operations

In [None]:
# nn.Conv2d: in_channels, out_channels, kernel_size
# nn.ConvTranspose2d: in_channels, out_channels, kernel_size
# nn.BatchNorm2d: num_features (number of input channels)
# nn.Upsample: size (final size) or scale_factor
# nn.functional.interpolate: size (final size) or scale_factor