## This is a tutorial for Pytorch.

### Tensors

In [53]:
# Make the necessary imports.
import torch, torchvision
import numpy as np
from torch import nn, optim

In [21]:
# Initialise the tensors.
data = [[1,2],[3,4]]
x_data = torch.tensor(data)

In [22]:
# Also, tensors can be created from numpy arrays.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

In [23]:
x_ones = torch.ones_like(x_data)
print(f"Ones Tensor: \n {x_ones} \n") # torch retains the data type unless explicitly overridden

x_rand = torch.rand_like(x_data, dtype=torch.float) # override data type
print(f"Random Tensor: \n {x_rand}\n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.6180, 0.6362],
        [0.4021, 0.6199]])



In [24]:
# shape is a tuple of tensor dimensions
shape = (2,3)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor} \n")

Random Tensor: 
 tensor([[0.2497, 0.7111, 0.8379],
        [0.6816, 0.5144, 0.3254]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]]) 



In [25]:
tensor = torch.rand(3,4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [26]:
# Move our tensor to GPU if available
if torch.cuda.is_available():
    tensor = tensor.to('cuda')

print(f"Device tensor is stored on: {tensor.device}")

Device tensor is stored on: cuda:0


In [27]:
tensor = torch.ones(4,4)
tensor[:,1] = 0
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [28]:
# concatenate tensors

t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


In [29]:
# Multiply element-wise tensors
print(f"tensor.mul(tensor) \n {tensor.mul(tensor)} \n")

# Alternative syntax:
print(f"tensor * tensor \n {tensor * tensor} \n")

tensor.mul(tensor) 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor * tensor 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 



In [30]:
# Matrix multiplication
print(f"tensor.matmul(tensor.T) \n {tensor.matmul(tensor.T)} \n")

# Alternative syntax
print(f"tensor @ tensor.T \n {tensor @ tensor.T}")

tensor.matmul(tensor.T) 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]]) 

tensor @ tensor.T 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])


In [31]:
# In place operations: operations that have _ are in place.
# The use of in place operations is discouraged because they leed to a loss of history and derivatives can not be computed.
print(tensor, "\n")
tensor.add_(5)
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])


In [32]:
# From tensor to numpy
t = torch.ones(5)
print(f"t: {t}")
n = t.numpy()
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


In [33]:
# Changes in tensors reflect changes in the numpy array.
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.])
n: [2. 2. 2. 2. 2.]


In [34]:
# NumPy array to tensor.
n = np.ones(5)
t = torch.from_numpy(n)

In [35]:
# Changes in numpy array are reflected on tensors.
np.add(n, 1, out=n)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
n: [2. 2. 2. 2. 2.]


### torch.autograd

In [38]:
# Load a pretrained model.
model = torchvision.models.resnet18(pretrained=True)
data = torch.rand(1, 3, 64, 64)
labels = torch.rand(1, 1000)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /home/abar/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth
100.0%


In [40]:
# make a prediction.
prediction = model(data)

In [43]:
# Caculate the loss and start backpropagating by calculating the gradients.
loss = (prediction - labels).sum()
loss.backward()

RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling backward the first time.

In [44]:
# Set the optimizer
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [46]:
# Change the weights accordingly to the gradients following stochastic gradient descent with momentum.
optim.step()

In [47]:
# Details of autograd. For this reason we define two tensors that require gradients.
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

We create a new tensor that is a function of a,b.
$$ Q = 3*a^3 - b^2 $$

In [48]:
Q = 3*a**3 - b**2 # Assume that Q is the error function.

In [49]:
# When we call .backward() on Q, autograd calculates the derivatives of Q with respect to all its parameters - variables and stores them in .grad attribute. We can either aggregate Q into a scalar and call .backward implicitly (Q.sum().backward()) or explicitly pass to .backward an argument vector "gradient" of Q w.r.t. itself.

external_grad = torch.tensor([1., 1.])
Q.backward(gradient=external_grad)

In [51]:
# Gradients are stored in .grad
print(9*a**2 == a.grad)
print(-2*b == b.grad)

tensor([True, True])
tensor([True, True])


In [52]:
x = torch.rand(5,5)
y = torch.rand(5,5)
z = torch.rand((5,5), requires_grad=True)

a = x + y
print(f"Does 'a' require gradients? : {a.requires_grad}")
b = x + z
print(f"Does 'z' require gradients? : {z.requires_grad}")

Does 'a' require gradients? : False
Does 'z' require gradients? : True


In [54]:
model = torchvision.models.resnet18(pretrained=True)

# Freeze all the parameters in the network
for param in model.parameters():
    param.requires_grad = False

In [55]:
model.fc = nn.Linear(512, 10)
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)