<a href="https://colab.research.google.com/github/above-avg/pytorchblitz/blob/main/pytorchblitz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction to Tensors in Pytorch

In [None]:
import torch
import numpy as np

In [None]:
data = [[1,2],[3,4]]
x_data = torch.tensor(data)

In [None]:
np_array =  np.array(data)
x_np = torch.from_numpy(np_array)

In [None]:
x_ones = torch.ones_like(x_data)
print(f"Ones Tensor: \n {x_ones}\n")

x_rand = torch.rand_like(x_data, dtype=torch.float)
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]])

Random Tensor: 
 tensor([[0.8258, 0.3626],
        [0.2661, 0.5532]]) 



In [None]:
shape = (2,3,)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensors : \n {rand_tensor}\n")
print(f"Ones Tensor: \n {ones_tensor}\n")
print(f"Zeros Tensor: \n {zeros_tensor}\n")

Random Tensors : 
 tensor([[0.2086, 0.7353, 0.0478],
        [0.5218, 0.7553, 0.8060]])

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]])

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])



In [None]:
tensor = torch.rand(3,4)

print(f"Shape of the tensor {tensor.shape}")
print(f"Dtype of the tensor {tensor.dtype}")
print(f"Device the tensor stored in: {tensor.device}")

Shape of the tensor torch.Size([3, 4])
Dtype of the tensor torch.float32
Device the tensor stored in: cpu


In [None]:
if torch.cuda.is_available():
  tensor = tensor.to('cuda')
  print(f"Device, the tensor is on: {tensor.device}")

Device, the tensor is on: cuda:0


In [None]:
tensor = torch.ones(4,4)
tensor[:,1] = 0
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [None]:
t1 = torch.cat([tensor,tensor,tensor],dim=1)
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


In [None]:
print(f"tensor.mul(tensor)\n {tensor.mul(tensor)}\n")
#alt syntax
print(f"tensor * tensor \n {tensor * tensor}")

tensor.mul(tensor)
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

tensor * tensor 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [None]:
print(f"tensor.matmul(tensor)\n {tensor.matmul(tensor)}\n")
# alt syntax
print(f"tensor @ tensor.T \n {tensor @ tensor.T}")

tensor.matmul(tensor)
 tensor([[3., 0., 3., 3.],
        [3., 0., 3., 3.],
        [3., 0., 3., 3.],
        [3., 0., 3., 3.]])

tensor @ tensor.T 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])


In [None]:
print(tensor, "\n")
tensor.add_(5)
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])


In [None]:
t = torch.ones(5)
print(f"t: {t}")
n = t.numpy()
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


In [None]:
t.add_(1)
print(t)
print(n)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [None]:
n = np.ones(5)
t = torch.from_numpy(n)

In [None]:
np.add(n, 1, out=n)

array([2., 2., 2., 2., 2.])

In [None]:
print(t)

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [None]:
print(n)

[2. 2. 2. 2. 2.]


# A gentle introduction to torch.autograd (works only on CPU)


In [None]:
import torch
from torchvision.models import resnet18, ResNet18_Weights
model = resnet18(weights=ResNet18_Weights.DEFAULT)
data = torch.rand(1,3,64,64)
labels = torch.rand(1,1000)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 140MB/s]


In [None]:
prediction = model(data) # forward pass

In [None]:
loss = (prediction-labels).sum()
loss.backward() #backward pass

In [None]:
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [None]:
optim.step() #gradient descent

## Differentiation in Autograd

In [None]:
import torch

a = torch.tensor([2.,3.], requires_grad=True)
b = torch.tensor([6.,4.], requires_grad=True)

In [None]:
Q = 3*a**3 - b**2

In [None]:
external_grad = torch.tensor([1.,1.])
Q.backward(gradient=external_grad)

In [None]:
print(9*a**2 == a.grad)
print(-2*b == b.grad)

tensor([True, True])
tensor([True, True])


In [None]:
x = torch.rand(5,5)
y = torch.rand(5,5)
z = torch.rand((5,5), requires_grad=True)

a = x+y
print(f"Does 'a' require gradients?: {a.requires_grad}")
b = x+z
print(f"Does 'b' require gradients?: {b.requires_grad}")

Does 'a' require gradients?: False
Does 'b' require gradients?: True


In [None]:
from torch import nn, optim

model = resnet18(weights=ResNet18_Weights.DEFAULT)

#Freeze all the parameters in the network
for param in model.parameters():
  param.requires_grad = False

In [None]:
model.fc = nn.Linear(512, 10)

In [None]:
#Optimize only the classifier
optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

# Neural Networks

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):

  def __init__(self):
    super(Net,self).__init__()
    # 1 input img channels, 6 ouput channels, 5x5 square convolution kernel
    self.conv1 = nn.Conv2d(1, 6, 5)
    # 6 input img channels, 16 output channels, 5x5 square convolution kernel
    self.conv2 = nn.Conv2d(6, 16, 5)
    # fully connected layers gradually bringing down 16*5*5  to 10
    self.fc1 = nn.Linear(16*5*5, 120)
    self.fc2 = nn.Linear(120,84)
    self.fc3 = nn.Linear(84,10)

  def forward(self, input):
    # First conv layer to convert grey-scale into 6 feature maps 5x5 kernels
    c1 = F.relu(self.conv1(input)) # [1,1,32,32] -> [1,6,28,28]
    # Getting the max from the features for robustness
    s2 = F.max_pool2d(c1, (2,2)) # [1,6,28,28] -> [1,6,14,14]
    # Second conv layter to convert 6 feature maps to 16 (computational size decrease, complexity increase)
    c3 = F.relu(self.conv2(s2)) # [1,6,14,14] -> [1,16,10,10]
    # Getting the max from the features for robustness
    s4 = F.max_pool2d(c3, 2) # [1,16,10,10] -> [1,16,5,5]
    # Converting it into a 1D feature list
    s4 = torch.flatten(s4, 1) # [1,16,5,5] -> [1,16*5*5 = 400]
    '''
    Fully connected layer converts 400 to 120:
    nn.Linear(16*5*5,120) basically creates two things:
    weight matrix: [120,400] (120 neurons with 400 weights each)
    bias vector: [120] (120 neurons with 1 bias each)

    then, output = input @ weight.T + bias
                  [1,400] * [400,120] + [120] = [1,120]

    other fc layers also follow the same technique:
                  [1,120] -> [1,84]
                  [1,84]  -> [1,10]
    '''
    f5 = F.relu(self.fc1(s4))

    f6 = F.relu(self.fc2(f5))

    output = self.fc3(f6)
    return output

net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:
params = list(net.parameters())
print(len(params))
print(params[0].size()) #conv1's weight

10
torch.Size([6, 1, 5, 5])


In [4]:
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0197, -0.0060,  0.0412, -0.1049, -0.1856, -0.1384,  0.0042, -0.0105,
         -0.0603,  0.1498]], grad_fn=<AddmmBackward0>)


In [5]:
net.zero_grad()
out.backward(torch.randn(1,10))

## Loss function

In [6]:
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(0.3137, grad_fn=<MseLossBackward0>)


In [7]:
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) #ReLU

<MseLossBackward0 object at 0x7f3262cfbbe0>
<AddmmBackward0 object at 0x7f3262a9c940>
<AccumulateGrad object at 0x7f32640107c0>


## Backprop

In [8]:
net.zero_grad() #zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([ 0.0052,  0.0101,  0.0083,  0.0010,  0.0049, -0.0013])


## Update the weights
The simplest update rule used is the Stochastic Gradient Descnet (SGD)

In [10]:
# weight = weight - learning_rate * gradient

In [11]:
learning_rate = 0.01
for f in net.parameters():
  f.data.sub_(f.grad.data * learning_rate)

In [12]:
import torch.optim as optim

# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop
optimizer.zero_grad() # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # does the update