In [1]:
import torch
import numpy as np

# Tensors

In [2]:
data = [[1, 2],[3, 4]]
x_data = torch.tensor(data)

In [3]:
data

[[1, 2], [3, 4]]

In [4]:
x_data

tensor([[1, 2],
        [3, 4]])

In [5]:
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.5307, 0.3584],
        [0.6453, 0.4945]]) 



In [7]:
tensor = torch.rand(3,4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [9]:
# We move our tensor to the GPU if available
if torch.cuda.is_available():
  tensor = tensor.to('cuda')
else:
    print('False')

False


In [12]:
tensor_a = torch.ones(5,5)
tensor_b = torch.eye(5,5)
tensor_a * tensor_b

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [31]:
tensor = torch.ones(4,4)
tensor[0:1,:] = 3
print(tensor)
tensor = torch.ones(4,4)
tensor[:,0:1] = 3
print(tensor)

tensor([[3., 3., 3., 3.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[3., 1., 1., 1.],
        [3., 1., 1., 1.],
        [3., 1., 1., 1.],
        [3., 1., 1., 1.]])


In [20]:
tensor[:,0:2] = 4
tensor

tensor([[4., 4., 4., 4.],
        [4., 4., 1., 1.],
        [4., 4., 1., 1.],
        [4., 4., 1., 2.]])

In [35]:
cat_tensor = torch.cat([tensor_a,tensor_b],dim=0)
cat_tensor

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [34]:
cat_tensor = torch.cat([tensor_a,tensor_b],dim=1)
cat_tensor

tensor([[1., 1., 1., 1., 1., 1., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 0., 1., 0., 0., 0.],
        [1., 1., 1., 1., 1., 0., 0., 1., 0., 0.],
        [1., 1., 1., 1., 1., 0., 0., 0., 1., 0.],
        [1., 1., 1., 1., 1., 0., 0., 0., 0., 1.]])

In [44]:
# matrix multiplication
tensor_a = torch.full((2,2),3)
tensor_b = torch.full((2,2),4)

In [45]:
tensor_a * tensor_b

tensor([[12, 12],
        [12, 12]])

In [46]:
tensor_a.matmul(tensor_b.T)

tensor([[24, 24],
        [24, 24]])

In [48]:
# in place operations have _ suffix
tensor = torch.ones((3,3))
tensor.add_(5)

tensor([[6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.]])

In [52]:
# makes a shallow copy
t = torch.ones(5)
n = t.numpy()

# AutoGrad
automatic differntiation engine that powers neural net training

#### Neural Nets
- NN are a collection of nested functions that are executed on some input data
- these functions are defined by parameters (consisting of weights and biases) whihc Pytorch stored in tensors

#### Training in 2 steps
Forware Propogation
- NN makes it best guess about the correct output
- runs input data through each of its functions to make this guess

Backware Propogation
- NN adjusts params proportianate to the error in its guess
- it does this by treversing backwards from the output, collection the derivatves of the error with respect ot the the parameters of the functions (gradients), and optimizing the parameters using gradient descent

https://www.youtube.com/watch?v=tIeHLnjs5U8

# Example Train

In [53]:
import torch, torchvision
model = torchvision.models.resnet18(pretrained=True)
data = torch.rand(1,3,64,64)
labels = torch.rand(1,1000)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /Users/justinfernandez/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




- load pretrained resnet18 model
- random data to represent a single image with 3 channels
- 64x64
- label = random

In [54]:
prediction = model(data)

- use models pred and label to calculate error
- back propogate error this error throuhg the net

In [57]:
loss = (prediction - labels).sum()
loss.backward()

- Next load optimizer (SGD) with a learning rate of 0.01 and momentum of 0.9
- register all params of model in optimizer

In [62]:
optim = torch.optim.SGD(model.parameters(),lr=1e-2,momentum=0.9)

call step to intiate gradient descent

In [64]:
optim.step()

This is evreything to train a model

# Differentiation in Autograd

looking at how autograd collects gradients

In [66]:
# requires grad tells us every operation should be tracked
a = torch.tensor([2.,3.],requires_grad=True)
b = torch.tensor([6.,4.],requires_grad=True)

In [67]:
# Q = 3a^3 - b^2
Q = 3*a**3 - b**2

A and b are parameters of a NN and Q is the error. Training we want graditents of error wrt parameters

when call `.backward()` on Q, autograd calculates these gradients and stores them in the respective tensors `.grad` attribute

In [69]:
external_grad = torch.tensor([1.,1.])
Q.backward(gradient=external_grad)

In [70]:
# check if collected gradients are correct
print(9*a**2 == a.grad)
print(-2*b == b.grad)

tensor([True, True])
tensor([True, True])


# Computational Graph

- autograd keeps a record of data (tensors) and all executed operations (along with the resulting new tensors) in a direted acyclic graph (DAG)
- leaves are input tensors, roots are output

In a forward pass, autograd does two things simultaneously:
- run the requested operation to compute a resulting tensor, and
- maintain the operation’s gradient function in the DAG.

The backward pass kicks off when .backward() is called on the DAG root. autograd then:
- computes the gradients from each .grad_fn,
- accumulates them in the respective tensor’s .grad attribute, and
- using the chain rule, propagates all the way to the leaf tensors.

In [71]:
x = torch.rand(5, 5)
y = torch.rand(5, 5)
z = torch.rand((5, 5), requires_grad=True)

a = x + y
print(f"Does `a` require gradients? : {a.requires_grad}")
b = x + z
print(f"Does `b` require gradients?: {b.requires_grad}")

Does `a` require gradients? : False
Does `b` require gradients?: True


Parametrs that dont computre gradients are called **frozen parameters**. Useful to freeze part of model if you wont need the gradients (performance benefits)

In finetuning we freeze most of the model and typically only modify the classifier layers to make predictions

In [72]:
from torch import nn, optim

model = torchvision.models.resnet18(pretrained=True)

# freze all params
for param in model.parameters():
    param.requires_grad = False

In [74]:
# we can finetune model on new datasest with 10 labels
# last linear layer for resent is model.fc
model.fc = nn.Linear(512,10)

In [75]:
# Optimize only the classifier
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)

Stopped here

https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html