# https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html

## Tensors

In [4]:
import torch
import numpy as np

### Tensor Initialization

Tensors are a data structure similar to arrays and matrices.
Tensors encode the inputs and outputs of a model, and the model's parameters.
Tensors are similar to numpy ndarrays.

In [5]:
# tensors can be created directly from data - the data type is inferred.
data = [[1, 2], [3,4 ]]
x_data = torch.tensor(data)

In [6]:
# tensors can be created from a numpy array
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

In [7]:
# tensors can be created from another tensor
# properties like shape/datatype are retained unless overridden
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.0593, 0.5889],
        [0.4823, 0.6593]]) 



In [8]:
# tensors can be created from a tuple of dimensions
shape = (2, 3,)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.9500, 0.0427, 0.8385],
        [0.5752, 0.1868, 0.8477]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


### Tensor Attributes

In [9]:
# tensor attributes describe shape, datatype, and device on which they're stored
tensor = torch.rand(3, 4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### Tensor Operations

Tensor operations include transpose, index, slice, mathematical operations, linear algebra,
random sampling, etc...

See: https://pytorch.org/docs/stable/torch.html

In [10]:
# move a tensor to the GPU, if available
print(f"GPU is available: {torch.cuda.is_available()}")

GPU is available: False


In [11]:
if torch.cuda.is_available():
    tensor = tensor.to('cuda')
    print(f"Device tensor is stored on: {tensor.device}")

EXAMPLE TENSOR OPERATIONS

In [12]:
# numpy-like indexing and slicing
tensor = torch.ones(4, 4)
tensor[:,1] = 0
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [17]:
# joining tensors
# concatenate a sequence of tensors along a given dimension
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


In [16]:
# concatenate a sequence of tensors along a new dimension
t2 = torch.stack([tensor, tensor, tensor])
print(t2) 

tensor([[[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]],

        [[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]],

        [[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]]])


In [19]:
# multiplying tensors
# computes the element-wise product
print(tensor)
print(f"tensor.mul(tensor) \n {tensor.mul(tensor)} \n")
# Alternative syntax:
print(f"tensor * tensor \n {tensor * tensor}")

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])
tensor.mul(tensor) 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor * tensor 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [20]:
# matrix multiplication between tensors
print(tensor)
print(f"tensor.matmul(tensor.T) \n {tensor.matmul(tensor.T)} \n")
# Alternative syntax:
print(f"tensor @ tensor.T \n {tensor @ tensor.T}")

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])
tensor.matmul(tensor.T) 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]]) 

tensor @ tensor.T 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])


In [21]:
# in-place operations have a _suffix.
# they can save memory, but cause a loss of history, which affects computing derivatives
# NOT RECOMMENDED

print(tensor, "\n")
tensor.add_(5)
print(tensor, "\n")

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]]) 



### Bridge with NumPy

In [None]:
# tensors on CPU and NumPy arrays can share underlying memory locations
# (changing one will change the other)

In [22]:
# tensor to numpy array
t = torch.ones(5)
print(f"t: {t}")
n = t.numpy()
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


In [23]:
# change in tensor is reflected in numpy array
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.])
n: [2. 2. 2. 2. 2.]


In [25]:
# numpy array to tensor
n = np.ones(5)
print(f"n: {n}")
t = torch.from_numpy(n)
print(f"t: {t}")

n: [1. 1. 1. 1. 1.]
t: tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


In [26]:
# changes in numpy array are reflected in tensor
np.add(n, 1, out=n)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
n: [2. 2. 2. 2. 2.]


# torch.autograd

torch.autograd = automatic differentiation engine that powers neural network (NN) training.

## background 

- NN's are a collection of nested functions that are executed on some input data.
- functions are defined by PARAMETERS (weights and biases), which pytorch stores in tensors

### two steps to training NN:

1. forward propagation - In forward prop, the NN makes its best guess about the correct output. It runs the input data through each of its functions to make this guess.

2. backward propagation - In backprop, the NN adjusts its parameters proportionate to the error in its guess. It does this by traversing backwards from the output, collecting the derivatives of the error with respect to the parameters of the functions (gradients), and optimizing the parameters using gradient descent.

## usage in pytorch

For this example, we load a pretrained resnet18 model from torchvision. We create a random data tensor to represent a single image with 3 channels, and height & width of 64, and its corresponding label initialized to some random values. Label in pretrained models has shape (1,1000).

In [4]:
# disable SSL verification to download model
# https://stackoverflow.com/questions/71263622/sslcertverificationerror-when-downloading-pytorch-datasets-via-torchvision
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

import torch
from torchvision.models import resnet18, ResNet18_Weights
model = resnet18(weights=ResNet18_Weights.DEFAULT)
data = torch.rand(1, 3, 64, 64)
labels = torch.rand(1, 1000)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/brianfoster/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100.0%


Next, we run the input data through the model through each of its layers to make a prediction. This is the forward pass.

In [5]:
prediction = model(data) # forward pass

We use the model’s prediction and the corresponding label to calculate the error (loss). 
The next step is to backpropagate this error through the network. 
Backward propagation is kicked off when we call .backward() on the error tensor. 
Autograd then calculates and stores the gradients for each model parameter in the parameter’s .grad attribute.

In [8]:
loss = (prediction - labels).sum()
loss.backward() # backward pass

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

Next, we load an optimizer, in this case SGD with a learning rate of 0.01 and momentum of 0.9. We register all the parameters of the model in the optimizer.

In [9]:
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

Finally, we call .step() to initiate gradient descent. The optimizer adjusts each parameter by its gradient stored in .grad.

In [10]:
optim.step() #gradient descent

## differentiation in autograd

Let’s take a look at how autograd collects gradients. We create two tensors a and b with requires_grad=True. This signals to autograd that every operation on them should be tracked.

In [None]:
import torch

a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)