In [1]:
import torch
print(torch.__version__)          # installed PyTorch build string
print(torch.cuda.is_available())  # whether PyTorch can use a CUDA GPU on this machine

2.0.1+cu117
True


In [2]:
!nvidia-smi

Sun Jun  4 17:24:29 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 531.68                 Driver Version: 531.68       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1660 Ti    WDDM | 00000000:01:00.0 Off |                  N/A |
| N/A   41C    P8                5W /  N/A|      0MiB /  6144MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# Introduction to Tensors
## Creating Tensors
`torch.tensor()`

### Scalar

In [3]:
scalar = torch.tensor(6)
scalar

tensor(6)

In [4]:
scalar.ndim

0

In [5]:
# To get value of tensor
scalar.item()

6

### Vector

In [6]:
vector = torch.tensor([2, 5])
vector

tensor([2, 5])

In [7]:
vector.ndim

1

In [8]:
vector.shape

torch.Size([2])

In [9]:
vector[1]

tensor(5)

### Matrix

In [10]:
matrix = torch.tensor([[1, 2], 
                       [3, 4]])
matrix

tensor([[1, 2],
        [3, 4]])

In [11]:
matrix.ndim

2

In [12]:
matrix.shape

torch.Size([2, 2])

In [13]:
matrix[0]

tensor([1, 2])

### Tensor

In [14]:
# A rank-3 tensor: a single outer entry that holds one 3x3 matrix.
tensor = torch.tensor([
    [[1, 2, 3],
     [2, 4, 6],
     [6, 7, 3]],
])
tensor

tensor([[[1, 2, 3],
         [2, 4, 6],
         [6, 7, 3]]])

In [15]:
tensor.ndim

3

In [16]:
tensor.shape   # this represents one(1) 3x3 matrix

torch.Size([1, 3, 3])

In [17]:
tensor[0]

tensor([[1, 2, 3],
        [2, 4, 6],
        [6, 7, 3]])

In [18]:
tensor[0][2]

tensor([6, 7, 3])

### Random Tensors
`torch.rand(size)`

In [19]:
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.6756, 0.5590, 0.1858, 0.4716],
        [0.2803, 0.3001, 0.3578, 0.6401],
        [0.7227, 0.4425, 0.9267, 0.5682]])

In [20]:
random_tensor.ndim

2

In [21]:
# Creating a tensor of shape of an image

img_tensor = torch.rand(size=(3, 224, 224))  # no. of color channels, height, width
img_tensor

tensor([[[0.3955, 0.4535, 0.5089,  ..., 0.9481, 0.9861, 0.6592],
         [0.5672, 0.0390, 0.7995,  ..., 0.4631, 0.0061, 0.7466],
         [0.1039, 0.7800, 0.7978,  ..., 0.8002, 0.1981, 0.2865],
         ...,
         [0.2687, 0.7271, 0.3954,  ..., 0.5722, 0.7938, 0.4724],
         [0.3909, 0.2547, 0.1174,  ..., 0.0517, 0.3203, 0.4984],
         [0.3506, 0.1685, 0.3401,  ..., 0.3514, 0.7068, 0.0406]],

        [[0.4064, 0.6831, 0.4439,  ..., 0.4471, 0.7389, 0.4000],
         [0.0948, 0.0073, 0.0057,  ..., 0.8700, 0.6316, 0.3754],
         [0.9455, 0.9507, 0.5699,  ..., 0.5968, 0.7966, 0.6715],
         ...,
         [0.1126, 0.0493, 0.0761,  ..., 0.9346, 0.9373, 0.8294],
         [0.6786, 0.8058, 0.9972,  ..., 0.6644, 0.4534, 0.2308],
         [0.1252, 0.0839, 0.7870,  ..., 0.7490, 0.7282, 0.7870]],

        [[0.7968, 0.5785, 0.2257,  ..., 0.9581, 0.7127, 0.3428],
         [0.9777, 0.3976, 0.1411,  ..., 0.6036, 0.4081, 0.5794],
         [0.0395, 0.1068, 0.2334,  ..., 0.0628, 0.8882, 0.

In [22]:
img_tensor.shape, img_tensor.ndim

(torch.Size([3, 224, 224]), 3)

In [23]:
torch.rand((2, 4))

tensor([[0.2641, 0.6767, 0.4403, 0.3922],
        [0.1380, 0.4463, 0.8644, 0.9850]])

### Zeros and Ones
`torch.zeros()`
`torch.ones()`

In [24]:
zeros = torch.zeros(size=(2, 3))
zeros

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [25]:
ones = torch.ones((3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

### Creating a tensor from a range
`torch.arange(start, end)` (`end` is exclusive)

In [26]:
range_tensor = torch.arange(1, 10)
range_tensor

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [27]:
range_tensor.dtype

torch.int64

### Creating tensors like
`torch.zeros_like()`
`torch.ones_like()`

In [28]:
a = torch.zeros_like(range_tensor)
a

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0])

In [29]:
a = torch.ones_like(random_tensor)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

### Other parameters of tensor

In [30]:
# Build a float32 tensor while spelling out the most important optional arguments.
# The device is picked at run time so this cell also works on a machine without a
# GPU (a hard-coded "cuda" raises there); with a GPU present it is still "cuda".
float_32_tensor = torch.tensor(
    [1, 2, 3],
    dtype=torch.float32,                                    # what datatype is the tensor
    device="cuda" if torch.cuda.is_available() else "cpu",  # what device is your tensor on
    requires_grad=False,                                    # whether to track gradients with tensor operations
)
float_32_tensor

tensor([1., 2., 3.], device='cuda:0')

In [31]:
float_32_tensor.dtype

torch.float32

In [32]:
# Cast to another datatype: float32 -> float16 (the source tensor is left as it is)

float_16_tensor = float_32_tensor.to(dtype=torch.float16)
float_16_tensor

tensor([1., 2., 3.], device='cuda:0', dtype=torch.float16)

### Common Errors in Tensors
- Tensors have the wrong datatype - to get the datatype of a tensor, use `tensor.dtype`
- Tensors have the wrong shape - to get the shape of a tensor, use `tensor.shape`
- Tensors are on the wrong device - to get the device of a tensor, use `tensor.device`

In [33]:
new_tensor = torch.rand(3, 2)
new_tensor

tensor([[0.1375, 0.1608],
        [0.8665, 0.7017],
        [0.8089, 0.7357]])

In [34]:
# Details of this tensor

print(new_tensor)
print(f"Shape of tensor: {new_tensor.shape}")
print(f"Data type of tensor: {new_tensor.dtype}")
print(f"Device of tensor: {new_tensor.device}")

tensor([[0.1375, 0.1608],
        [0.8665, 0.7017],
        [0.8089, 0.7357]])
Shape of tensor: torch.Size([3, 2])
Data type of tensor: torch.float32
Device of tensor: cpu


## Tensor Operations
* Addition
* Subtraction
* Multiplication (element wise)
* Division
* Matrix Multiplication

In [35]:
tensor = torch.tensor([1, 2, 3])
tensor

tensor([1, 2, 3])

In [36]:
tensor + 10

tensor([11, 12, 13])

In [37]:
tensor * 10

tensor([10, 20, 30])

In [38]:
torch.mul(tensor, -2)

tensor([-2, -4, -6])

In [39]:
tensor * torch.tensor(5)

tensor([ 5, 10, 15])

In [40]:
3 - tensor

tensor([2, 1, 0])

In [41]:
tensor * tensor

tensor([1, 4, 9])

In [42]:
# Matrix Multiplication (Dot Product)

torch.matmul(tensor, tensor)   # 1*1 + 2*2 + 3*3

tensor(14)

`torch.matmul(A, B)`
`torch.mm(A, B)`

In [43]:
# Two 3x2 integer matrices for the matrix-multiplication examples.
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensor_B = torch.tensor([[1, 1], [2, 1], [3, 2]])

In [44]:
tensor_A, tensor_B

(tensor([[1, 2],
         [3, 4],
         [5, 6]]),
 tensor([[1, 1],
         [2, 1],
         [3, 2]]))

In [45]:
# Matrix multiplication needs the inner dimensions to match; (3x2) @ (3x2) does not,
# so this call raises. The error is the point of the cell, so catch and display it
# instead of letting it stop a "Restart Kernel -> Run All".
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [46]:
tensor_B.T, tensor_B.T.shape

(tensor([[1, 2, 3],
         [1, 1, 2]]),
 torch.Size([2, 3]))

In [47]:
# Transpose of a tensor/matrix

torch.mm(tensor_A, tensor_B.T)

tensor([[ 3,  4,  7],
        [ 7, 10, 17],
        [11, 16, 27]])

### Tensor Aggregations

In [48]:
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [49]:
torch.min(x), x.min()

(tensor(0), tensor(0))

In [50]:
torch.max(x), x.max()

(tensor(90), tensor(90))

In [51]:
torch.sum(x), x.sum()

(tensor(450), tensor(450))

In [52]:
# torch.mean() does not accept integer tensors, and `x` (built with integer
# arguments to torch.arange) is one, so cast to float32 first.
# Both the function form and the method form are shown.

torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

### Finding positional min and max (index of min and max elements in tensor)

In [53]:
x = x + 1
x

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [54]:
x.argmin()

tensor(0)

In [55]:
x.argmax()

tensor(9)

## Reshaping, Squeezing, Stacking Tensors

* **Reshaping** - Reshapes an input tensor to a given shape
* **View** - Returns a view of the input tensor with a given shape; the view shares memory with the original tensor
* **Stacking** - Combines multiple tensors on top of each other (vstack) or side by side (hstack)
* **Squeezing** - Removes all dimensions of size `1` from the tensor
* **Unsqueezing** - Adds a dimension of size `1` to a target tensor at a given position
* **Permute** - Returns a view of the input with its dimensions permuted (swapped) in a given order

In [56]:
a = torch.arange(1., 11)
a, a.shape

(tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]), torch.Size([10]))

In [57]:
a.reshape(2, 5)

tensor([[ 1.,  2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9., 10.]])

In [58]:
a.reshape(5, -1)

tensor([[ 1.,  2.],
        [ 3.,  4.],
        [ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])

In [59]:
# View (share same memory; modifying view modifies original tensor too)

z = a.view(10, 1)
z, z.shape

(tensor([[ 1.],
         [ 2.],
         [ 3.],
         [ 4.],
         [ 5.],
         [ 6.],
         [ 7.],
         [ 8.],
         [ 9.],
         [10.]]),
 torch.Size([10, 1]))

In [60]:
z[3][0] = 55
z, a

(tensor([[ 1.],
         [ 2.],
         [ 3.],
         [55.],
         [ 5.],
         [ 6.],
         [ 7.],
         [ 8.],
         [ 9.],
         [10.]]),
 tensor([ 1.,  2.,  3., 55.,  5.,  6.,  7.,  8.,  9., 10.]))

In [61]:
a

tensor([ 1.,  2.,  3., 55.,  5.,  6.,  7.,  8.,  9., 10.])

In [62]:
# Stacking
x_stacked = torch.stack([a, a, a], dim=0)
x_stacked

tensor([[ 1.,  2.,  3., 55.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 1.,  2.,  3., 55.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 1.,  2.,  3., 55.,  5.,  6.,  7.,  8.,  9., 10.]])

In [63]:
x_stacked = torch.stack([a, a, a], dim=1)
x_stacked

tensor([[ 1.,  1.,  1.],
        [ 2.,  2.,  2.],
        [ 3.,  3.,  3.],
        [55., 55., 55.],
        [ 5.,  5.,  5.],
        [ 6.,  6.,  6.],
        [ 7.,  7.,  7.],
        [ 8.,  8.,  8.],
        [ 9.,  9.,  9.],
        [10., 10., 10.]])

### Squeeze and Unsqueeze
`torch.squeeze(tensor)`
`torch.unsqueeze(tensor, dim)`

In [64]:
a_reshaped = a.reshape(1, 10)
a_reshaped, a_reshaped.shape

(tensor([[ 1.,  2.,  3., 55.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [65]:
# Squeeze the (1, 10) tensor from the previous cell: the size-1 dimension is
# removed, leaving shape (10,). (Squeezing `a` itself would be a no-op, because
# `a` has no size-1 dimension to remove.)
a_squeezed = a_reshaped.squeeze()
a_squeezed, a_squeezed.shape

(tensor([ 1.,  2.,  3., 55.,  5.,  6.,  7.,  8.,  9., 10.]), torch.Size([10]))

In [66]:
a_unsqueezed = a_squeezed.unsqueeze(dim=0)
a_unsqueezed, a_unsqueezed.shape

(tensor([[ 1.,  2.,  3., 55.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [67]:
a_unsqueezed = a_squeezed.unsqueeze(dim=1)
a_unsqueezed, a_unsqueezed.shape

(tensor([[ 1.],
         [ 2.],
         [ 3.],
         [55.],
         [ 5.],
         [ 6.],
         [ 7.],
         [ 8.],
         [ 9.],
         [10.]]),
 torch.Size([10, 1]))

In [68]:
# Permute

x = torch.randn(2, 3, 5)
x.size()

torch.Size([2, 3, 5])

In [69]:
torch.permute(x, (2, 0, 1)).size()

torch.Size([5, 2, 3])

### Indexing
Similar to indexing in NumPy

In [70]:
x = torch.arange(1, 10).reshape(1, 3, 3)
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [71]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [72]:
x[0][0]

tensor([1, 2, 3])

In [73]:
x[0, 0]

tensor([1, 2, 3])

In [74]:
x[0][0][0]

tensor(1)

In [75]:
x[0, 1]

tensor([4, 5, 6])

In [76]:
# To get 9

x[0, 2, 2]

tensor(9)

In [77]:
# To get [3, 6, 9]

x[:, :, 2]

tensor([[3, 6, 9]])

## PyTorch Tensors and NumPy Arrays

* ndarray to Tensor -> `torch.from_numpy(ndarray)`

* Tensor to numpy array -> `torch.Tensor.numpy()`

In [78]:
import numpy as np
import torch

# NumPy array -> PyTorch tensor.
# from_numpy keeps the ndarray's dtype, so the float64 array yields a float64 tensor.
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [79]:
# `array + 1` allocates a NEW ndarray and rebinds the name `array` to it; the buffer
# that `tensor` was created from is untouched, so `tensor` still shows the old values.
# NOTE: torch.from_numpy shares memory with its source array, so an in-place update
# (e.g. `array += 1`) would have been visible through `tensor` as well.
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [80]:
# Tensor to NumPy array
# The resulting ndarray keeps the tensor's dtype (float32 for torch.rand by default).
tensor = torch.rand(2, 4)
array = tensor.numpy()
tensor, array

(tensor([[0.0773, 0.4310, 0.2058, 0.4269],
         [0.5315, 0.9993, 0.5640, 0.1272]]),
 array([[0.0773204 , 0.4309913 , 0.20581478, 0.42690223],
        [0.5315425 , 0.9992986 , 0.5640327 , 0.12717128]], dtype=float32))

In [81]:
# `tensor * 10` builds a NEW tensor and rebinds the name `tensor` to it; `array` still
# refers to the original data, which is why it is unchanged in the output.
tensor = tensor * 10
tensor, array

(tensor([[0.7732, 4.3099, 2.0581, 4.2690],
         [5.3154, 9.9930, 5.6403, 1.2717]]),
 array([[0.0773204 , 0.4309913 , 0.20581478, 0.42690223],
        [0.5315425 , 0.9992986 , 0.5640327 , 0.12717128]], dtype=float32))

## Reproducibility
Taking the randomness out of random: seeding the generator makes "random" results repeatable.

In [82]:
import torch

RANDOM_SEED = 23

# Re-seeding the generator immediately before each draw replays the same random
# sequence, so the two tensors come out element-for-element identical.
torch.manual_seed(seed=RANDOM_SEED)
tensor_A = torch.rand(size=(5,))

torch.manual_seed(seed=RANDOM_SEED)
tensor_B = torch.rand(size=(5,))

tensor_A, tensor_B, tensor_A == tensor_B

(tensor([0.4283, 0.2889, 0.4224, 0.3571, 0.9577]),
 tensor([0.4283, 0.2889, 0.4224, 0.3571, 0.9577]),
 tensor([True, True, True, True, True]))

## Running on GPUs

In [83]:
!nvidia-smi

Sun Jun  4 17:25:39 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 531.68                 Driver Version: 531.68       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1660 Ti    WDDM | 00000000:01:00.0 Off |                  N/A |
| N/A   42C    P8                5W /  N/A|    496MiB /  6144MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [84]:
# To check if PyTorch is able to access GPU

import torch
torch.cuda.is_available()

True

In [85]:
# Setting up device agnostic code

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [86]:
# Count number of devices
torch.cuda.device_count()

1

In [87]:
# Creating a device instance

cuda = torch.device('cuda')
cuda

device(type='cuda')

In [88]:
# To move a tensor from cpu to gpu

tensor = torch.rand(5)
tensor, tensor.device

(tensor([0.1100, 0.2933, 0.9205, 0.5876, 0.1299]), device(type='cpu'))

In [89]:
tensor = tensor.to(cuda)
tensor, tensor.device

(tensor([0.1100, 0.2933, 0.9205, 0.5876, 0.1299], device='cuda:0'),
 device(type='cuda', index=0))

### Tensors on the GPU can't be converted to NumPy arrays directly. Move the tensor to the CPU first, then convert it to NumPy.
`Tensor.cpu()` -> moves GPU tensor to CPU

In [90]:
tensor = torch.rand(5)
tensor, tensor.device

(tensor([0.6729, 0.1028, 0.7876, 0.5540, 0.4653]), device(type='cpu'))

In [91]:
tensor_on_gpu = tensor.to(device)
tensor_on_gpu, tensor_on_gpu.device

(tensor([0.6729, 0.1028, 0.7876, 0.5540, 0.4653], device='cuda:0'),
 device(type='cuda', index=0))

In [92]:
# NumPy cannot read GPU memory, so calling .numpy() on a CUDA tensor raises.
# The error is the demonstration, so catch and display it instead of letting it
# stop a "Restart Kernel -> Run All".
try:
    tensor_on_gpu.numpy()
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [93]:
tensor_back_on_cpu = tensor_on_gpu.cpu()
tensor_back_on_cpu, tensor_back_on_cpu.device

(tensor([0.6729, 0.1028, 0.7876, 0.5540, 0.4653]), device(type='cpu'))

In [94]:
tensor_back_on_cpu.numpy()

array([0.6728539 , 0.10275805, 0.7876373 , 0.5539505 , 0.46527135],
      dtype=float32)

## Exercises

1. Documentation reading - A big part of deep learning (and learning to code in general) is getting familiar with the documentation of a certain framework you're using. We'll be using the PyTorch documentation a lot throughout the rest of this course. So I'd recommend spending 10-minutes reading the following (it's okay if you don't get some things for now, the focus is not yet full understanding, it's awareness). See the documentation on torch.Tensor and for torch.cuda.
2. Create a random tensor with shape (7, 7).
3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).
4. Set the random seed to 0 and do exercises 2 & 3 over again.
5. Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one). If there is, set the GPU random seed to 1234.
6. Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).
7. Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).
8. Find the maximum and minimum values of the output of 7.
9. Find the maximum and minimum index values of the output of 7.
10. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and its shape as well as the second tensor and its shape.

In [95]:
# 2

import torch
tensor = torch.rand(7, 7)
tensor, tensor.shape

(tensor([[0.2311, 0.2214, 0.3348, 0.4541, 0.2519, 0.6310, 0.1707],
         [0.3122, 0.1976, 0.5466, 0.0213, 0.9049, 0.8444, 0.9330],
         [0.2950, 0.4773, 0.4787, 0.3440, 0.6732, 0.6593, 0.1879],
         [0.4546, 0.1049, 0.7112, 0.7709, 0.5514, 0.0807, 0.3029],
         [0.7935, 0.2823, 0.5686, 0.1354, 0.4224, 0.1946, 0.0791],
         [0.2242, 0.3113, 0.9287, 0.9269, 0.7013, 0.7788, 0.6650],
         [0.2760, 0.7221, 0.4752, 0.1604, 0.4215, 0.7701, 0.4345]]),
 torch.Size([7, 7]))

In [96]:
# 3

tensor_B = torch.rand(1, 7)
tensor_B_T = tensor_B.T

# Matrix Multiplication
output_tensor = torch.matmul(tensor, tensor_B_T)
output_tensor

tensor([[1.2167],
        [2.2845],
        [1.6275],
        [1.3137],
        [1.3016],
        [2.3560],
        [1.9897]])

In [97]:
# 4
# Exercises 2 & 3 again, this time seeded so the result is repeatable.

SEED = 0
torch.manual_seed(SEED)

tensor_A = torch.rand(size=(7, 7))
tensor_B = torch.rand(size=(1, 7))

# (7, 7) @ (7, 1) -> (7, 1): tensor_B is transposed so the inner dimensions match.
output_tensor = tensor_A @ tensor_B.T
output_tensor

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

In [98]:
# 5
# GPU counterpart of torch.manual_seed(): torch.cuda.manual_seed() seeds the
# random number generator used on the current CUDA device.

torch.cuda.manual_seed(1234)

In [99]:
# 6

device = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(1234)

# Sample on the CPU (so the CPU seed set above applies), then move each tensor to `device`.
tensor_A = torch.rand(size=(2, 3)).to(device)
tensor_B = torch.rand(size=(2, 3)).to(device)

In [100]:
# 7

output = torch.matmul(tensor_A, tensor_B.T)
output

tensor([[0.3647, 0.4709],
        [0.5184, 0.5617]], device='cuda:0')

In [101]:
# 8

output.min(), output.max()

(tensor(0.3647, device='cuda:0'), tensor(0.5617, device='cuda:0'))

In [102]:
# 9

output.argmin(), output.argmax()

(tensor(0, device='cuda:0'), tensor(3, device='cuda:0'))

In [103]:
# 10

torch.manual_seed(7)
tensor = torch.rand(1, 1, 1, 10)
tensor, tensor.shape

(tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
            0.3653, 0.8513]]]]),
 torch.Size([1, 1, 1, 10]))

In [104]:
sq_tensor = tensor.squeeze()
sq_tensor, sq_tensor.shape

(tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
         0.8513]),
 torch.Size([10]))