## Three most common errors in PyTorch:

1. Shape mismatch
2. Device mismatch
3. Data type mismatch

| Problem Space | Pre-built datasets and functions |
| --- | --- |
| Vision | torchvision.datasets |
| Text | torchtext.datasets |
| Audio | torchaudio.datasets |
| Recommendation system | torchrec.datasets |
| Bonus | TorchData* |

## Loss curves

Refer to [this link](https://developers.google.com/machine-learning/testing-debugging/metrics/interpretic).
<br/>
![Loss curves](./images/loss_curves.png)

## Points to note:

- When you're first approaching a machine learning problem: **always** start small and if something works, scale it up. Your first batch of experiments should take no longer than a few seconds to a few minutes to run. The quicker you can experiment, the faster you can work out what doesn't work.

## Torch Basics

### Scalar, Vector, Matrix, Tensor

In [1]:
import torch

# 0-dimensional tensor: a single value.
print("-----------------SCALAR-----------------")
scalar = torch.tensor(7)
print("scalar =>", scalar)
print("item =>", scalar.item())  # the wrapped value as a plain Python number
print("ndim =>", scalar.dim())
print("shape =>", scalar.size())

# 1-dimensional tensor.
print("-----------------VECTOR-----------------")
vector = torch.tensor([1, 2])
print("vector =>", vector)
print("ndim =>", vector.dim())
print("shape =>", vector.size())

# 2-dimensional tensor (2 rows x 2 columns).
print("-----------------MATRIX-----------------")
MATRIX = torch.tensor([[1, 2], [3, 4]])
print("matrix =>", MATRIX)
print("ndim =>", MATRIX.dim())
print("shape =>", MATRIX.size())

# 3-dimensional tensor: two stacked 2x3 matrices.
print("-----------------TENSOR-----------------")
TENSOR = torch.tensor(
    [
        [[1, 2, 1], [3, 4, 3]],
        [[4, 5, 4], [6, 7, 6]],
    ]
)
print("tensor =>", TENSOR)
print("ndim =>", TENSOR.dim())
print("shape =>", TENSOR.size())
print(TENSOR[0])  # first 2x3 matrix along dimension 0

# 3x4 tensor filled with random values.
print("-------------Random TENSOR-------------")
random_tensor = torch.rand((3, 4))
print("random_tensor =>", random_tensor)
print("ndim =>", random_tensor.dim())
print("shape =>", random_tensor.size())

-----------------SCALAR-----------------
scalar => tensor(7)
item => 7
ndim => 0
shape => torch.Size([])
-----------------VECTOR-----------------
vector => tensor([1, 2])
ndim => 1
shape => torch.Size([2])
-----------------MATRIX-----------------
matrix => tensor([[1, 2],
        [3, 4]])
ndim => 2
shape => torch.Size([2, 2])
-----------------TENSOR-----------------
tensor => tensor([[[1, 2, 1],
         [3, 4, 3]],

        [[4, 5, 4],
         [6, 7, 6]]])
ndim => 3
shape => torch.Size([2, 2, 3])
tensor([[1, 2, 1],
        [3, 4, 3]])
-------------Random TENSOR-------------
random_tensor => tensor([[0.8447, 0.4880, 0.3170, 0.6027],
        [0.3176, 0.3862, 0.3130, 0.7451],
        [0.3243, 0.1313, 0.5807, 0.6017]])
ndim => 2
shape => torch.Size([3, 4])


### Methods for creating tensors

In [2]:
# Three 3x4 tensors: random values, all zeros, all ones.
matrix1 = torch.rand((3, 4))
zeros = torch.zeros((3, 4))
ones = torch.ones((3, 4))

print(zeros.dtype)

# Element-wise product with the zero tensor.
torch.mul(matrix1, zeros)

torch.float32


tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [3]:
# Values from 1 up to (but excluding) 11, in steps of 2.
tensor_1_2_10 = torch.arange(1, 11, 2)
print(tensor_1_2_10)

# Zero tensor with the same shape and dtype as the range above.
zeros_like = torch.zeros_like(input=tensor_1_2_10)
print(zeros_like)

tensor([1, 3, 5, 7, 9])
tensor([0, 0, 0, 0, 0])


In [4]:
# Both tensors live on the CPU here; use device="cuda" instead when a GPU is available.
tensor1 = torch.rand((3, 4), device="cpu", dtype=torch.float16)
tensor2 = torch.rand((3, 4), device="cpu", dtype=torch.float32)

print(tensor1.dtype, tensor2.dtype, sep="\n")

# Element-wise product of a float16 and a float32 tensor.
torch.mul(tensor1, tensor2)

torch.float16
torch.float32


tensor([[0.0149, 0.1095, 0.4213, 0.4036],
        [0.2675, 0.0276, 0.0862, 0.5308],
        [0.7090, 0.1755, 0.6263, 0.4737]])

In [5]:
# Cast to float64, then place the result on the CPU and show its device.
tensor1 = tensor1.to(dtype=torch.float64, non_blocking=True)
print(tensor1.dtype)

tensor1 = tensor1.to("cpu", non_blocking=True)
tensor1.device

torch.float64


device(type='cpu')

In [9]:
tensor1 = torch.tensor(
    [
        [1, 2, 3],
        [1, 2, 4],
    ]
)
tensor2 = torch.tensor([[1], [2], [3]])

# (2x3) matrix times (3x1) matrix -> (2x1) matrix.
torch.matmul(tensor1, tensor2)

tensor([[14],
        [17]])

In [10]:
# Mean over all elements, computed in float32 (tensor1 holds integers).
torch.mean(tensor1, dtype=torch.float32)

tensor(2.1667)

In [11]:
# Index of the largest element, counted over the flattened tensor.
torch.argmax(tensor1)

tensor(5)

### Reshape tensors

In [12]:
# The integers 1..10 as a 1-D tensor.
tensor1 = torch.arange(1, 11)

# Rearrange the 10 elements into 2 rows of 5.
reshaped_tensor1 = torch.reshape(tensor1, (2, 5))
reshaped_tensor1

tensor([[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10]])

In [13]:
# Same 2x5 layout as the reshape, obtained through view().
view_tensor1 = tensor1.view((2, 5))
view_tensor1

tensor([[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10]])

### Combine tensors

In [14]:
# Stack three copies of tensor1 along a new leading dimension (one copy per row).
stacked_tensor = torch.stack([tensor1] * 3, dim=0)
stacked_tensor, stacked_tensor.size()

(tensor([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
         [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
         [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]]),
 torch.Size([3, 10]))

In [15]:
# Stack three copies of tensor1 along a new dimension 1 (one copy per column).
stacked_tensor = torch.stack([tensor1] * 3, dim=1)
stacked_tensor, stacked_tensor.size()

(tensor([[ 1,  1,  1],
         [ 2,  2,  2],
         [ 3,  3,  3],
         [ 4,  4,  4],
         [ 5,  5,  5],
         [ 6,  6,  6],
         [ 7,  7,  7],
         [ 8,  8,  8],
         [ 9,  9,  9],
         [10, 10, 10]]),
 torch.Size([10, 3]))

In [16]:
# Join three copies of tensor1 end to end along the existing dimension 0.
new_tensor = torch.cat([tensor1] * 3, dim=0)
new_tensor, new_tensor.size()

(tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  1,  2,  3,  4,  5,  6,  7,  8,
          9, 10,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]),
 torch.Size([30]))

### Add/Remove tensor dimension

In [17]:
# Insert a size-1 dimension at position 1.
unsqueezed = torch.unsqueeze(tensor1, dim=1)
unsqueezed, unsqueezed.size()

(tensor([[ 1],
         [ 2],
         [ 3],
         [ 4],
         [ 5],
         [ 6],
         [ 7],
         [ 8],
         [ 9],
         [10]]),
 torch.Size([10, 1]))

In [18]:
# Drop every size-1 dimension again.
squeezed = torch.squeeze(unsqueezed)
squeezed, squeezed.size()

(tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]), torch.Size([10]))

### Reorder tensor dimensions

In [19]:
# Random tensor with dimensions 2 x 3 x 5.
x = torch.rand((2, 3, 5))
x, x.size()

(tensor([[[0.8599, 0.0970, 0.9733, 0.5139, 0.5890],
          [0.3366, 0.5394, 0.4600, 0.5773, 0.1522],
          [0.9060, 0.6984, 0.8639, 0.5572, 0.7409]],
 
         [[0.2316, 0.1701, 0.2129, 0.6462, 0.0536],
          [0.1137, 0.3682, 0.4395, 0.5995, 0.4777],
          [0.5148, 0.8357, 0.7044, 0.2633, 0.2452]]]),
 torch.Size([2, 3, 5]))

In [20]:
# Reorder dimensions: old dim 2 becomes dim 0, old dim 0 -> dim 1, old dim 1 -> dim 2.
x_view = x.permute((2, 0, 1))
x_view, x_view.size()

(tensor([[[0.8599, 0.3366, 0.9060],
          [0.2316, 0.1137, 0.5148]],
 
         [[0.0970, 0.5394, 0.6984],
          [0.1701, 0.3682, 0.8357]],
 
         [[0.9733, 0.4600, 0.8639],
          [0.2129, 0.4395, 0.7044]],
 
         [[0.5139, 0.5773, 0.5572],
          [0.6462, 0.5995, 0.2633]],
 
         [[0.5890, 0.1522, 0.7409],
          [0.0536, 0.4777, 0.2452]]]),
 torch.Size([5, 2, 3]))

In [21]:
import torch

# A single 3x3 matrix wrapped in one extra outer dimension.
x = torch.tensor(
    [
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ]
    ]
)
print(x[0, 2, 2])  # one element: block 0, row 2, column 2
print(x[..., 2])  # column 2 of every row in every block

tensor(9)
tensor([[3, 6, 9]])


In [22]:
# Size of each dimension of x.
x.size()

torch.Size([1, 3, 3])

### Seeding

In [25]:
RANDOM_SEED = 42

# Seed CUDA once, then reseed the default generator before each draw so that
# both draws start from the same generator state.
torch.cuda.manual_seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
x = torch.rand((2, 3))
torch.manual_seed(RANDOM_SEED)
y = torch.rand((2, 3))
print(x, y, sep="\n")

tensor([[0.8823, 0.9150, 0.3829],
        [0.9593, 0.3904, 0.6009]])
tensor([[0.8823, 0.9150, 0.3829],
        [0.9593, 0.3904, 0.6009]])


### Device agnostic code

In [None]:
# IPython shell escape: runs the `nvidia-smi` command-line tool (not Python code).
!nvidia-smi

In [27]:
# Report CUDA availability, choose the matching device string, and count GPUs.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)
torch.cuda.device_count()

False
cpu


0

In [28]:
import torch

RANDOM_SEED = 0
# Fall back to the CPU whenever CUDA is not available.
DEVICE = "cpu" if not torch.cuda.is_available() else "cuda"

# Seed both generators, then draw a 7x7 random tensor on the chosen device.
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
x = torch.rand((7, 7), device=DEVICE)
x

tensor([[0.4963, 0.7682, 0.0885, 0.1320, 0.3074, 0.6341, 0.4901],
        [0.8964, 0.4556, 0.6323, 0.3489, 0.4017, 0.0223, 0.1689],
        [0.2939, 0.5185, 0.6977, 0.8000, 0.1610, 0.2823, 0.6816],
        [0.9152, 0.3971, 0.8742, 0.4194, 0.5529, 0.9527, 0.0362],
        [0.1852, 0.3734, 0.3051, 0.9320, 0.1759, 0.2698, 0.1507],
        [0.0317, 0.2081, 0.9298, 0.7231, 0.7423, 0.5263, 0.2437],
        [0.5846, 0.0332, 0.1387, 0.2422, 0.8155, 0.7932, 0.2783]])

### Tensor operations

In [29]:
# Reseed, then draw a 1x7 random tensor on the same device as x.
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
y = torch.rand((1, 7), device=DEVICE)

# y is transposed to 7x1 so the inner dimensions match: (7x7) @ (7x1) -> (7x1).
torch.matmul(x, y.t())

tensor([[1.5985],
        [1.1173],
        [1.2741],
        [1.6838],
        [0.8279],
        [1.0347],
        [1.2498]])

In [30]:
CUDA_SEED = 1234

# Reseed with the same value before each draw.
torch.manual_seed(CUDA_SEED)
x = torch.rand((2, 3), device=DEVICE)

torch.manual_seed(CUDA_SEED)
y = torch.rand((2, 3), device=DEVICE)

In [31]:
# Show both tensors, one after the other.
print(x, y, sep="\n")

tensor([[0.0290, 0.4019, 0.2598],
        [0.3666, 0.0583, 0.7006]])
tensor([[0.0290, 0.4019, 0.2598],
        [0.3666, 0.0583, 0.7006]])


In [32]:
# (2x3) @ (3x2) -> (2x2); y is transposed so the inner dimensions match.
res = torch.matmul(x, y.t())
res

tensor([[0.2299, 0.2161],
        [0.2161, 0.6287]])

In [33]:
# Smallest and largest values, then their positions in the flattened tensor.
print(torch.min(res), torch.max(res))
print(torch.argmin(res), torch.argmax(res))

tensor(0.2161) tensor(0.6287)
tensor(1) tensor(3)


In [34]:
# Seeded 1x1x1x10 random tensor.
torch.manual_seed(7)
x = torch.rand((1, 1, 1, 10))

# Remove the three size-1 dimensions, leaving a 1-D tensor of 10 values.
squeezed = torch.squeeze(x)
print(squeezed.size())

torch.Size([10])


In [35]:
# Compare the original tensor with its squeezed version.
print(x, squeezed, sep="\n")

tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]])
tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513])
