In [4]:
import torch

torch.__version__
print(torch.cuda.device_count())

0


## Creating tensors

- Scalar
- Vector
- Matrix
- Tensor

In [3]:
# 1. scalar
scalar = torch.tensor(7)
print("shape = ", scalar.shape)
print("ndim = ", scalar.ndim)
print("scalar.item() = ", scalar.item())

shape =  torch.Size([])
ndim =  0
scalar.item() =  7


In [3]:
# 2. vector
vector = torch.tensor([7, 7, 8])
print("shape = ", vector.shape)
print("ndim = ", vector.ndim)

shape =  torch.Size([3])
ndim =  1


In [5]:
# 3. MATRIX
MATRIX = torch.tensor([[1, 2, 3],
                      [2, 4, 5]])
print(MATRIX)
print("shape = ", MATRIX.shape)
print("ndim = ", MATRIX.ndim)

tensor([[1, 2, 3],
        [2, 4, 5]])
shape =  torch.Size([2, 3])
ndim =  2


In [5]:
# 4. TENSOR
TENSOR = torch.tensor([[
    [1, 2, 3, 4],
    [3, 6, 9, 12],
    [2, 4, 5, 9] ],
    [[10, 20, 30, 40],
    [30, 60, 90, 120],
    [20, 40, 50, 90]]])
print(TENSOR)
print(TENSOR.shape)
print(TENSOR.ndim) # == number of brackets

tensor([[[  1,   2,   3,   4],
         [  3,   6,   9,  12],
         [  2,   4,   5,   9]],

        [[ 10,  20,  30,  40],
         [ 30,  60,  90, 120],
         [ 20,  40,  50,  90]]])
torch.Size([2, 3, 4])
3


## Random Tensors


In [6]:
# random_tensor = torch.rand(size=(224, 224, 3))
random_tensor = torch.rand(224, 224, 3)
print(random_tensor.dtype)
print(random_tensor.ndim)
print(random_tensor.shape)
print("random_tensor = ", random_tensor)

torch.float32
3
torch.Size([224, 224, 3])
random_tensor =  tensor([[[8.3810e-01, 3.5759e-01, 2.4085e-01],
         [2.3288e-01, 8.8651e-01, 9.6280e-01],
         [2.0873e-01, 1.3244e-01, 5.6792e-01],
         ...,
         [6.6686e-01, 9.8580e-01, 7.3381e-01],
         [3.4252e-01, 6.2527e-02, 1.9630e-01],
         [1.1508e-01, 4.2583e-01, 2.4190e-01]],

        [[9.3329e-01, 9.1714e-01, 9.5501e-01],
         [9.8000e-01, 7.6415e-01, 7.0096e-01],
         [7.3076e-01, 4.8098e-01, 9.6802e-01],
         ...,
         [4.2055e-01, 9.3231e-01, 4.7745e-01],
         [5.4122e-01, 8.9984e-01, 6.8282e-01],
         [1.2178e-03, 8.8838e-01, 9.9388e-01]],

        [[9.2399e-01, 5.2803e-01, 2.6855e-01],
         [9.3570e-01, 2.0317e-01, 2.5086e-01],
         [8.5419e-01, 5.2427e-01, 7.4775e-02],
         ...,
         [5.7344e-01, 4.5856e-01, 5.8683e-01],
         [8.5996e-01, 1.6108e-01, 4.7880e-01],
         [7.1471e-01, 7.7475e-01, 3.9974e-02]],

        ...,

        [[4.0168e-01, 6.6538e-01,

## Zeros and ones

In [7]:
zeros = torch.zeros(size=(3, 4))
print(zeros)

ones = torch.ones(3,4)
print(ones)



tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])


## Creating a range and tensors like

In [9]:
# zero_to_ten_deprecated = torch.range(0, 10)
# print(zero_to_ten_deprecated)

zeros_to_ten = torch.arange(start=0, end=10, step=1)
print(zeros_to_ten)

ten_zeros = torch.zeros_like(input=zeros_to_ten)
print(ten_zeros)
ten_ones = torch.ones_like(input=zeros_to_ten)
print(ten_ones)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


## Tensor datatypes

- For precision in computing!
- The higher the precision value (8, 16, 32), the more detail and hence data used to express a number.
- Generally if you see `torch.cuda` anywhere, the tensor is being used for GPU (since Nvidia GPUs use a computing toolkit called CUDA).
- The most common type (and generally the default) is `torch.float32` or `torch.float`

In [8]:
# For precision in computing!
# The higher the precision value (8, 16, 32), the more detail and hence data used to express a number.
# `torch.float32` or `torch.float`

# Default datatype for tensors is float32
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # defaults to None, which is torch.float32 or whatever datatype is passed
                               device=None, # defaults to None, which uses the default tensor type. 'cpu', 'cuda', 'cuda:0'
                               requires_grad=False) # if True, operations performed on the tensor are recorded 


float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [10]:
# 1. CPU
cpu_tensor = torch.tensor([1, 2, 3], device='cpu')
# or simply
cpu_tensor = torch.tensor([1, 2, 3])  # defaults to CPU
print(cpu_tensor.device)  # Output: device(type='cpu')


cpu


In [12]:
# 2. GPU (if available)
if torch.cuda.is_available():
    gpu_tensor = torch.tensor([1, 2, 3], device='cuda')
    # or
    gpu_tensor = torch.tensor([1, 2, 3], device='cuda:0')  # first GPU
    print(gpu_tensor.device)  # Output: device(type='cuda', index=0)

In [14]:
# 3. Moving tensors between devices:

cpu_tensor = torch.tensor([1, 2, 3])
if torch.cuda.is_available():
    gpu_tensor = cpu_tensor.to('cuda')
    print(gpu_tensor.device)  # Output: device(type='cuda', index=0)
    
    # Move back to CPU
    new_cpu_tensor = gpu_tensor.to('cpu')
    print(new_cpu_tensor.device)  # Output: device(type='cpu')

In [15]:
# 4. Multiple GPUs:
if torch.cuda.device_count() > 1:
    gpu_1_tensor = torch.tensor([1, 2, 3], device='cuda:1')  # second GPU
    print(gpu_1_tensor.device)  # Output: device(type='cuda', index=1)


# Tensor operations

## Basic operation

- Addition, Substraction


In [17]:
# Create a tensor of values and add a number to it
tensor = torch.tensor([1, 2, 3])
tensor + 10

# Subtract and reassign
tensor = tensor - 10
tensor

# Multiply it by 10
tensor * 10
# Can also use torch functions
torch.multiply(tensor, 10)

tensor([-90, -80, -70])

## Multiplication

### `torch.mul` vs. `torch.matmul` (or @)

In [18]:
import torch

print("----1. Matrix element-wise multiplication----")
tensor = torch.tensor([1, 2, 3])
print(tensor * tensor)
print(torch.mul(tensor, tensor))
print(torch.mul(tensor, tensor))

print("----2. Matrix multiplication (or @) ----")
tensor = torch.tensor([1, 2, 3])
print(torch.matmul(tensor, tensor))
print(tensor @ tensor)


tensor([1, 4, 9])
tensor([1, 4, 9])
tensor([1, 4, 9])
----Matrix multiplication----
tensor(14)
tensor(14)


### `matmul()` time

In [19]:
%%time

# tensor = torch.tensor([1, 2, 3])
tensor = torch.arange(start=0, end=1000000, step=1)

# 1. COMPUTE manually compute sum of Element-wise multiplication
# Matrix multiplication by hand 
# (avoid doing operations with for loops at all cost, they are computationally expensive)
value = 0
print(len(tensor))
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
value


1000000
CPU times: user 3.81 s, sys: 1.55 ms, total: 3.81 s
Wall time: 3.76 s


tensor(333332833333500000)

In [21]:
%%time

# 2. COMPUTE with Matric Multiplication: Faster
torch.matmul(tensor, tensor)


CPU times: user 1.2 ms, sys: 254 μs, total: 1.45 ms
Wall time: 720 μs


tensor(333332833333500000)

## COMMON ERRORS

## Common issues: mismatch in tensor `shape`, `datatype` and `device`

For example, one of tensors is torch.float32 and the other is torch.float16 (PyTorch often likes tensors to be the same format).

Or one of your tensors is on the CPU and the other is on the GPU (PyTorch likes calculations between tensors to be on the same device).


In [23]:
# Getting information from tensors
print("shape - what shape is the tensor? (some operations require specific shape rules)")
print("dtype - what datatype are the elements within the tensor stored in?")
print("device - what device is the tensor stored on? (usually GPU or CPU)\n")

# Create a tensor
some_tensor = torch.rand(3, 4)

# Find out details about it
print(some_tensor)
print(f"Dimension (ndim) of tensor: {some_tensor.ndim}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}") # will default to CPU

shape - what shape is the tensor? (some operations require specific shape rules)
dtype - what datatype are the elements within the tensor stored in?
device - what device is the tensor stored on? (usually GPU or CPU)

tensor([[0.4421, 0.9540, 0.1632, 0.5064],
        [0.3561, 0.1311, 0.2433, 0.4058],
        [0.4824, 0.8748, 0.1188, 0.6754]])
Dimension (ndim) of tensor: 2
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [25]:
# Shapes need to be in the right way  
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11], 
                         [9, 12]], dtype=torch.float32)

# ERROR: 3x2 * 3x2
torch.matmul(tensor_A, tensor_B) # (this will error)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [27]:
print(torch.matmul(tensor_A, tensor_B.T))
print(torch.mm(tensor_A, tensor_B.T))

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])
tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])


### Matrix Multiplication visual

- http://matrixmultiplication.xyz/


## Neural networks are full of matrix multiplications and dot products.

The `torch.nn.Linear()` module (we'll see this in action later on), also known as a feed-forward layer or fully connected layer, implements a matrix multiplication between an input x and a weights matrix A.


$y = x \cdot A^T + b$

Try changing the values of in_features and out_features below and see what happens.

In [31]:
# Since the linear layer starts with a random weights matrix, let's make it reproducible (more on this later)
torch.manual_seed(42)
# This uses matrix multiplication
# creates a linear layer, which is indeed a linear function with weights and bias.
linear = torch.nn.Linear(in_features=2, # in_features = matches inner dimension of input 
                         out_features=6) # out_features = describes outer value 
print(linear.weight)
print(linear.bias)

x = torch.tensor([[1, 2],
                [3, 4],
                [5, 6]], dtype=torch.float32)
# output = x cdot A^T + b
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Parameter containing:
tensor([[ 0.5406,  0.5869],
        [-0.1657,  0.6496],
        [-0.1549,  0.1427],
        [-0.3443,  0.4153],
        [ 0.6233, -0.5188],
        [ 0.6146,  0.1323]], requires_grad=True)
Parameter containing:
tensor([ 0.5224,  0.0958,  0.3410, -0.0998,  0.5451,  0.1045],
       requires_grad=True)
Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


In [35]:
import torch
import torch.nn as nn
import torch.optim as optim

# Set random seed for reproducibility
torch.manual_seed(42)

# Define the linear model
linear = nn.Linear(in_features=2, out_features=6)

# Print initial parameters
print("Initial weights:")
print(linear.weight)
print("\nInitial bias:")
print(linear.bias)

# Create some dummy data
x = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.float32)
y = torch.tensor([[10, 20, 30, 40, 50, 60],
                  [15, 25, 35, 45, 55, 65],
                  [20, 30, 40, 50, 60, 70]], dtype=torch.float32)

# Define loss function
criterion = nn.MSELoss()

# Define optimizer
optimizer = optim.SGD(linear.parameters(), lr=0.01)

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass
    outputs = linear(x)
    loss = criterion(outputs, y)
    
    # Backward pass and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Print progress and parameters at certain intervals
    if (epoch + 1) % 250 == 0:
        print(f'\nEpoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        print("Current weights:")
        print(linear.weight)
        print("Current bias:")
        print(linear.bias)

# Print final parameters
print("\nFinal weights:")
print(linear.weight)
print("\nFinal bias:")
print(linear.bias)

# Test the model
with torch.no_grad():
    test_input = torch.tensor([[7, 8]], dtype=torch.float32)
    initial_prediction = linear(test_input)
    print("\nPrediction for input [7, 8]:")
    print(initial_prediction)

# Compare initial and final outputs
initial_output = linear(x)
print("\nInitial output:")
print(initial_output)
print("\nFinal output:")
print(outputs)
print("\nTarget output:")
print(y)

Initial weights:
Parameter containing:
tensor([[ 0.5406,  0.5869],
        [-0.1657,  0.6496],
        [-0.1549,  0.1427],
        [-0.3443,  0.4153],
        [ 0.6233, -0.5188],
        [ 0.6146,  0.1323]], requires_grad=True)

Initial bias:
Parameter containing:
tensor([ 0.5224,  0.0958,  0.3410, -0.0998,  0.5451,  0.1045],
       requires_grad=True)

Epoch [250/1000], Loss: 132.7760
Current weights:
Parameter containing:
tensor([[ 1.2352,  2.2655],
        [ 0.9755,  4.3447],
        [ 1.3678,  5.7714],
        [ 1.3095,  7.7035],
        [ 2.4315,  8.4713],
        [ 2.3073, 10.4693]], requires_grad=True)
Current bias:
Parameter containing:
tensor([1.5064, 2.6497, 4.4469, 5.5346, 7.7271, 8.7488], requires_grad=True)

Epoch [500/1000], Loss: 86.4376
Current weights:
Parameter containing:
tensor([[ 0.8663,  2.4411],
        [-0.0640,  4.8395],
        [-0.3422,  6.5854],
        [-1.0912,  8.8463],
        [-0.6659,  9.9457],
        [-1.4808, 12.2724]], requires_grad=True)
Current b


## Finding the min, max, mean, sum, etc (aggregation)

In [36]:
x = torch.arange(0, 100, 10)

print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# print(f"Mean: {x.mean()}") # this will error
print(f"Mean: {x.type(torch.float32).mean()}") # won't work without float datatype
print(f"Sum: {x.sum()}")


torch.min(x), torch.max(x), torch.mean(x.type(torch.float32)), torch.sum(x)

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


(tensor(0), tensor(90), tensor(45.), tensor(450))

In [24]:
import torch

# Create a tensor
tensor = torch.arange(10, 100, 10)
print(f"Tensor: {tensor}")

# Returns index of max and min values
print(f"Index where max value occurs: {tensor.argmax()}")
print(f"Index where min value occurs: {tensor.argmin()}")


Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


In [9]:
import torch

tensor = torch.arange(10, 100, 10)
print("Change tensor datatype")
# Create a tensor and check its datatype
tensor = torch.arange(10., 100., 10.)
print(tensor.dtype)
print(tensor)

# Create a float16 tensor
tensor_float16 = tensor.type(torch.float16)
print(tensor_float16.dtype)
print(tensor_float16)

# Create a int8 tensor
tensor_int8 = tensor.type(torch.int8)
print(tensor_int8.dtype)
print(tensor_int8)


Change tensor datatype
torch.float32
tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.])
torch.float16
tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)
torch.int8
tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)


## Reshaping, stacking, squeezing and unsqueezing

Method	| One-line description
---|---
`torch.reshape(input, shape)`	| Reshapes input to shape (if compatible), can also use torch.Tensor.reshape().
`Tensor.view(shape)`	|Returns a view of the original tensor in a different shape but shares the same data as the original tensor.
`torch.stack(tensors, dim=0)`	|Concatenates a sequence of tensors along a new dimension (dim), all tensors must be same size.
`torch.squeeze(input)`	|Squeezes input to remove all the dimenions with value 1.
`torch.unsqueeze(input, dim)`	|Returns input with a dimension value of 1 added at dim.
`torch.permute(input, dims)`	|Returns a view of the original input with its dimensions permuted (rearranged) to dims.

In [1]:
import torch

x = torch.arange(0., 10.)
print(x)
print(x.shape)

x_reshaped = x.reshape(10)
print(x_reshaped)
x_reshaped = x.reshape(1, 10)
print(x_reshaped)


tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
torch.Size([10])
tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
tensor([[0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])


## view
A tensor view in PyTorch is a way to create a new tensor that shares the same data as the original tensor but has a different shape or strides.

Views are used to reshape, transpose, or otherwise change the view of the data without copying the underlying data.
This makes operations more memory-efficient and faster.

In [2]:
import torch

print(x)
z = x.view(2, 5)
print(z)

# Changing view (z) changes x
z[:, 0] = 5
z, x


tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])


(tensor([[5., 1., 2., 3., 4.],
         [5., 6., 7., 8., 9.]]),
 tensor([5., 1., 2., 3., 4., 5., 6., 7., 8., 9.]))

## Stack

In [3]:
import torch

# Stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim=0) # try changing dim to dim=1 and see what happens
print(x_stacked)

x_stacked = torch.stack([x, x, x, x], dim=1)

print(x_stacked)


tensor([[5., 1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])
tensor([[5., 5., 5., 5.],
        [1., 1., 1., 1.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])


## Squeeze

In [4]:
import torch

# Stack tensors on top of each other
print("---Squeeze---")
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimension from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

print("---Unsqueeze---")
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# And to do the reverse of torch.squeeze() you can use torch.unsqueeze() to add a dimension value of 1 at a specific index.
## Add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

---Squeeze---
Previous tensor: tensor([[5., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 10])

New tensor: tensor([5., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([10])
---Unsqueeze---
Previous tensor: tensor([5., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([10])

New tensor: tensor([[5., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 10])


## Permute

In [5]:
import torch

print("-----Permute-----")
# Create tensor with specific shape
x_original = torch.rand(size=(224, 224, 3))

# Permute the original tensor to rearrange the axis order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")

-----Permute-----
Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


## Indexing (selecting data from tensors)

Sometimes you'll want to select specific data from tensors (for example, only the first column or second row).

In [28]:
# Create a tensor 
import torch
x = torch.arange(1, 10).reshape(1, 3, 3)
print("x = ", x)
print("x.shape = ", x.shape)
print("--------")

# Let's index bracket by bracket
print(f"x[0] = First square bracket = {x[0]}") 
print(f"x[0][0] = Second square bracket = {x[0][0]}") 
print(f"x[0][0][0] = Third square bracket = {x[0][0][0]}")

print(x[:, :, 0])

# You can also use : to specify "all values in this dimension" and then use a comma (,) to add another dimension.
# Get all values of 0th dimension and the 0 index of 1st dimension
print(x[:, 0])
# Get all values of 0th & 1st dimensions but only index 1 of 2nd dimension
print(x[:, :, 1])
# Get all values of the 0 dimension but only the 1 index value of the 1st and 2nd dimension
print(x[:, 1, 1])
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension 
print(x[0, 0, :]) # same as x[0][0]

x =  tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
x.shape =  torch.Size([1, 3, 3])
--------
x[0] = First square bracket = tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
x[0][0] = Second square bracket = tensor([1, 2, 3])
x[0][0][0] = Third square bracket = 1
tensor([[1, 4, 7]])
tensor([[1, 2, 3]])
tensor([[2, 5, 8]])
tensor([5])
tensor([1, 2, 3])


## PyTorch tensors & NumPy

In [23]:
# NumPy array to tensor
import torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))