## Import Pytorch
Check if Pytorch is available in the environment.

In [1]:
import torch
x = torch.rand(5, 3)
print(x)

tensor([[0.1491, 0.6564, 0.1395],
        [0.9654, 0.5536, 0.9790],
        [0.5554, 0.3806, 0.3780],
        [0.7797, 0.6439, 0.9923],
        [0.6004, 0.6136, 0.2194]])


In [3]:
torch.__version__

'2.5.1+cu124'

## Tensor
Tensors are the fundamental building block of machine learning.
Their job is to represent data in a numerical way.

### Key learning points
- [Data types](https://pytorch.org/docs/stable/tensors.html#data-types)
- Initialize tensor of different shapes
- Manipulate tensor

#### Different Shapes of Tensor

| Name   | What is it?                                                                 | Number of dimensions                                                                 | Lower or upper (usually/example)                                                                 |
|--------|-----------------------------------------------------------------------------|--------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------|
| scalar | a single number                                                             | 0                                                                                    | Lower (a)                                                                                        |
| vector | a number with direction (e.g. wind speed with direction) but can also have many other numbers | 1                                                                                    | Lower (y)                                                                                        |
| matrix | a 2-dimensional array of numbers                                            | 2                                                                                    | Upper (Q)                                                                                        |
| tensor | an n-dimensional array of numbers                                           | can be any number, a 0-dimension tensor is a scalar, a 1-dimension tensor is a vector | Upper (X)                                                                                        |

In [4]:
# Create a tensor
# scalar
a = torch.tensor(1.0)

# vector
y = torch.tensor([1.0, 2.0, 3.0, 4.0])

# matrix
Q = torch.tensor([[1.0, 2.0], [3.0, 4.0]])

# N-dimensional tensor
X = torch.tensor([[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])

# random tensor
R = torch.rand(size=(3, 4))

# all zeros tensor
Z = torch.zeros(size=(3, 4))

# all ones tensor
O = torch.ones(size=(3, 4))

# use arange to create a tensor
zero_to_ten = torch.arange(start=0, end=10, step=2)

# use zeros_like to create a tensor
zeros_tensor = torch.zeros_like(zero_to_ten)

# print the tensor
print("scalar:", a)
print("vector: ", y)
print("matrix:", Q)
print("tensor:", X)
print("random tensor:", R)
print("all zeros tensor:", Z)
print("all ones tensor:", O)
print("zero to ten:", zero_to_ten)
print("zeros tensor:", zeros_tensor)

scalar: tensor(1.)
vector:  tensor([1., 2., 3., 4.])
matrix: tensor([[1., 2.],
        [3., 4.]])
tensor: tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]])
random tensor: tensor([[0.8365, 0.4091, 0.5110, 0.6634],
        [0.2815, 0.1100, 0.4280, 0.1230],
        [0.8911, 0.0910, 0.4915, 0.1522]])
all zeros tensor: tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
all ones tensor: tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
zero to ten: tensor([0, 2, 4, 6, 8])
zeros tensor: tensor([0, 0, 0, 0, 0])


#### Get the Structure of Tensor
- `.ndim`
- `.item`
- `.shape`

In [5]:
# dimensions of the tensor
print("scalar:", a.ndim)
print("vector:", y.ndim)
print("matrix:", Q.ndim)
print("tensor:", X.ndim)

scalar: 0
vector: 1
matrix: 2
tensor: 3


#### Data Types
Here we specify data type when creating a tensor.

In [6]:
# Create a tensor without specifying the data type
dtype_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0],
                            dtype=None,
                            device=None,
                            requires_grad=False)
print("Data type: ", dtype_tensor.dtype)

Data type:  torch.float32


In [7]:
import math

print(math.pi, math.e)

# Create a tensor with a specific data type of 32-bit floating point
float32_tensor = torch.tensor([math.pi, math.e],
                              dtype=torch.float32,
                              device=None,
                              requires_grad=False)

# print the difference between math.pi and its representation in the tensor
print("diff for pi: ", math.pi - float32_tensor[0].item())

float32_tensor[0].item(), float32_tensor.element_size(), float32_tensor.dtype, float32_tensor.device

3.141592653589793 2.718281828459045
diff for pi:  -8.742278012618954e-08


(3.1415927410125732, 4, torch.float32, device(type='cpu'))

In [8]:
import math

print(math.pi, math.e)

# Create a tensor with a specific data type of 64-bit floating point
float64_tensor = torch.tensor([math.pi, math.e],
                              dtype=torch.float64,
                              device=None,
                              requires_grad=False)

# print the difference between math.pi and its representation in the tensor
print("diff for pi: ", math.pi - float64_tensor[0].item())

float64_tensor[0].item(), float64_tensor.element_size(), float64_tensor.dtype, float64_tensor.device

3.141592653589793 2.718281828459045
diff for pi:  0.0


(3.141592653589793, 8, torch.float64, device(type='cpu'))

## Manipulate Tensors
- addition
- subtraction
- multiplication (element-wise)
- division
- matrix multiplication

In [9]:
## addition
tensor = torch.tensor([1.0, 2.0, 3.0, 4.0])
print("tensor + tensor = ", tensor + tensor)
print("tensor + 10 = ", tensor + 10)

## subtraction
print("tensor - tensor = ", tensor - tensor)
print("tensor - 10 = ", tensor - 10)

## multiplication (element-wise)
print("tensor * tensor = ", tensor * tensor)
print("tensor * 10 = ", tensor * 10)
print("torch.mul(tensor, 10) = ", torch.mul(tensor, 10))

## division
print("tensor / tensor = ", tensor / tensor)
print("tensor / 10 = ", tensor / 10)

## dot product
print("torch.matmul(tensor, tensor) = ", torch.matmul(tensor, tensor))

tensor + tensor =  tensor([2., 4., 6., 8.])
tensor + 10 =  tensor([11., 12., 13., 14.])
tensor - tensor =  tensor([0., 0., 0., 0.])
tensor - 10 =  tensor([-9., -8., -7., -6.])
tensor * tensor =  tensor([ 1.,  4.,  9., 16.])
tensor * 10 =  tensor([10., 20., 30., 40.])
torch.mul(tensor, 10) =  tensor([10., 20., 30., 40.])
tensor / tensor =  tensor([1., 1., 1., 1.])
tensor / 10 =  tensor([0.1000, 0.2000, 0.3000, 0.4000])
torch.matmul(tensor, tensor) =  tensor(30.)


In [10]:
%%time
print("torch.matmul", torch.matmul(tensor, tensor))

print("tensor @ tensor = ", tensor @ tensor)

torch.matmul tensor(30.)
tensor @ tensor =  tensor(30.)
CPU times: user 1.32 ms, sys: 19 μs, total: 1.34 ms
Wall time: 915 μs


In [11]:
# the inner dimensions of the two matrices must be the same
# the resulting matrix will have the shape of the outer dimensions
# i.e. (2, 3) @ (3, 2) = (2, 2)
tensor_mat = torch.rand(2, 3) @ torch.rand(3, 4)
print("(2,3) @ (3,2) = ", tensor_mat)
print("shape: ", tensor_mat.shape)

(2,3) @ (3,2) =  tensor([[0.7828, 0.7684, 1.1607, 0.8876],
        [0.5300, 0.5351, 0.7048, 0.7582]])
shape:  torch.Size([2, 4])


In [12]:
# transpose
tensorA = torch.tensor([[1, 2], [3, 4], [5, 6]])
print("tensorA: ", tensorA, "shape: ", tensorA.shape)
print("tensorA.T: ", tensorA.T, "shape: ", tensorA.T.shape)
print("tensorA.t(): ", tensorA.t(), "shape: ", tensorA.t().shape)

tensorA:  tensor([[1, 2],
        [3, 4],
        [5, 6]]) shape:  torch.Size([3, 2])
tensorA.T:  tensor([[1, 3, 5],
        [2, 4, 6]]) shape:  torch.Size([2, 3])
tensorA.t():  tensor([[1, 3, 5],
        [2, 4, 6]]) shape:  torch.Size([2, 3])


In [15]:
# find the max, min, mean, and sum of the tensor
print("max: ", tensor.max())
print("min: ", tensor.min())
print("mean: ", tensor.mean())

max:  tensor(4.)
min:  tensor(1.)
mean:  tensor(2.5000)


In [3]:
# the following code will end up with a RuntimeError
# RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long
tensor_int64 = torch.tensor([1, 2, 3, 4], dtype=torch.int64)
# print("mean: ", tensor_int64.mean())

# to fix the error, convert the tensor to a floating point tensor
tensor_int64.type(torch.float64).mean()

tensor(2.5000, dtype=torch.float64)

In [None]:
# reshape the tensor
tensor = torch.arange(9)
print("tensor: ", tensor)
print("tensor.reshape(3, 3): ", tensor.reshape(3, 3))

# change the view
z = tensor.view(3, 3)
print("z: ", z)
print("tensor: ", tensor)

# z and tensor share the same memory
z[0, 0] = -1
print("z: ", z)
print("tensor: ", tensor)

tensor:  tensor([0, 1, 2, 3, 4, 5, 6, 7, 8])
tensor.reshape(3, 3):  tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
z:  tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
tensor:  tensor([0, 1, 2, 3, 4, 5, 6, 7, 8])
z:  tensor([[-1,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]])
tensor:  tensor([-1,  1,  2,  3,  4,  5,  6,  7,  8])


In [11]:
# stack
x = torch.randn(2, 3)
y = torch.randn(2, 3)
print("x: ", x)
print("y: ", y)

x:  tensor([[-1.2129,  0.5628, -0.1500],
        [ 0.6058, -0.3978,  0.3751]])
y:  tensor([[ 0.0060, -0.2693, -1.6171],
        [ 0.5033,  0.0055,  1.5146]])


In [None]:
# stack along the first dimension
print(torch.stack((x, y), dim=0))
print(torch.stack([x, y]))

# stack along the second dimension
print(torch.stack((x, y), dim=1))

# stack along the third dimension
print(torch.stack((x, y), dim=2))
print(torch.stack((x, y), dim=-1))


tensor([[[-1.2129,  0.5628, -0.1500],
         [ 0.6058, -0.3978,  0.3751]],

        [[ 0.0060, -0.2693, -1.6171],
         [ 0.5033,  0.0055,  1.5146]]])
tensor([[[-1.2129,  0.5628, -0.1500],
         [ 0.0060, -0.2693, -1.6171]],

        [[ 0.6058, -0.3978,  0.3751],
         [ 0.5033,  0.0055,  1.5146]]])
tensor([[[-1.2129,  0.0060],
         [ 0.5628, -0.2693],
         [-0.1500, -1.6171]],

        [[ 0.6058,  0.5033],
         [-0.3978,  0.0055],
         [ 0.3751,  1.5146]]])
tensor([[[-1.2129,  0.0060],
         [ 0.5628, -0.2693],
         [-0.1500, -1.6171]],

        [[ 0.6058,  0.5033],
         [-0.3978,  0.0055],
         [ 0.3751,  1.5146]]])
tensor([[[-1.2129,  0.5628, -0.1500],
         [ 0.6058, -0.3978,  0.3751]],

        [[-1.2129,  0.5628, -0.1500],
         [ 0.6058, -0.3978,  0.3751]]])


In [21]:
# squeeze and unsqueeze
x = torch.arange(start=1, end=10, step=1).view(1,9)
print("x: ", x)
print("x.shape: ", x.shape)

# squeeze the tensor
x_squeeze = torch.squeeze(x, dim=0)
print("x_squeeze: ", x_squeeze)
print("x_squeeze.shape: ", x_squeeze.shape)

# unsqueeze the tensor
x_unsqueeze = torch.unsqueeze(x_squeeze, dim=0)
print("x_unsqueeze: ", x_unsqueeze)
print("x_unsqueeze.shape: ", x_unsqueeze.shape)

x_unsqueeze = x_squeeze.unsqueeze(dim=1)
print("x_unsqueeze: ", x_unsqueeze)
print("x_unsqueeze.shape: ", x_unsqueeze.shape)

x:  tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])
x.shape:  torch.Size([1, 9])
x_squeeze:  tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])
x_squeeze.shape:  torch.Size([9])
x_unsqueeze:  tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])
x_unsqueeze.shape:  torch.Size([1, 9])
x_unsqueeze:  tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6],
        [7],
        [8],
        [9]])
x_unsqueeze.shape:  torch.Size([9, 1])


In [33]:
# permute
x = torch.arange(24).view(2, 3, 4)
# print("x: ", x)
print("x.shape: ", x.shape)

# permute the tensor
x_permute = x.permute(1, 2, 0)
# print("x_permute: ", x_permute)
print("x_permute.shape: ", x_permute.shape)

# permuted tensor shares the same memory with the original tensor
x_permute[0, 0, 0] = -1
print("x_permute[0, 0, 0]: ", x_permute[0, 0, 0])
print("x[0, 0, 0]: ", x[0, 0, 0])

x.shape:  torch.Size([2, 3, 4])
x_permute.shape:  torch.Size([3, 4, 2])
x_permute[0, 0, 0]:  tensor(-1)
x[0, 0, 0]:  tensor(-1)


In [38]:
# indexing
x = torch.arange(12).view(1, 3, 4)
print("x: ", x)

# index the outermost bracket
print("x[0]: ", x[0])

# index the middle bracket
print("x[0][1]: ", x[0][1])

# index the innermost bracket
print("x[0][1][2]: ", x[0][1][2])

# index the element 9
print("get the element 9: ", x[0][2][1])

## Challenge
# index on x to return 5
print("Index on x to return 5: ", x[0, 1, 1])

# index on x to return [1, 5, 9]
print("Index on x to return [1, 5, 9]: ", x[0, :, 1])

x:  tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]]])
x[0]:  tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
x[0][1]:  tensor([4, 5, 6, 7])
x[0][1][2]:  tensor(6)
get the element 9:  tensor(9)
Index on x to return 5:  tensor(5)
Index on x to return [1, 5, 9]:  tensor([1, 5, 9])


## Runnig tensors and PyTorch objects on GPUs

In [5]:
# check for GPU access with PyTorch
import torch
print("Is CUDA available: ", torch.cuda.is_available())

# device agnostic code
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print("Device: ", device)

# count the number of GPUs
print("Number of GPUs: ", torch.cuda.device_count())

Is CUDA available:  True
Device:  cuda
Number of GPUs:  1


### Put tensors on GPUs

In [4]:
# create a random tensor
tensor_rand = torch.rand(2,3)
print("tensor_rand: ", tensor_rand)
print("tensor_rand.device: ", tensor_rand.device)

# move the tensor to the GPU
tensor_rand = tensor_rand.to(device)
print("tensor_rand.device: ", tensor_rand.device)

tensor_rand:  tensor([[0.6117, 0.1904, 0.3525],
        [0.5000, 0.9904, 0.1421]])
tensor_rand.device:  cpu
tensor_rand.device:  cuda:0


### Numpy is unable to run on GPU

In [None]:
# convert the tensor to a numpy array
# leave the error here on purpose
numpy_rand = tensor_rand.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [7]:
# numpy can not run on the GPU
# move the tensor back to the CPU
numpy_rand = tensor_rand.cpu().numpy()

## Exercises
- Create a random tensor with shape (7, 7).
- Perform a matrix multiplication on the tensor from previous step with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).
- Set the random seed to 0 and do exercises 2 & 3 over again.
- Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one). If there is, set the GPU random seed to 1234.
- Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).
- Perform a matrix multiplication on the tensors you created in prvious step (again, you may have to adjust the shapes of one of the tensors).
- Find the maximum and minimum values of the output of previous step.
- Find the maximum and minimum index values of the output of previous step.
- Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape.

In [47]:
%%time
# a random tensor with shape (7, 7)
import torch
import random

# set the seed for reproducibility
RANDOM_SEED = 42

# set the seed for generating random numbers
# torch.cuda.manual_seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
x = torch.rand(7, 7)
print(x)

# set the seed for generating random numbers
torch.manual_seed(RANDOM_SEED)
y = torch.rand(1, 7)
print(y)

# transpose the tensor
y_T = y.T
print(y_T)


tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566],
        [0.7936, 0.9408, 0.1332, 0.9346, 0.5936, 0.8694, 0.5677],
        [0.7411, 0.4294, 0.8854, 0.5739, 0.2666, 0.6274, 0.2696],
        [0.4414, 0.2969, 0.8317, 0.1053, 0.2695, 0.3588, 0.1994],
        [0.5472, 0.0062, 0.9516, 0.0753, 0.8860, 0.5832, 0.3376],
        [0.8090, 0.5779, 0.9040, 0.5547, 0.3423, 0.6343, 0.3644],
        [0.7104, 0.9464, 0.7890, 0.2814, 0.7886, 0.5895, 0.7539]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566]])
tensor([[0.8823],
        [0.9150],
        [0.3829],
        [0.9593],
        [0.3904],
        [0.6009],
        [0.2566]])
CPU times: user 2.36 ms, sys: 867 μs, total: 3.23 ms
Wall time: 2.71 ms


In [54]:
%%time

# multiply the two tensors
z = torch.matmul(x, y_T)
print("matrix multiplication:", z)


matrix multiplication: tensor([[3.2618],
        [3.4084],
        [2.4866],
        [1.4525],
        [1.7079],
        [2.7291],
        [2.9204]])
CPU times: user 1.57 ms, sys: 938 μs, total: 2.51 ms
Wall time: 1.7 ms


In [35]:
# put the tensor on the GPU
x = x.to(device)
y_T = y_T.to(device)

In [46]:
%%time

# multiply the two tensors
z = torch.matmul(x, y_T)
print("matrix multiplication on the GPU:", z)

matrix multiplication on the GPU: tensor([[3.2618],
        [3.4084],
        [2.4866],
        [1.4525],
        [1.7079],
        [2.7291],
        [2.9204]], device='cuda:0')
CPU times: user 2.75 ms, sys: 141 μs, total: 2.89 ms
Wall time: 2.14 ms


In [56]:
# the min and max values of the tensor
print("min: ", z.min())
print("max: ", z.max())

# their indices
print("argmin: ", z.argmin())
print("argmax: ", z.argmax())

min:  tensor(1.4525)
max:  tensor(3.4084)
argmin:  tensor(3)
argmax:  tensor(1)


In [67]:
# Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed
# to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor
# and it's shape as well as the second tensor and it's shape.

# set the seed for generating random tensors
RANDOM_SEED = 7
torch.manual_seed(7)
x = torch.rand(1, 1, 1, 10)
print("x: ", x)
print("x.shape: ", x.shape)

# squeeze the tensor
x_squeeze = x.squeeze()
print("x_squeeze: ", x_squeeze)
print("x_squeeze.shape: ", x_squeeze.shape)


x:  tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]])
x.shape:  torch.Size([1, 1, 1, 10])
x_squeeze:  tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513])
x_squeeze.shape:  torch.Size([10])
