In [2]:
import torch
torch.__version__

'2.3.1'

A scalar is a zero-dimensional tensor, a single number.

In [3]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
scalar.ndim # ndim is an attribute (a property), not a method, so it is accessed without parentheses

0

In [5]:
scalar.item() # item() extracts the single value held by the tensor as a plain Python number

7

A vector is a one-dimensional tensor: it has a single dimension (it is often used to describe a quantity with a direction) but can contain many numbers.

In [6]:
vector = torch.tensor([7, 7]) # The nesting depth of the [] gives the number of dimensions: count the opening (or closing) brackets on one side.
vector

tensor([7, 7])

In [7]:
vector.ndim

1

In [8]:
vector.size()

torch.Size([2])

In [9]:
vector.shape

torch.Size([2])

A matrix has an additional dimension compared to a vector, a two-dimensional array.

In [10]:
matrix = torch.tensor([[7,8],[9,10]])
matrix

tensor([[ 7,  8],
        [ 9, 10]])

In [11]:
matrix.ndim

2

In [12]:
matrix.shape

torch.Size([2, 2])

A tensor is an n-dimensional array.

In [13]:
# A 3-D tensor: one outer bracket pair wrapping a single 3x3 matrix.
TENSOR = torch.tensor(
    [
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ]
    ]
)
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [14]:
TENSOR.ndim

3

In [15]:
TENSOR.shape

torch.Size([1, 3, 3])

Dimensions are read from the outermost bracket inward: the shape [1, 3, 3] means one 3x3 matrix.

Machine learning models typically start from tensors filled with random numbers.

In [16]:
random_tensor = torch.rand(3,4)
random_tensor, random_tensor.dtype

(tensor([[0.5471, 0.8347, 0.7225, 0.2744],
         [0.5861, 0.9953, 0.3630, 0.4820],
         [0.1384, 0.2860, 0.0387, 0.4295]]),
 torch.float32)

In [17]:
random_image = torch.rand(224, 224, 3)
random_image.shape, random_image.ndim

(torch.Size([224, 224, 3]), 3)

Fill the tensor with 0/1

In [18]:
# A 3x4 tensor of zeros; with no explicit dtype the values are float32.
zeros = torch.zeros(3, 4)
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [19]:
# A 3x4 tensor of ones; with no explicit dtype the values are float32.
ones = torch.ones(3, 4)
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

Range tensor: torch.arange(start, end, step)

In [20]:
# Integers from 0 up to (but not including) 10, in steps of 1.
zero_to_ten = torch.arange(start=0, end=10, step=1)
zero_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [21]:
# Every second integer from 0 up to (but not including) 10.
zero_to_ten_even = torch.arange(start=0, end=10, step=2)
zero_to_ten_even

tensor([0, 2, 4, 6, 8])

To create a tensor of zeros with the same shape (and dtype) as an existing tensor, use torch.zeros_like().

In [22]:
ten_zeros = torch.zeros_like(zero_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

Default Parameters

In [None]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # None lets torch infer the dtype from the data; Python floats become torch.float32 (see the output)
                               device=None, # None leaves the tensor on the default device, which is the CPU in this run
                               requires_grad=False) # If True, the operations performed on the tensor are recorded for automatic differentiation
float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [24]:
# Request half precision explicitly instead of relying on the float32 default.
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)
float_16_tensor.dtype

torch.float16

Understand the details of tensors.

In [25]:
# Print the tensor followed by its three most commonly inspected attributes.
some_tensor = torch.rand(3, 4)
for detail in (some_tensor, some_tensor.shape, some_tensor.dtype, some_tensor.device):
    print(detail)

tensor([[0.6930, 0.9186, 0.0624, 0.7615],
        [0.3582, 0.8818, 0.3684, 0.0921],
        [0.5897, 0.4885, 0.3274, 0.1872]])
torch.Size([3, 4])
torch.float32
cpu


Operation of tensors

In [26]:
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [27]:
tensor * 10

tensor([10, 20, 30])

In [28]:
tensor - 10

tensor([-9, -8, -7])

Built-in operations

In [29]:
torch.multiply(tensor, 10)

tensor([10, 20, 30])

In [74]:
tensor # But did not change the original value

tensor([1, 2, 3])

In [None]:
tensor * tensor # Element-wise multiplication (each element times the element at the same position), not bitwise

tensor([1, 4, 9])

Matrix multiplication:

(2, 3) @ (3, 2) -> (2, 2)

(3, 2) @ (2, 3) -> (3, 3)

In [32]:
torch.matmul(tensor, tensor) # For two 1-D tensors matmul returns the dot product: 1*1 + 2*2 + 3*3 = 14

tensor(14)

In [33]:
tensor @ tensor

tensor(14)

The inner dimensions of the two matrices must match

In [34]:
# Two 3x2 float32 matrices used in the matrix-multiplication examples.
tensor_A = torch.tensor(
    [[1, 2], [3, 4], [5, 6]],
    dtype=torch.float32,
)
tensor_B = torch.tensor(
    [[7, 10], [8, 11], [9, 12]],
    dtype=torch.float32,
)
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

Two (3, 2) matrices cannot be multiplied directly because their inner dimensions (2 and 3) differ; one of them must be transposed first

In [35]:
output = torch.matmul(tensor_A, tensor_B.T)  # (3, 2) @ (2, 3) -> (3, 3)
output

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

For two 2-D matrices, torch.mm gives the same result as torch.matmul (torch.mm only accepts 2-D inputs and does not broadcast)

In [36]:
output = torch.mm(tensor_A, tensor_B.T)
output

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Matrix multiplication animation: http://matrixmultiplication.xyz/

Matrix multiplication is also sometimes referred to as the dot product of two matrices

Linear layer:

In [37]:
# Seed first so the layer's randomly initialised weights are the same on every run.
torch.manual_seed(42)
linear = torch.nn.Linear(in_features=2, out_features=6)  # in_features matches the last dimension of x
x = tensor_A
output = linear(x)
# Input shape, output shape, then the output values.
for shown in (x.shape, output.shape, output):
    print(shown)

torch.Size([3, 2])
torch.Size([3, 6])
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)


PyTorch Optimization Suggestions:

- The default settings of DataLoader are not very reasonable. It is recommended to set num_workers to a value greater than 0 and enable pin_memory = True by default.

- Use torch.backends.cudnn.benchmark = True to automatically optimize the selection of cuDNN kernels.

- Maximize the batch size for each GPU to distribute the computational cost.

- In the weight layers before the BatchNorm layer, do not forget to set bias=False - otherwise, the bias is an invalid redundant parameter that will increase the model size.

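- Replace model.zero_grad() with for p in model.parameters(): p.grad = None. (Since PyTorch 2.0, zero_grad() defaults to set_to_none=True and already does this, so the manual loop is only needed on older versions.)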

- In production environments, be sure to turn off debugging APIs (such as detect_anomaly/profiler/emit_nvtx/gradcheck, etc.).

- Even when training on a single machine (no multi-node distributed training), use DistributedDataParallel instead of DataParallel.

- If the input size is not fixed, pay attention to load balancing among all GPUs to avoid GPU idleness.

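- Use the fused optimizers from apex (a plain per-parameter optimizer loop iterates over parameters individually, which is very inefficient). Recent PyTorch versions also provide multi-tensor (foreach) and fused implementations natively, e.g. torch.optim.AdamW(..., fused=True).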

- Use checkpointing techniques to recalculate memory-intensive but computationally efficient operations during backpropagation (such as activation functions, upsampling, etc.).

- Use the @torch.jit.script decorator to fuse consecutive point-wise operations, such as those that make up GELU.

Find the maximum, minimum, mean, and sum.

In [38]:
x = torch.arange(0, 100, 10)
x.dtype

torch.int64

In [None]:
print(x.min())
print(x.max())
print(x.type(torch.float32).mean()) # mean() needs a floating-point dtype, so the int64 tensor is cast to float32 first
print(x.sum())

tensor(0)
tensor(90)
tensor(45.)
tensor(450)


Find the location of the maximum and minimum values

In [40]:
print(x.argmax())
print(x.argmin())

tensor(9)
tensor(0)


When the arguments are written with a decimal point (Python floats), the resulting dtype defaults to float32

In [41]:
tensor = torch.arange(10., 100., 10.)
tensor.dtype

torch.float32

In [None]:
tensor_float16 = tensor.type(torch.float16) # tensor.type(dtype) returns the tensor converted to the requested dtype; here float32 -> float16
tensor_float16.dtype

torch.float16

In [None]:
tensor_int8 = tensor.type(torch.int8) # Same conversion call, float32 -> int8; the values used here (10 to 90) all fit in int8
tensor_int8.dtype

torch.int8

Change the shape of the tensor.

In [44]:
x = torch.arange(1., 8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [45]:
x_reshape = x.reshape(1, 7)
x_reshape, x_reshape.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [None]:
y = x.view(1, 7)
# view() returns a tensor with the new shape that shares memory with x, so writing through y also changes x.

In [47]:
y[:, 0] = 5
y

tensor([[5., 2., 3., 4., 5., 6., 7.]])

In [None]:
x # The shape of x is unchanged, but its first value is now 5 because y is a view sharing x's memory

tensor([5., 2., 3., 4., 5., 6., 7.])

Stack the tensor on top of itself five times along a new dimension, using torch.stack()

In [49]:
# Five references to the same x, stacked along a new leading dimension.
x_stacked = torch.stack([x] * 5, dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.],
        [5., 2., 3., 4., 5., 6., 7.]])

Remove all dimensions of size 1 from the tensor (keeping only dimensions of size > 1): torch.squeeze()

In [50]:
# Drop the size-1 dimension, then show the tensor and its shape before and after.
x_squeezed = x_reshape.squeeze()
for shown in (x_reshape, x_squeezed):
    print(shown)
    print(shown.shape)

tensor([[5., 2., 3., 4., 5., 6., 7.]])
torch.Size([1, 7])
tensor([5., 2., 3., 4., 5., 6., 7.])
torch.Size([7])


Use torch.unsqueeze() to add a dimension at a specific index

In [None]:
x_unsqueezed = x_squeezed.unsqueeze(dim = 0) # Becomes one row with seven columns
print(x_unsqueezed)
print(x_unsqueezed.shape)

tensor([[5., 2., 3., 4., 5., 6., 7.]])
torch.Size([1, 7])


In [None]:
x_unsqueezed = x_squeezed.unsqueeze(dim = 1) # Becomes seven rows with one column
print(x_unsqueezed)
print(x_unsqueezed.shape)

tensor([[5.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.]])
torch.Size([7, 1])


To rearrange the order of the dimensions, use torch.permute()

In [None]:
x_original = torch.rand(224, 224, 3)
x_permuted = x_original.permute(2, 0, 1)
# permute(2, 0, 1): old dim 2 becomes new dim 0, old dim 0 becomes new dim 1, old dim 1 becomes new dim 2.
print(x_permuted.shape)

torch.Size([3, 224, 224])


The index value is from the outer dimension-> inner dimension (see square brackets)

In [54]:
x = torch.arange(1, 10).reshape(1, 3, 3)
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [55]:
# Each additional index steps one bracket level deeper into the tensor.
for selected in (x[0], x[0, 0], x[0, 0, 0]):
    print(selected)

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([1, 2, 3])
tensor(1)


All values in this dimension are represented by ":".

In [56]:
x[:, 0]

tensor([[1, 2, 3]])

In [57]:
x[:, :, 1]

tensor([[2, 5, 8]])

In [58]:
x[:, 1, 1]

tensor([5])

In [59]:
x[0, 0, :]

tensor([1, 2, 3])

Conversion between numpy arrays and PyTorch tensors

Convert numpy array to pytorch tensor

In [60]:
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)  # the tensor keeps the array's float64 dtype instead of torch's float32 default (see output)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
array = array + 1 
# array = array + 1: Create a new array, disconnect memory sharing, and do not affect each other.
# array += 1: Modify the original array in place, memory sharing still takes effect. If so, the array and tensor will be modified synchronously.
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

PyTorch tensor to NumPy array

In [62]:
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [63]:
tensor = tensor + 1  # builds a new tensor and rebinds the name, so numpy_tensor still shows the old values
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

Reproducibility: fixing the random number seed

In [None]:
import random
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED) # Seed torch's random number generator with torch.manual_seed
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)  # second draw after a single seeding, so it differs from A (all False below)
random_tensor_A == random_tensor_B

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [None]:
import random
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
random_tensor_A = torch.rand(3, 4)
torch.manual_seed(RANDOM_SEED) # Re-seed with the same value before each rand() call to make the random tensors identical
random_tensor_B = torch.rand(3, 4)
random_tensor_A == random_tensor_B

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

Accessing the GPU

In [66]:
torch.cuda.is_available()

False

In [67]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [68]:
torch.cuda.device_count()

0

Using Apple Chips

In [69]:
torch.backends.mps.is_available()

True

In [70]:
# Pick the best available accelerator without discarding an earlier CUDA choice:
# the previous one-liner fell back to "cpu" whenever MPS was missing, which
# silently moved CUDA machines back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"  # Apple silicon GPU
else:
    device = "cpu"
device

'mps'

Move the tensor to the Apple GPU (the MPS device)

In [71]:
tensor = torch.tensor([1, 2, 3])
print(tensor.device)

tensor_on_gpu = tensor.to(device)
tensor_on_gpu

cpu


tensor([1, 2, 3], device='mps:0')

Return to CPU

In [None]:
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()  # move to the CPU first, then convert to a NumPy array
tensor_back_on_cpu # a copy on the CPU; tensor_on_gpu itself stays on the mps device

array([1, 2, 3])

In [None]:
tensor_on_gpu # The original is still on mps

tensor([1, 2, 3], device='mps:0')