In [2]:
import torch
torch.__version__

'2.4.1'

In [3]:
# Scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
scalar.ndim

0

In [5]:
# Get the Python number within a tensor (only works with one-element tensors)
scalar.item()

7

In [9]:
# Vector
vector = torch.tensor([7, 7, 8])
vector

tensor([7, 7, 8])

In [10]:
# check the number of dimensions of vector
vector.ndim

1

In [11]:
# check the shape of vector
vector.shape

torch.Size([3])

In [18]:
# Matrix
matrix = torch.tensor([[7, 8, 9],
                       [9, 10, 11],
                       [5,6,7]])
matrix

tensor([[ 7,  8,  9],
        [ 9, 10, 11],
        [ 5,  6,  7]])

In [19]:
matrix.ndim, matrix.shape

(2, torch.Size([3, 3]))

In [20]:
# Tensor
tensor = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 4, 5]]])
tensor

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [21]:
tensor.ndim

3

In [23]:
tensor.shape

torch.Size([1, 3, 3])

In [24]:
# create a tensor of shape (2, 3, 4)
tensor = torch.tensor([[[1, 2, 3, 4],
                        [4, 5, 6, 7],
                        [8, 9, 10, 11]],
                       [[11, 12, 13, 14],
                        [15, 16, 17, 18],
                        [19, 20, 21, 22]]])

In [25]:
tensor.ndim, tensor.shape

(3, torch.Size([2, 3, 4]))

In [26]:
# Create a random tensor of size (3,4)
random_tensor = torch.rand(3, 4)
random_tensor, random_tensor.dtype, random_tensor.shape

(tensor([[0.8161, 0.2196, 0.3472, 0.4466],
         [0.8156, 0.0266, 0.7332, 0.5134],
         [0.0831, 0.1466, 0.6711, 0.4298]]),
 torch.float32,
 torch.Size([3, 4]))

In [28]:
# Create a random tensor of size (224, 224, 3)
random_tensor = torch.rand(224, 224, 3)
random_tensor.shape, random_tensor.dtype

(torch.Size([224, 224, 3]), torch.float32)

In [29]:
# Create a tensor of all zeros
zeros = torch.zeros(3, 4)
zeros, zeros.shape, zeros.dtype, zeros.ndim

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.Size([3, 4]),
 torch.float32,
 2)

In [30]:
# Create a tensor of all ones
ones = torch.ones(3, 4)
ones, ones.shape, ones.dtype, ones.ndim

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.Size([3, 4]),
 torch.float32,
 2)

In [32]:
# Use torch.arange() to create a tensor from 0 to 9
range_tensor = torch.arange(0, 10)
range_tensor, range_tensor.shape

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), torch.Size([10]))

In [33]:
# create a tensor of zeros similar to the shape of range_tensor
zeros_similar = torch.zeros_like(range_tensor)
zeros_similar, zeros_similar.shape

(tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), torch.Size([10]))

In [34]:
# how to know if a tensor is on the GPU or CPU
range_tensor.device

device(type='cpu')

In [36]:
# Check if CUDA is available, otherwise use MPS if available
# Pick the first backend that is available (CUDA, then MPS, then CPU) ...
if torch.cuda.is_available():
    target_backend = "cuda"
elif torch.backends.mps.is_available():
    target_backend = "mps"
else:
    target_backend = "cpu"
# ... and move range_tensor there with a single .to() call.
range_tensor = range_tensor.to(target_backend)

range_tensor.device

device(type='mps', index=0)

In [37]:
# Set the device variable based on availability
# Choose the backend name in priority order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    backend_name = "cuda"
elif torch.backends.mps.is_available():
    backend_name = "mps"
else:
    backend_name = "cpu"
device = torch.device(backend_name)

# Build the example tensor (0..9) and move it to the chosen device.
range_tensor = torch.arange(10).to(device)

# Show where the tensor ended up.
print(range_tensor.device)

mps:0


In [39]:
# default data type of a tensor
# All three keyword arguments are spelled out with the values used here so the
# defaults are visible; the result below is float32 on the CPU.
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype = None,  # None -> dtype inferred from the data (float32 here)
                               device = None,  # None -> default device (cpu here)
                               requires_grad=False)  # no gradient tracking for this tensor

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [40]:
float_16_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=torch.float16) # torch.half would also work

float_16_tensor.dtype

torch.float16

In [4]:
some_tensor = torch.rand(3, 4)

print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}")

tensor([[0.2812, 0.6620, 0.9223, 0.7241],
        [0.8090, 0.9610, 0.1114, 0.5545],
        [0.5943, 0.2706, 0.7941, 0.1952]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### Basic Operations (addition, subtraction, multiplication)

In [6]:
# Create a tensor of values and add a number to it
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [7]:
# multiply a tensor by a number
tensor * 10

tensor([10, 20, 30])

In [9]:
tensor # original tensor is unchanged

tensor([1, 2, 3])

In [10]:
# subtract and reassign
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [11]:
# add and reassign
tensor = tensor + 10
tensor

tensor([1, 2, 3])

In [13]:
# can also use torch built-in functions
torch.add(tensor, 10), torch.subtract(tensor, 10), torch.multiply(tensor, 10)

(tensor([11, 12, 13]), tensor([-9, -8, -7]), tensor([10, 20, 30]))

In [14]:
tensor

tensor([1, 2, 3])

In [15]:
# element-wise multiplication
tensor = torch.tensor([1, 2, 3])
tensor * tensor

tensor([1, 4, 9])

### Matrix multiplication

In [16]:
tensor = torch.tensor([1, 2, 3])
tensor.shape

torch.Size([3])

In [17]:
# Element-wise multiplication: each element is multiplied by its counterpart (no summation)
tensor * tensor

tensor([1, 4, 9])

In [18]:
# Matrix multiplication; for two 1-D tensors this is the dot product (1*1 + 2*2 + 3*3)
torch.matmul(tensor, tensor)

tensor(14)

In [20]:
%%time
# Matrix multiplication by hand: add up every element multiplied by itself
value = 0
for element in tensor:
    value += element * element
value

CPU times: user 885 μs, sys: 1.05 ms, total: 1.94 ms
Wall time: 1.02 ms


tensor(14)

In [21]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 124 μs, sys: 30 μs, total: 154 μs
Wall time: 145 μs


tensor(14)

In [24]:
# Both operands have shape (3, 2). Matrix multiplication requires the inner
# dimensions to match (columns of the first == rows of the second), so this
# call is expected to fail. The error is caught and printed instead of being
# left uncaught, so the notebook still survives "Restart Kernel -> Run All".
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as shape_error:
    # Keep the formatted message so the shape mismatch stays visible as output.
    matmul_error = f"{type(shape_error).__name__}: {shape_error}"
    print(matmul_error)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [25]:
tensor_A, tensor_B

(tensor([[1., 2.],
         [3., 4.],
         [5., 6.]]),
 tensor([[ 7., 10.],
         [ 8., 11.],
         [ 9., 12.]]))

In [26]:
tensor_A, tensor_B.T

(tensor([[1., 2.],
         [3., 4.],
         [5., 6.]]),
 tensor([[ 7.,  8.,  9.],
         [10., 11., 12.]]))

In [27]:
# Transposing tensor_B gives shape (2, 3), so the inner dimensions match: (3, 2) @ (2, 3) -> (3, 3)
torch.matmul(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

$$ y = x\cdot{A^T} + b $$

In [38]:
# Seed the random number generator so the layer's randomly initialised
# weight and bias (and therefore the output) are reproducible.
torch.manual_seed(42)

# Linear layer taking 2 input features and producing 6 output features.
linear = torch.nn.Linear(in_features=2, out_features=6)
x = tensor_A  # shape (3, 2): the last dimension matches in_features
output = linear(x)
output

tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

In [33]:
x.shape, linear.weight.shape, linear.bias.shape

(torch.Size([3, 2]), torch.Size([6, 2]), torch.Size([6]))

In [34]:
linear.weight, linear.bias

(Parameter containing:
 tensor([[ 0.5406,  0.5869],
         [-0.1657,  0.6496],
         [-0.1549,  0.1427],
         [-0.3443,  0.4153],
         [ 0.6233, -0.5188],
         [ 0.6146,  0.1323]], requires_grad=True),
 Parameter containing:
 tensor([ 0.5224,  0.0958,  0.3410, -0.0998,  0.5451,  0.1045],
        requires_grad=True))

In [35]:
# Reproduce the linear layer by hand, y = x @ W.T + b; the values match linear(x)
torch.matmul(x, linear.weight.T) + linear.bias

tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddBackward0>)

In [43]:
x = torch.arange(0, 100, 10, dtype=torch.float32) 
x

tensor([ 0., 10., 20., 30., 40., 50., 60., 70., 80., 90.])

In [44]:
x.min(), x.max(), x.mean(), x.std()

(tensor(0.), tensor(90.), tensor(45.), tensor(30.2765))

In [42]:
x.dtype

torch.float32

In [45]:
x.argmax(), x.argmin()

(tensor(9), tensor(0))

In [46]:
import torch

# Demonstrate giving a tensor a new shape with view()
x = torch.arange(10)
print("Original tensor:", x, sep="\n")

# Present the same ten values as 2 rows of 5
x_reshaped = x.view(2, 5)
print("Reshaped tensor (2, 5):", x_reshaped, sep="\n")

Original tensor:
tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Reshaped tensor (2, 5):
tensor([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]])


In [1]:
import torch
x = torch.arange(1., 8.)
x, x.shape, x.dtype

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]), torch.float32)

In [2]:
x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [6]:
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [5]:
# view() gives the same data a new shape; z shares memory with x rather than copying it
z = x.view(1, 7)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [7]:
# Writing through the view also changes x, because z and x share the same memory
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

In [8]:
# Stack four copies of x along a new dimension 0: each copy becomes a row -> shape (4, 7)
x_stacked = torch.stack([x, x, x, x], dim=0)
x_stacked, x_stacked.shape

(tensor([[5., 2., 3., 4., 5., 6., 7.],
         [5., 2., 3., 4., 5., 6., 7.],
         [5., 2., 3., 4., 5., 6., 7.],
         [5., 2., 3., 4., 5., 6., 7.]]),
 torch.Size([4, 7]))

In [9]:
# Stack along a new dimension 1 instead: each copy becomes a column -> shape (7, 4)
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked, x_stacked.shape

(tensor([[5., 5., 5., 5.],
         [2., 2., 2., 2.],
         [3., 3., 3., 3.],
         [4., 4., 4., 4.],
         [5., 5., 5., 5.],
         [6., 6., 6., 6.],
         [7., 7., 7., 7.]]),
 torch.Size([7, 4]))

In [10]:
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# squeeze drops every dimension of size 1, so (1, 7) becomes (7,)
x_squeezed = torch.squeeze(x_reshaped)
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


In [11]:
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# unsqueeze inserts a size-1 dimension at index 0, so (7,) becomes (1, 7)
x_unsqueezed = torch.unsqueeze(x_squeezed, dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
Previous shape: torch.Size([7])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
New shape: torch.Size([1, 7])


In [12]:
# Random tensor of shape (224, 224, 3), e.g. a height x width x channels image
x_original = torch.rand(224, 224, 3)
# Reorder the axes: old axis 2 comes first, followed by old axes 0 and 1
x_permuted = torch.permute(x_original, (2, 0, 1))

x_original.shape, x_permuted.shape

(torch.Size([224, 224, 3]), torch.Size([3, 224, 224]))

In [13]:
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [16]:
x[0], x[0][0], x[0][0][0]

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([1, 2, 3]),
 tensor(1))

In [18]:
# get index 0 of the 0th dimension and index 0 of the 1st dimension (same result as x[0][0])
x[0, 0]

tensor([1, 2, 3])

In [19]:
# all of the 0th and 1st dimensions, but only index 1 of the 2nd dimension (the middle column)
x[:, :, 1]

tensor([[2, 5, 8]])

In [20]:
# all of the 0th dimension, index 1 of the 1st dimension and index 1 of the 2nd dimension (the centre value)
x[:, 1, 1]

tensor([5])

In [23]:
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array).type(torch.float32)
array, tensor, tensor.dtype

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.]),
 torch.float32)

In [24]:
# In-place update of the NumPy array. The tensor keeps its old values because the
# float32 conversion in the previous cell produced a separate copy of the data.
array += 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [25]:
# Tensor to numpy
tensor = torch.ones(7)  # float32 by default
numpy_tensor = tensor.numpy()  # the array keeps the tensor's dtype (float32)
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [26]:
!nvidia-smi

zsh:1: command not found: nvidia-smi


In [27]:
# check if mps is available
torch.backends.mps.is_available()

True

In [28]:
# check if cuda is available
torch.cuda.is_available()

False

In [30]:
# set device based on cuda, mps, or cpu availability
# Select the compute device in priority order: CUDA GPU, then Apple MPS, then CPU
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

device

device(type='mps')

In [31]:
tensor = torch.tensor([1, 2, 3])

print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [32]:
# .to(device) returns a tensor on the selected device; `tensor` itself is not moved
tensor_on_gpu = tensor.to(device)
tensor_on_gpu, tensor_on_gpu.device

(tensor([1, 2, 3], device='mps:0'), device(type='mps', index=0))

In [33]:
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [36]:
# .cpu() returns a CPU version of the tensor; tensor_on_gpu itself stays on its device
tensor_on_gpu.cpu().device

device(type='cpu')

In [35]:
tensor_on_gpu

tensor([1, 2, 3], device='mps:0')

### Exercises

In [39]:
import torch
tensor_A = torch.rand(7,7)
tensor_A, tensor_A.shape

(tensor([[0.7326, 0.9692, 0.7958, 0.3434, 0.7320, 0.7863, 0.3367],
         [0.6652, 0.0704, 0.0988, 0.1539, 0.3794, 0.7871, 0.1975],
         [0.9776, 0.5984, 0.4760, 0.7355, 0.8602, 0.6226, 0.7608],
         [0.9444, 0.1398, 0.2019, 0.1781, 0.5456, 0.4085, 0.0983],
         [0.1994, 0.6547, 0.3123, 0.8510, 0.5329, 0.0313, 0.3039],
         [0.1268, 0.8279, 0.4236, 0.3396, 0.6954, 0.6615, 0.6868],
         [0.4530, 0.7569, 0.2727, 0.8045, 0.5412, 0.1843, 0.7475]]),
 torch.Size([7, 7]))

In [40]:
tensor_B = torch.rand(1, 7)
tensor_B, tensor_B.shape

(tensor([[0.2805, 0.0982, 0.8926, 0.0672, 0.5304, 0.7518, 0.3861]]),
 torch.Size([1, 7]))

In [41]:
# tensor_B is (1, 7); transpose it to (7, 1) so that (7, 7) @ (7, 1) -> (7, 1)
torch.matmul(tensor_A, tensor_B.T)

tensor([[2.1434],
        [1.1612],
        [2.0253],
        [1.1052],
        [0.8796],
        [1.6491],
        [1.2130]])

In [44]:
torch.manual_seed(0)
tensor_A = torch.rand(7,7)
tensor_A, tensor_A.shape

(tensor([[0.4963, 0.7682, 0.0885, 0.1320, 0.3074, 0.6341, 0.4901],
         [0.8964, 0.4556, 0.6323, 0.3489, 0.4017, 0.0223, 0.1689],
         [0.2939, 0.5185, 0.6977, 0.8000, 0.1610, 0.2823, 0.6816],
         [0.9152, 0.3971, 0.8742, 0.4194, 0.5529, 0.9527, 0.0362],
         [0.1852, 0.3734, 0.3051, 0.9320, 0.1759, 0.2698, 0.1507],
         [0.0317, 0.2081, 0.9298, 0.7231, 0.7423, 0.5263, 0.2437],
         [0.5846, 0.0332, 0.1387, 0.2422, 0.8155, 0.7932, 0.2783]]),
 torch.Size([7, 7]))

In [45]:
tensor_B = torch.rand(1, 7)
tensor_B, tensor_B.shape

(tensor([[0.4820, 0.8198, 0.9971, 0.6984, 0.5675, 0.8352, 0.2056]]),
 torch.Size([1, 7]))

In [46]:
# Same multiplication as before, now on the seeded tensors: (7, 7) @ (7, 1) -> (7, 1)
torch.matmul(tensor_A, tensor_B.T)

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

In [49]:
# Fix the seed so both random tensors are reproducible
torch.manual_seed(1234)

# Draw A first and then B (same generator order), moving each to the selected device
tensor_A = torch.rand(2, 3).to(device)
tensor_B = torch.rand(2, 3).to(device)

tensor_A, tensor_B

  nonzero_finite_vals = torch.masked_select(


(tensor([[0.0290, 0.4019, 0.2598],
         [0.3666, 0.0583, 0.7006]], device='mps:0'),
 tensor([[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631]], device='mps:0'))

In [51]:
tensor_mul = torch.matmul(tensor_A, tensor_B.T)
tensor_mul

tensor([[0.3647, 0.4709],
        [0.5184, 0.5617]], device='mps:0')

In [53]:
tensor_mul.max(), tensor_mul.min(), tensor_mul.mean(), tensor_mul.std()

(tensor(0.5617, device='mps:0'),
 tensor(0.3647, device='mps:0'),
 tensor(0.4789, device='mps:0'),
 tensor(0.0847, device='mps:0'))

In [54]:
tensor_mul.argmax(), tensor_mul.argmin()

(tensor(3, device='mps:0'), tensor(0, device='mps:0'))

In [56]:
# Seed the generator so the random values are repeatable
torch.manual_seed(7)
tensor = torch.rand(1, 1, 1, 10)
# Drop every size-1 dimension: (1, 1, 1, 10) -> (10,)
tensor_squeezed = tensor.squeeze()
tensor, tensor_squeezed, tensor.shape, tensor_squeezed.shape

(tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
            0.3653, 0.8513]]]]),
 tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
         0.8513]),
 torch.Size([1, 1, 1, 10]),
 torch.Size([10]))