In [1]:
import torch 
print(torch.__version__)

2.1.2


In [2]:
if torch.cuda.is_available():
    print("cuda is available")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("cuda is not available, using CPU")

cuda is not available, using CPU


In [3]:
# Creating empty tensor
torch.empty(2, 3) # the values are uninitialized, we get values which are already present in the memory location

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [4]:
a = torch.empty(3, 3)
type(a) # types of tensor

torch.Tensor

In [5]:
# Using zeros to create tensor
torch.zeros(2, 2)

tensor([[0., 0.],
        [0., 0.]])

In [6]:
torch.ones(2, 2)

tensor([[1., 1.],
        [1., 1.]])

In [7]:
torch.rand(3, 3) # random values between 0 and 1, if we run again we get different values

tensor([[0.3211, 0.1874, 0.4415],
        [0.3528, 0.9183, 0.3911],
        [0.8761, 0.5227, 0.3042]])

In [8]:
# Manual seed, if we want to get same random values again
torch.manual_seed(100)
torch.rand(3, 3)

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539],
        [0.2627, 0.0428, 0.2080]])

In [9]:
torch.tensor([[1, 2, 3, 4], [9, 8, 7, 6]])

tensor([[1, 2, 3, 4],
        [9, 8, 7, 6]])

In [10]:
# Arange function
torch.arange(10) # similar to python range function


tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
torch.arange(1, 10)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
torch.arange(3, 10, 2) # start, end, step

tensor([3, 5, 7, 9])

In [13]:
# Using linspace to create tensor with evenly spaced values
torch.linspace(0, 10, steps=5) # start, end, number of

tensor([ 0.0000,  2.5000,  5.0000,  7.5000, 10.0000])

In [14]:
torch.linspace(0, 5, steps=11)

tensor([0.0000, 0.5000, 1.0000, 1.5000, 2.0000, 2.5000, 3.0000, 3.5000, 4.0000,
        4.5000, 5.0000])

In [15]:
# Using eye to create identity matrix
torch.eye(4)

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

In [16]:
# usin full
torch.full((2, 3), 7) # shape, fill value

tensor([[7, 7, 7],
        [7, 7, 7]])

In [17]:
x = torch.tensor([[1, 2, 3], [9, 8, 7]])
x

tensor([[1, 2, 3],
        [9, 8, 7]])

In [18]:
x.shape

torch.Size([2, 3])

In [19]:
torch.empty_like(x)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [20]:
torch.zeros_like(x)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [21]:
torch.ones_like(x)

tensor([[1, 1, 1],
        [1, 1, 1]])

### Data Types in Tensors

In [22]:
x.dtype

torch.int64

In [23]:
torch.tensor([1.0, 2.0, 3.0], dtype=torch.int32)

  torch.tensor([1.0, 2.0, 3.0], dtype=torch.int32)


tensor([1, 2, 3], dtype=torch.int32)

In [24]:
torch.tensor([[1, 3, 4], [5, 6, 7]], dtype=torch.float64)

tensor([[1., 3., 4.],
        [5., 6., 7.]], dtype=torch.float64)

In [25]:
x.to(torch.int32)

tensor([[1, 2, 3],
        [9, 8, 7]], dtype=torch.int32)

In [26]:
d = torch.tensor([[1.9, 2.8, 3.7], [4.6, 5.5, 6.4]])

torch.round(d) # Rounds to nearest integer

tensor([[2., 3., 4.],
        [5., 6., 6.]])

In [27]:
torch.ceil(d) # Rounds to next integer


tensor([[2., 3., 4.],
        [5., 6., 7.]])

In [28]:
torch.floor(d)

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [29]:
torch.clamp(d, min=2.0, max=6.0)

tensor([[2.0000, 2.8000, 3.7000],
        [4.6000, 5.5000, 6.0000]])

In [30]:
e = torch.randint(size=(3, 3), low=0, high=10) # size, low, high
e

tensor([[0, 4, 3],
        [8, 8, 4],
        [4, 1, 0]])

In [31]:
torch.sum(e) # sum of all elements

tensor(32)

In [32]:
torch.sum(e, dim=0) # sum along columns

tensor([12, 13,  7])

In [33]:
torch.sum(e, dim=1) # sum along rows

tensor([ 7, 20,  5])

In [34]:
f = torch.randint(size=(4, 3), low=0, high=10, dtype=torch.float16)
f

tensor([[4., 3., 8.],
        [7., 4., 6.],
        [0., 1., 9.],
        [9., 8., 0.]], dtype=torch.float16)

In [35]:
torch.mean(f) # mean of all elements


tensor(4.9180, dtype=torch.float16)

In [36]:
torch.mean(f, dim=0) # mean along columns

tensor([5.0000, 4.0000, 5.7500], dtype=torch.float16)

In [37]:
torch.mean(f, dim=1) # mean along rows

tensor([5.0000, 5.6680, 3.3340, 5.6680], dtype=torch.float16)

In [38]:
g = torch.randint(size=(4, 3), low=5, high=9, dtype=torch.float16)
g

tensor([[8., 5., 5.],
        [8., 6., 6.],
        [6., 8., 5.],
        [5., 8., 8.]], dtype=torch.float16)

In [39]:
# Meadian
torch.median(g) # median of all elements

RuntimeError: "median_cpu" not implemented for 'Half'

In [None]:
torch.min(g)

tensor(5., dtype=torch.float16)

In [None]:
torch.max(g)

tensor(8., dtype=torch.float16)

In [None]:
torch.prod(g)

tensor(inf, dtype=torch.float16)

In [None]:
torch.std(g) # standard deviation of all elements, default is all elements, can specify dim as well

tensor(1.1543, dtype=torch.float16)

In [None]:
torch.std(g, dim=0) # standard deviation along columns

tensor([0.8164, 1.0000, 0.5771], dtype=torch.float16)

In [None]:
torch.var(g) # variance of all elements

tensor(1.3330, dtype=torch.float16)

In [None]:
torch.argmin(g)

tensor(1)

In [None]:
torch.argmax(g) # index of max element

tensor(3)

In [None]:
x = torch.randint(low=0, high=10, size=(2, 3))
y = torch.randint(low=0, high=10, size=(3, 2))
print(x)
print(y)

tensor([[7, 1, 5],
        [0, 2, 3]])
tensor([[7, 0],
        [2, 5],
        [2, 8]])


In [None]:
# Matrix Multiplication
torch.matmul(x, y)

tensor([[61, 45],
        [10, 34]])

In [None]:
vector1 = torch.tensor([1, 3, 5])
vector2 = torch.tensor([2, 4, 6])

In [None]:
# Dot Product
torch.dot(vector1, vector2)

tensor(44)

In [None]:
# Transpose
matrix = torch.tensor([[1, 2, 3], [6, 7, 9]])
torch.transpose(matrix, 0, 1) # dim0, dim1

tensor([[1, 6],
        [2, 7],
        [3, 9]])

In [None]:
matrix.T

tensor([[1, 6],
        [2, 7],
        [3, 9]])

In [None]:
a = torch.tensor([[1, 2, 3], [3, 4, 4]])
a.T

tensor([[1, 3],
        [2, 4],
        [3, 4]])

In [None]:
x1 = torch.randint(low=0, high=10, size=(4, 4), dtype=torch.float32)
x1

tensor([[0., 2., 5., 3.],
        [6., 7., 8., 1.],
        [2., 3., 2., 7.],
        [9., 3., 9., 7.]])

In [None]:
# Determinant
torch.det(x1)

tensor(1540.0001)

In [None]:
# Inverse
torch.inverse(x1)

tensor([[-0.2091,  0.0227, -0.0123,  0.0987],
        [-0.0636,  0.1591,  0.1422, -0.1377],
        [ 0.2091, -0.0227, -0.1305,  0.0442],
        [ 0.0273, -0.0682,  0.1227,  0.0182]])

### Comparision Operations

In [None]:
i = torch.randint(size=(2, 3), low=0, high=10)
j = torch.randint(size=(2, 3), low=0, high=10)
print(i)
print(j)

tensor([[8, 9, 6],
        [1, 9, 6]])
tensor([[8, 1, 7],
        [5, 6, 9]])


In [None]:
# Greater than
i > j

tensor([[False,  True, False],
        [False,  True, False]])

In [None]:
#Less than
i < j

tensor([[False, False,  True],
        [ True, False,  True]])

In [None]:
i == j # Equality

tensor([[ True, False, False],
        [False, False, False]])

In [None]:
i != j # Not equal

tensor([[False,  True,  True],
        [ True,  True,  True]])

#### Special Functions

In [None]:
k = torch.randint(size=(2, 3), low=5, high=50, dtype=torch.float32)
k

tensor([[15.,  9., 38.],
        [31., 44., 27.]])

In [None]:
# Log 
torch.log(k)

tensor([[3.1355, 3.0910, 3.1355],
        [3.8712, 1.6094, 3.4965]])

In [None]:
# Exponents
torch.exp(k)

tensor([[9.7448e+09, 3.5849e+09, 9.7448e+09],
        [7.0167e+20, 1.4841e+02, 2.1464e+14]])

In [None]:
# Square root 
torch.sqrt(k)

tensor([[4.7958, 4.6904, 4.7958],
        [6.9282, 2.2361, 5.7446]])

In [None]:
# Sigmoid 
torch.sigmoid(k)

tensor([[1.0000, 1.0000, 1.0000],
        [1.0000, 0.9933, 1.0000]])

In [None]:
# torch.softmax(k, dim=0) explanation:
# Softmax converts raw values into probabilities that sum to 1
# Formula: softmax(x_i) = exp(x_i) / sum(exp(x_j)) for all j in the dimension

# With dim=0, softmax is applied along columns (across rows)
# For each column, values are normalized to sum to 1

# Example with k:
print("Original tensor k:")
print(k)
print("\nSoftmax along dim=0 (columns):")
print(torch.softmax(k, dim=0))
print("\nSum along dim=0 (should be 1 for each column):")
print(torch.softmax(k, dim=0).sum(dim=0))

Original tensor k:
tensor([[15.,  9., 38.],
        [31., 44., 27.]])

Softmax along dim=0 (columns):
tensor([[1.1254e-07, 6.3051e-16, 9.9998e-01],
        [1.0000e+00, 1.0000e+00, 1.6701e-05]])

Sum along dim=0 (should be 1 for each column):
tensor([1., 1., 1.])


In [None]:
# Softmax
torch.softmax(k, dim=0)

tensor([[1.1254e-07, 6.3051e-16, 9.9998e-01],
        [1.0000e+00, 1.0000e+00, 1.6701e-05]])

In [None]:
torch.relu(k)

tensor([[15.,  9., 38.],
        [31., 44., 27.]])

#### Inplace Operations

In [None]:
m = torch.rand(2, 3)
m


tensor([[0.5809, 0.1088, 0.7065],
        [0.0105, 0.4602, 0.2945]])

In [None]:
n = torch.rand(2, 3)
n

tensor([[0.0085, 0.2174, 0.1890],
        [0.0911, 0.6344, 0.3142]])

In [None]:
m + n

tensor([[0.5894, 0.3261, 0.8955],
        [0.1016, 1.0946, 0.6086]])

In [None]:
m.add_(n) # in-place addition, modifies m : it will not create a new tensor but will add n to m and store the result in m

tensor([[0.5894, 0.3261, 0.8955],
        [0.1016, 1.0946, 0.6086]])

In [None]:
n # print n to see the values which should be unchanged

tensor([[0.0085, 0.2174, 0.1890],
        [0.0911, 0.6344, 0.3142]])

In [None]:
n.relu_()

tensor([[0.0085, 0.2174, 0.1890],
        [0.0911, 0.6344, 0.3142]])

In [None]:
n

tensor([[0.0085, 0.2174, 0.1890],
        [0.0911, 0.6344, 0.3142]])

In [None]:
b = torch.randint(size =(2, 3), low=-1, high=4)
b

tensor([[-1,  3,  0],
        [ 0,  2,  3]])

In [None]:
torch.relu(b)

tensor([[0, 3, 0],
        [0, 2, 3]])

#### Copying a Tensor

In [None]:
a = torch.rand(2, 3)
a

tensor([[0.3183, 0.0321, 0.3290],
        [0.5301, 0.6401, 0.7954]])

In [None]:
b = a
b # print b to see that it references the same tensor as a

tensor([[0.3183, 0.0321, 0.3290],
        [0.5301, 0.6401, 0.7954]])

In [None]:
d = torch.rand(3, 3)
d

tensor([[0.3066, 0.2397, 0.1156],
        [0.4839, 0.3944, 0.0801],
        [0.7782, 0.6686, 0.2312]])

In [None]:
e = d.clone()
e

tensor([[0.3066, 0.2397, 0.1156],
        [0.4839, 0.3944, 0.0801],
        [0.7782, 0.6686, 0.2312]])

In [None]:
id(d)

6076582608

In [None]:
id(e)

6076583376

#### Tensor Operations on GPU

In [None]:
torch.cuda.is_available()

False

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')

In [None]:
device # print device to see which device is being used

device(type='mps')

In [None]:
import torch

print(torch.__version__)

2.3.1


In [None]:
print("CUDA is Available :", torch.cuda.is_available())
print("MPS is Available :", torch.backends.mps.is_available() )


CUDA is Available : False
MPS is Available : True


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else "cpu")

device

device(type='mps')

In [None]:
# Creating a new tensor on GPU if available
torch.rand((3, 3), device = device)

tensor([[0.0655, 0.1922, 0.5020],
        [0.0472, 0.1763, 0.2453],
        [0.9805, 0.5566, 0.6175]], device='mps:0')

In [None]:
# moving to GPU/MPS if available 

a = torch.rand(3, 3) # Creating tensor on CPU
a.to(device) # moving tensor to the device (GPU/MPS/CPU) from CPU

tensor([[0.9221, 0.1066, 0.5523],
        [0.4243, 0.7728, 0.3614],
        [0.8585, 0.9878, 0.0158]], device='mps:0')

##### Perormance comparision

In [None]:
import time

# Define the size of the matrices 
size = 10000 # Large size for better performance measurement

# Create ranodm matrices on CPU
matrix1_cpu1 = torch.rand(size, size)
matrix2_cpu2 = torch.rand(size, size)

#Measure time on CPU
start_time = time.time()
result_cpu = torch.matmul(matrix1_cpu1, matrix2_cpu2)
end_time = time.time()
cpu_time = end_time - start_time
print(f"Time on CPU: {cpu_time:.4f} seconds")


# Move matrices to GPU/MPS if availble
matrix1_GPU = matrix1_cpu1.to(device)
matrix2_GPU = matrix2_cpu2.to(device)  

# Measure time on GPU/MPS
start_time = time.time()
result_GPU = torch.matmul(matrix1_GPU, matrix2_GPU)
end_time = time.time()
gpu_time = end_time - start_time
print(f"Time on GPU/MPS: {gpu_time:.4f} seconds")


Time on CPU: 0.7107 seconds
Time on GPU/MPS: 0.0444 seconds


In [None]:
# Compare results 
print(f"Speedup (CPU time / GPU time ): {cpu_time / gpu_time:.2f}x")

Speedup (CPU time / GPU time ): 16.02x


#### Reshaping Tensors

In [None]:
a = torch.ones(4, 4)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
# Reshape 
a.reshape(2, 2, 2, 2)

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]],


        [[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]])

In [None]:
# Flatten 
a.flatten()

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [None]:
# Permute dimensions
a.permute(1, 0) # swaps dimensions 0 and 1 which means rows become columns and columns become rows

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
b1 = torch.randn(3, 4)
b1

tensor([[-2.2079,  0.2532, -1.7878,  0.4281],
        [ 0.4645,  0.3569,  0.5323,  1.5134],
        [-0.7971,  0.0232, -2.1527,  0.4327]])

In [None]:
b1.permute(1, 0) # swaps dimensions 0 and 1 which means rows become columns and columns become rows

tensor([[-2.2079,  0.4645, -0.7971],
        [ 0.2532,  0.3569,  0.0232],
        [-1.7878,  0.5323, -2.1527],
        [ 0.4281,  1.5134,  0.4327]])

In [None]:
b = torch.rand(3, 3, 4)
b

tensor([[[0.7610, 0.6272, 0.9826, 0.7314],
         [0.2950, 0.3958, 0.0196, 0.5848],
         [0.0757, 0.7439, 0.3691, 0.6280]],

        [[0.1267, 0.4877, 0.1400, 0.5390],
         [0.5520, 0.3668, 0.9237, 0.4731],
         [0.0132, 0.2273, 0.8028, 0.4663]],

        [[0.4373, 0.6158, 0.3598, 0.4759],
         [0.4378, 0.3010, 0.3541, 0.0753],
         [0.8572, 0.5181, 0.0994, 0.6812]]])

In [None]:
b.permute(2, 0, 1) # new order of dimensions


tensor([[[0.7610, 0.2950, 0.0757],
         [0.1267, 0.5520, 0.0132],
         [0.4373, 0.4378, 0.8572]],

        [[0.6272, 0.3958, 0.7439],
         [0.4877, 0.3668, 0.2273],
         [0.6158, 0.3010, 0.5181]],

        [[0.9826, 0.0196, 0.3691],
         [0.1400, 0.9237, 0.8028],
         [0.3598, 0.3541, 0.0994]],

        [[0.7314, 0.5848, 0.6280],
         [0.5390, 0.4731, 0.4663],
         [0.4759, 0.0753, 0.6812]]])

In [None]:
# Unsqueeze
c = torch.randn(226, 226, 3)
c.unsqueeze(0).shape # adds a new dimension at index 0, resulting in shape (1, 226, 226, 3), we can add the new dimension at any index


torch.Size([1, 226, 226, 3])

In [None]:
# Squeeze 
d = torch.randn(1, 10, 3)
d

tensor([[[-0.6360, -0.2533, -1.6988],
         [ 0.6156,  0.4561,  0.6661],
         [ 0.4174,  1.1430, -0.4849],
         [ 1.7508, -0.4741,  0.9992],
         [ 0.8404,  0.9555, -1.7096],
         [-0.9413,  0.4692,  0.6816],
         [ 1.6966,  1.4909, -0.1526],
         [ 0.6084,  0.8067, -0.0062],
         [ 2.0115,  0.0265, -0.4888],
         [-1.0301, -1.3005,  0.9265]]])

In [None]:
d.squeeze_(0).shape

torch.Size([10, 3])

In [None]:
d.shape

torch.Size([10, 3])

#### NumPy and Pytorch
`We can easily convert tensors to numpy arrays, numpy arrays to PyTorch tensors`

In [None]:
import numpy as np

In [None]:
a = torch.tensor([[1, 2, 3], [4, 2, 1 ]])
a

tensor([[1, 2, 3],
        [4, 2, 1]])

In [None]:
b = a.numpy()
b

array([[1, 2, 3],
       [4, 2, 1]])

In [None]:
type(b)

numpy.ndarray

In [None]:
c = np.array([[4, 5, 6], [7, 8, 9]])
c

array([[4, 5, 6],
       [7, 8, 9]])

In [None]:
torch.from_numpy(c)

tensor([[4, 5, 6],
        [7, 8, 9]])