In [None]:
!pip install torch



In [None]:
import torch

In [None]:
# Check to see if your system has gpu
# NOTE: `device` holds a plain bool here; the "Pushing the Data to Device" cell
# later rebinds the same name to a torch.device object.
device = torch.cuda.is_available()
print(device) # prints True if gpu is available, else False

True


### Tensor Creation

In [None]:
t1 = torch.ones(size=(2,3))       # Tensor of ones with shape 2x3
t2 = torch.zeros(size=(2,3))      # Tensor of zeros with shape 2x3
t3 = torch.eye(3)                 # 3x3 identity matrix
t4 = torch.rand(size=(3,4))       # Tensor of uniformly distributed random numbers in [0, 1) with shape 3x4
t5 = torch.arange(7)              # A 1D tensor of the integers [0, n-1], here 0..6

print(t1,"\n")
print(t2,"\n")
print(t3,"\n")
print(t4,"\n")
print(t5)

tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

tensor([[0., 0., 0.],
        [0., 0., 0.]]) 

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]]) 

tensor([[0.5267, 0.9558, 0.6494, 0.3169],
        [0.7270, 0.3174, 0.6088, 0.6846],
        [0.3158, 0.6706, 0.6030, 0.1666]]) 

tensor([0, 1, 2, 3, 4, 5, 6])


#### Pushing the Data to Device

Before operating on the data, push it onto the target device, which in most cases is the GPU. This speeds up execution significantly, because the data does not have to be transferred from host (CPU) memory to the GPU again for every operation.

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Pushing the data to device
# .to(device) returns the moved tensor, so the result is assigned back to the name
tensor = torch.ones(size=(2,3))
tensor = tensor.to(device)

# NOTE: for a tensor that lives on the GPU, the legacy CUDA type is torch.cuda.DoubleTensor
# (not tensor.cuda.DoubleTensor); tensor.double() / tensor.to(torch.float64) casts in place on
# whichever device the tensor is already on.

cuda


## Accessing and Modifying Tensors
**NOTE:** Tensor values can easily be modified by using the accessing method, similar to numpy to select the desired section of the tensor to be modified

### Tensor indexing

In [None]:
# Basic
t = torch.rand(size=(2,3,4)) # 3D tensor

print('Original Tensor t: \n', t, "\n")

# Some valid ways of accessing individual elements in the tensor
# 1) chained indexing, one dimension at a time
print('t[0][0][0]\n', t[0][0][0],"\n")
# 2) a single comma-separated index covering all dimensions
print('t[1,2,3]\n', t[1,2,3],"\n")
# 3) negative indices count from the end; for a (2,3,4) tensor this is the same element as t[1,2,3]
print('t[-1,-1][-1]\n', t[-1,-1][-1],"\n")

Original Tensor t: 
 tensor([[[0.5790, 0.8357, 0.6766, 0.7671],
         [0.6737, 0.4272, 0.1614, 0.2119],
         [0.7833, 0.7023, 0.3235, 0.6283]],

        [[0.5599, 0.6473, 0.6313, 0.8300],
         [0.8150, 0.7023, 0.2754, 0.7235],
         [0.0547, 0.4414, 0.2659, 0.3734]]]) 

t[0][0][0]
 tensor(0.5790) 

t[1,2,3]
 tensor(0.3734) 

t[-1,-1][-1]
 tensor(0.3734) 



### Tensor slicing

In [None]:
# Slicing the (2,3,4) tensor `t` created in the indexing cell above
print('Original Tensor t:')
print(t)
print('\n')

# Integer index drops the dimension: result shape (3,4)
print('t[0]\n', t[0])
# Slice keeps the dimension: result shape (1,3,4)
print('t[:1]\n', t[:1])
# Row 1 of every matrix: result shape (2,4)
print('t[:,1]\n', t[:,1])
# Column 3 of every matrix: result shape (2,3)
print('t[:,:,3]\n', t[:,:,3])
# Last two columns of every matrix: result shape (2,3,2)
print('t[:,:,-2:]\n', t[:,:,-2:])

Original Tensor t:
tensor([[[0.5790, 0.8357, 0.6766, 0.7671],
         [0.6737, 0.4272, 0.1614, 0.2119],
         [0.7833, 0.7023, 0.3235, 0.6283]],

        [[0.5599, 0.6473, 0.6313, 0.8300],
         [0.8150, 0.7023, 0.2754, 0.7235],
         [0.0547, 0.4414, 0.2659, 0.3734]]])


t[0]
 tensor([[0.5790, 0.8357, 0.6766, 0.7671],
        [0.6737, 0.4272, 0.1614, 0.2119],
        [0.7833, 0.7023, 0.3235, 0.6283]])
t[:1]
 tensor([[[0.5790, 0.8357, 0.6766, 0.7671],
         [0.6737, 0.4272, 0.1614, 0.2119],
         [0.7833, 0.7023, 0.3235, 0.6283]]])
t[:,1]
 tensor([[0.6737, 0.4272, 0.1614, 0.2119],
        [0.8150, 0.7023, 0.2754, 0.7235]])
t[:,:,3]
 tensor([[0.7671, 0.2119, 0.6283],
        [0.8300, 0.7235, 0.3734]])
t[:,:,-2:]
 tensor([[[0.6766, 0.7671],
         [0.1614, 0.2119],
         [0.3235, 0.6283]],

        [[0.6313, 0.8300],
         [0.2754, 0.7235],
         [0.2659, 0.3734]]])


## Pivoting and Reshaping tensors
In the following section we cover common methods used to pivot and reshape tensors, namely:
1. Flatten
1. Squeeze
1. Reshape
1. View
1. Transpose
1. Permute

### Flatten: Changing a multi-dimensional tensors to a single dimension

In [None]:
# Flatten collapses all dimensions into one; elements keep their printed (row-by-row) order
t = torch.rand(size=(3,4,5)) # 3D tensor
flat = t.flatten()

print(t)
print(t.shape)               # 3x4x5
print(flat)
print(flat.shape)            # 60

tensor([[[0.3944, 0.8222, 0.2239, 0.0351, 0.8898],
         [0.9432, 0.8162, 0.7160, 0.1421, 0.3417],
         [0.5630, 0.3616, 0.0347, 0.8525, 0.4625],
         [0.0159, 0.2123, 0.7260, 0.4774, 0.7681]],

        [[0.1501, 0.8351, 0.9170, 0.1552, 0.2803],
         [0.9281, 0.2844, 0.4222, 0.7128, 0.2335],
         [0.4224, 0.1079, 0.3076, 0.3214, 0.2633],
         [0.6970, 0.8646, 0.4980, 0.2850, 0.8689]],

        [[0.0924, 0.5944, 0.3891, 0.4880, 0.8456],
         [0.9254, 0.9646, 0.3120, 0.7952, 0.6050],
         [0.0385, 0.3049, 0.1060, 0.4799, 0.8741],
         [0.5854, 0.6285, 0.3492, 0.9502, 0.1278]]])
torch.Size([3, 4, 5])
tensor([0.3944, 0.8222, 0.2239, 0.0351, 0.8898, 0.9432, 0.8162, 0.7160, 0.1421,
        0.3417, 0.5630, 0.3616, 0.0347, 0.8525, 0.4625, 0.0159, 0.2123, 0.7260,
        0.4774, 0.7681, 0.1501, 0.8351, 0.9170, 0.1552, 0.2803, 0.9281, 0.2844,
        0.4222, 0.7128, 0.2335, 0.4224, 0.1079, 0.3076, 0.3214, 0.2633, 0.6970,
        0.8646, 0.4980, 0.2850, 0.8689, 

#### Unsqueeze
With unsqueeze, you can add an extra dimension to a tensor. It will be helpful when you need an extra dimension as the 'batch size' in deep neural networks

In [None]:
t = torch.rand(size=(3,4,5))
ts = t.unsqueeze(dim=0)   # unsqueeze along axis 0: (3,4,5) -> (1,3,4,5)
ts2 = t.unsqueeze(1)  # unsqueeze along axis 1: (3,4,5) -> (3,1,4,5)

print(ts) # A new dimension is added and every following dimension shifts one position to the right
print('Original tensor shape',t.shape)
print('Unsequeeze along axis 0',ts.shape)
print('\n')

print(ts.unsqueeze(0)) # Can apply this operation as many times as required
print('ts.unsqueeze(0).shape:',ts.unsqueeze(0).shape)
print('\n')

print(ts2) # Unsqueeze can also be applied to other intermediate dimensions
print('Unsequeeze along axis 1',ts2.shape)
print('\n')

tensor([[[[0.3225, 0.7255, 0.9254, 0.3215, 0.9387],
          [0.4475, 0.8461, 0.3826, 0.9327, 0.7372],
          [0.7489, 0.4121, 0.6856, 0.9030, 0.2835],
          [0.4789, 0.6807, 0.5737, 0.6033, 0.7014]],

         [[0.4841, 0.2558, 0.0480, 0.3342, 0.9883],
          [0.3018, 0.4091, 0.0266, 0.6619, 0.8940],
          [0.6933, 0.9932, 0.2695, 0.8372, 0.7449],
          [0.4759, 0.5227, 0.0032, 0.6577, 0.8936]],

         [[0.8290, 0.3613, 0.3132, 0.3181, 0.2365],
          [0.6944, 0.6509, 0.7072, 0.1913, 0.7504],
          [0.3070, 0.4127, 0.9654, 0.5993, 0.6281],
          [0.8023, 0.4698, 0.5616, 0.4668, 0.6333]]]])
Original tensor shape torch.Size([3, 4, 5])
Unsequeeze along axis 0 torch.Size([1, 3, 4, 5])


tensor([[[[[0.3225, 0.7255, 0.9254, 0.3215, 0.9387],
           [0.4475, 0.8461, 0.3826, 0.9327, 0.7372],
           [0.7489, 0.4121, 0.6856, 0.9030, 0.2835],
           [0.4789, 0.6807, 0.5737, 0.6033, 0.7014]],

          [[0.4841, 0.2558, 0.0480, 0.3342, 0.9883],
       

#### Squeeze

Remove empty dimension(s) from tensor

In [None]:
# `ts` has shape (1,3,4,5) and `ts2` has shape (3,1,4,5); both come from the unsqueeze cell above.
print(ts.squeeze(0))
print('ts.squeeze(0).shape:',ts.squeeze(0).shape)
print('\n')

# squeeze() returns a new tensor; `ts` itself keeps its size-1 leading dimension
print(ts.shape)

# print(ts2.squeeze(1))
# Fixed: the label says ts2.squeeze(1), so evaluate exactly that expression
# (previously this line printed ts.squeeze(0).shape under the ts2 label).
print('ts2.squeeze(1).shape:',ts2.squeeze(1).shape)
print('\n')

# Squeezing a dimension whose size is not 1 has no effect on the tensor:
# print(t.squeeze(0))
# print(ts.squeeze(1))

tensor([[[0.3225, 0.7255, 0.9254, 0.3215, 0.9387],
         [0.4475, 0.8461, 0.3826, 0.9327, 0.7372],
         [0.7489, 0.4121, 0.6856, 0.9030, 0.2835],
         [0.4789, 0.6807, 0.5737, 0.6033, 0.7014]],

        [[0.4841, 0.2558, 0.0480, 0.3342, 0.9883],
         [0.3018, 0.4091, 0.0266, 0.6619, 0.8940],
         [0.6933, 0.9932, 0.2695, 0.8372, 0.7449],
         [0.4759, 0.5227, 0.0032, 0.6577, 0.8936]],

        [[0.8290, 0.3613, 0.3132, 0.3181, 0.2365],
         [0.6944, 0.6509, 0.7072, 0.1913, 0.7504],
         [0.3070, 0.4127, 0.9654, 0.5993, 0.6281],
         [0.8023, 0.4698, 0.5616, 0.4668, 0.6333]]])
ts.squeeze(0).shape: torch.Size([3, 4, 5])


torch.Size([1, 3, 4, 5])
ts2.squeeze(1).shape: torch.Size([3, 4, 5])




### Reshape
Change the shape of a tensor

In [None]:
# `t` is the (3,4,5) tensor from the unsqueeze cell, i.e. 60 elements in total
print('Orginal tensor shape: ', t.shape)
print(t.reshape((-1,6)).shape, t.reshape((12,5)).shape)
print(t.reshape(12,-1), t.reshape(12,-1).shape) # Can use -1 for exactly one dimension; it is inferred from the element count and the other dimensions
print(t.reshape(5,4,3), t.reshape(5,4,3).shape)
print(t.reshape(-1), t.reshape(-1).shape)

# Note:
# print(t.reshape(12,6)) would raise an error: 12*6 = 72 does not match the 60 elements of the source tensor

Orginal tensor shape:  torch.Size([3, 4, 5])
torch.Size([10, 6]) torch.Size([12, 5])
tensor([[0.3225, 0.7255, 0.9254, 0.3215, 0.9387],
        [0.4475, 0.8461, 0.3826, 0.9327, 0.7372],
        [0.7489, 0.4121, 0.6856, 0.9030, 0.2835],
        [0.4789, 0.6807, 0.5737, 0.6033, 0.7014],
        [0.4841, 0.2558, 0.0480, 0.3342, 0.9883],
        [0.3018, 0.4091, 0.0266, 0.6619, 0.8940],
        [0.6933, 0.9932, 0.2695, 0.8372, 0.7449],
        [0.4759, 0.5227, 0.0032, 0.6577, 0.8936],
        [0.8290, 0.3613, 0.3132, 0.3181, 0.2365],
        [0.6944, 0.6509, 0.7072, 0.1913, 0.7504],
        [0.3070, 0.4127, 0.9654, 0.5993, 0.6281],
        [0.8023, 0.4698, 0.5616, 0.4668, 0.6333]]) torch.Size([12, 5])
tensor([[[0.3225, 0.7255, 0.9254],
         [0.3215, 0.9387, 0.4475],
         [0.8461, 0.3826, 0.9327],
         [0.7372, 0.7489, 0.4121]],

        [[0.6856, 0.9030, 0.2835],
         [0.4789, 0.6807, 0.5737],
         [0.6033, 0.7014, 0.4841],
         [0.2558, 0.0480, 0.3342]],

        [[

### Transpose
This operation is primarily a generalization of the regular matrix transpose

In [None]:
t = torch.tensor([[[1,2,3,4], [5,6,7,8], [9,10,11,12]],
                  [[-1,-2,-3,-4], [-5,-6,-7,-8], [-9,-10,-11,-12]]])

print(t.shape)
print(t)
print('\n')

# transpose(i, j) swaps exactly two dimensions; show the result for each pair in turn
for dim_a, dim_b in ((0, 1), (0, 2)):
    swapped = t.transpose(dim_a, dim_b)
    print(swapped.shape)
    print(swapped)
    print('\n')

torch.Size([2, 3, 4])
tensor([[[  1,   2,   3,   4],
         [  5,   6,   7,   8],
         [  9,  10,  11,  12]],

        [[ -1,  -2,  -3,  -4],
         [ -5,  -6,  -7,  -8],
         [ -9, -10, -11, -12]]])


torch.Size([3, 2, 4])
tensor([[[  1,   2,   3,   4],
         [ -1,  -2,  -3,  -4]],

        [[  5,   6,   7,   8],
         [ -5,  -6,  -7,  -8]],

        [[  9,  10,  11,  12],
         [ -9, -10, -11, -12]]])


torch.Size([4, 3, 2])
tensor([[[  1,  -1],
         [  5,  -5],
         [  9,  -9]],

        [[  2,  -2],
         [  6,  -6],
         [ 10, -10]],

        [[  3,  -3],
         [  7,  -7],
         [ 11, -11]],

        [[  4,  -4],
         [  8,  -8],
         [ 12, -12]]])




### Permute Tensor
This operation allows the user to simultaneously reorder multiple dimensions unlike transpose which interchanges two dimensions only


In [None]:
t = torch.tensor([[[1,2,3,4], [5,6,7,8], [9,10,11,12]],
                  [[-1,-2,-3,-4], [-5,-6,-7,-8], [-9,-10,-11,-12]]])

print(t.shape)
print(t)
print('\n')

# permute(1,0,2) swaps the first two dimensions, the same result as transpose(0,1)
print(t.permute(1,0,2).shape)
print(t.permute(1,0,2))
print('\n')

# permute(2,0,1) reorders all three dimensions at once: (2,3,4) -> (4,2,3)
print(t.permute(2,0,1).shape)
# print(t.permute(2,0,1))
print('\n')

torch.Size([2, 3, 4])
tensor([[[  1,   2,   3,   4],
         [  5,   6,   7,   8],
         [  9,  10,  11,  12]],

        [[ -1,  -2,  -3,  -4],
         [ -5,  -6,  -7,  -8],
         [ -9, -10, -11, -12]]])


torch.Size([3, 2, 4])
tensor([[[  1,   2,   3,   4],
         [ -1,  -2,  -3,  -4]],

        [[  5,   6,   7,   8],
         [ -5,  -6,  -7,  -8]],

        [[  9,  10,  11,  12],
         [ -9, -10, -11, -12]]])


torch.Size([4, 2, 3])




## Combining Tensors
1. Concatenate
2. Stack
3. Padding

### Concatenate

All dimensions except the one we are concatenating over must be identical. For example, with T1 -> (2,3,4) and T2 -> (2,5,4) we can concatenate over dim 1 (giving (2,8,4)), but not over any other dim.

In [None]:
t1 = torch.rand(size=(2,3,4))
t2 = torch.rand(size=(2,3,4))

print('t1:\n', t1, "\n")
print('t2:\n', t2, "\n")

print('CONCATENATING TENSORS\n')

# Join once along dim 1 and reuse the result: (2,3,4) + (2,3,4) -> (2,6,4)
print('Concatenating two tensors along axis 1')
joined = torch.cat((t1, t2), dim=1)
print(joined)
print('New Shape: ', joined.shape)

t1:
 tensor([[[0.3261, 0.6037, 0.6948, 0.7468],
         [0.6198, 0.2580, 0.0562, 0.7886],
         [0.8314, 0.6537, 0.0693, 0.9874]],

        [[0.8043, 0.1971, 0.3704, 0.3924],
         [0.6165, 0.7792, 0.4615, 0.7593],
         [0.7506, 0.5382, 0.9393, 0.1836]]]) 

t2:
 tensor([[[0.4427, 0.8091, 0.7164, 0.9262],
         [0.6127, 0.5886, 0.8655, 0.4438],
         [0.9267, 0.3905, 0.5919, 0.7644]],

        [[0.6218, 0.6886, 0.2830, 0.3222],
         [0.4749, 0.7328, 0.1297, 0.8170],
         [0.1371, 0.2127, 0.7259, 0.6870]]]) 

CONCATENATING TENSORS

Concatenating two tensors along axis 1
tensor([[[0.3261, 0.6037, 0.6948, 0.7468],
         [0.6198, 0.2580, 0.0562, 0.7886],
         [0.8314, 0.6537, 0.0693, 0.9874],
         [0.4427, 0.8091, 0.7164, 0.9262],
         [0.6127, 0.5886, 0.8655, 0.4438],
         [0.9267, 0.3905, 0.5919, 0.7644]],

        [[0.8043, 0.1971, 0.3704, 0.3924],
         [0.6165, 0.7792, 0.4615, 0.7593],
         [0.7506, 0.5382, 0.9393, 0.1836],
         [0

### Tensor Stack
This operation can be imagined as a combination of unsqueeze and cat.

In [None]:
t1 = torch.rand(size=(3,4))
t2 = torch.rand(size=(3,4))

for source in (t1, t2):
    print(source.shape)
    print(source)
    print('\n')


# Stacking N tensors along `dim` inserts a new axis of length N at that position:
#   dim=0: (3, 4) --> (1, 3, 4) --> (N, 3, 4)
#   dim=1: (3, 4) --> (3, 1, 4) --> (3, N, 4)
#   dim=2: (3, 4) --> (3, 4, 1) --> (3, 4, N)
for stack_dim in (0, 1, 2):
    stacked = torch.stack([t1, t2], dim=stack_dim)
    print(stacked)
    print("New Shape:", stacked.shape, '\n')

torch.Size([3, 4])
tensor([[0.0041, 0.4110, 0.3762, 0.9214],
        [0.2499, 0.6945, 0.9784, 0.9018],
        [0.5090, 0.2300, 0.1448, 0.1233]])


torch.Size([3, 4])
tensor([[0.2752, 0.6179, 0.6308, 0.2440],
        [0.8541, 0.3988, 0.3777, 0.6726],
        [0.6730, 0.0730, 0.9488, 0.9730]])


tensor([[[0.0041, 0.4110, 0.3762, 0.9214],
         [0.2499, 0.6945, 0.9784, 0.9018],
         [0.5090, 0.2300, 0.1448, 0.1233]],

        [[0.2752, 0.6179, 0.6308, 0.2440],
         [0.8541, 0.3988, 0.3777, 0.6726],
         [0.6730, 0.0730, 0.9488, 0.9730]]])
New Shape: torch.Size([2, 3, 4]) 

tensor([[[0.0041, 0.4110, 0.3762, 0.9214],
         [0.2752, 0.6179, 0.6308, 0.2440]],

        [[0.2499, 0.6945, 0.9784, 0.9018],
         [0.8541, 0.3988, 0.3777, 0.6726]],

        [[0.5090, 0.2300, 0.1448, 0.1233],
         [0.6730, 0.0730, 0.9488, 0.9730]]])
New Shape: torch.Size([3, 2, 4]) 

tensor([[[0.0041, 0.2752],
         [0.4110, 0.6179],
         [0.3762, 0.6308],
         [0.9214, 0.2440]],

### Tensor Padding

In [None]:
from torch.nn import functional as F

x = torch.tensor([[1,2,3,4],
                 [1,2,3,4],
                 [1,2,3,4],
                 [1,2,3,4]])

# Number of padding elements to add on each side of the 4x4 input
pad_left   = 2
pad_right  = 2
pad_top    = 1
pad_bottom = 2

# F.pad takes the pad sizes starting from the LAST dimension:
# (left, right) for the columns first, then (top, bottom) for the rows.
# The 4x4 input therefore becomes (1+4+2) x (2+4+2) = 7x8, filled with `value`.
x_pad = F.pad(x, (pad_left,pad_right,pad_top,pad_bottom), mode = 'constant', value=0)

print(x_pad)

tensor([[0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 2, 3, 4, 0, 0],
        [0, 0, 1, 2, 3, 4, 0, 0],
        [0, 0, 1, 2, 3, 4, 0, 0],
        [0, 0, 1, 2, 3, 4, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0]])


## Mathematical Operations
1. Point-wise/Element-wise operations
1. Reduction operations
1. Comparison operations
1. Vector/Matrix operations

### Point-wise/Element-wise operations
Similar to numpy arrays, torch offers all the basic mathematical operations, we list some of the most commonly used operations here

Initialize tensors

In [None]:
t1 = torch.ones(3)
t2 = torch.ones(size=(3,4))
t3 = torch.ones(size=(3,4))
t4 = torch.ones(size=(3,4))
t5 = torch.ones(size=(3,1))
t2 = torch.ones(3)

# Report the shape of every operand; a blank line separates consecutive entries
shapes_by_name = {'t1': t1, 't2': t2, 't3': t3, 't4': t4, 't5': t5}
for position, (label, operand) in enumerate(shapes_by_name.items()):
    separator = '\n' if position else ''
    print(separator + 'original ' + label + ':')
    print(operand.shape)

original t1:
torch.Size([3])

original t2:
torch.Size([3])

original t3:
torch.Size([3, 4])

original t4:
torch.Size([3, 4])

original t5:
torch.Size([3, 1])


Addition with scalar

In [None]:
# The scalar is applied to every element of t1
print('t1+10')
print(t1+10)

t1+10
tensor([11., 11., 11.])


Addition with tensor

In [None]:
# Element-wise sum of two tensors of the same shape
print('t1+t2')
print(t1+t2)

t1+t2
tensor([2., 2., 2.])


Multiplication with scalar

In [None]:
# Every element of t1 is scaled by the scalar
print('t1*3')
print(t1*3)

t1*3
tensor([3., 3., 3.])


Multiplication with tensor

In [None]:
# `*` is the element-wise product (result keeps shape [3]); it is not a dot product
print('t1*t2')
print(t1*t2)

t1*t2
tensor([1., 1., 1.])


Similar operations extend to multi-dimensional tensors

In [None]:
# Same element-wise semantics for 2D tensors of matching shape (3,4)
print('t3+t4')
print(t3+t4)

t3+t4
tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.]])


Broadcasting tensors of different dimensions

When broadcasting two multi-dimensional tensors, PyTorch automatically aligns their corresponding dimensions and repeats (expands) any size-1 dimension so that it matches the other tensor, which makes the element-wise operation possible.

In [None]:
# t5 has shape (3,1): its single column is repeated across the 4 columns of t3 (3,4)
print(t3.shape, t5.shape)
print(t3)
print(t5)
print(t3+t5)

torch.Size([3, 4]) torch.Size([3, 1])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[1.],
        [1.],
        [1.]])
tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.]])


### Reduction Operations
Torch supports all commonly used mathematical reduction operations such as sum(), mean(), std(), max(), argmax(), prod(), unique() etc. <br>
These can either be applied on the entire tensor or along specific dimensions.

In [None]:
t1 = torch.ones(3)
t2 = torch.ones(size=(3,4))

print('\noriginal t1:')
print(t1)

print('\noriginal t2:')
print(t2)

# Full reduction: the method form and the functional form give the same result
for label, source in (('t1', t1), ('t2', t2)):
    print('\n' + label + '.sum()')
    print(source.sum())
    print(torch.sum(source))

# Reduction along a single dimension (`axis` in the label is an accepted alias of `dim`)
for reduce_dim in (0, 1):
    print('\nt2.sum(axis=' + str(reduce_dim) + ')')
    print(t2.sum(dim=reduce_dim))
    print(torch.sum(t2, dim=reduce_dim))


original t1:
tensor([1., 1., 1.])

original t2:
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

t1.sum()
tensor(3.)
tensor(3.)

t2.sum()
tensor(12.)
tensor(12.)

t2.sum(axis=0)
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])

t2.sum(axis=1)
tensor([4., 4., 4.])
tensor([4., 4., 4.])


### Comparison Operations

In [None]:
t1 = torch.rand(size=(3,4))
t2 = torch.rand(size=(3,4))
t3 = torch.rand(size=(3,4))

print('original t1:')
print(t1)

print('original t2:')
print(t2)

print('original t3:')
print(t3)

# Basic comparison operations
# Comparisons are element-wise and return a boolean tensor of the same shape
print('t1>t2')
print(t1>t2)

print('t2!=t3')
print(t2!=t3)

# Combining reduction operations with boolean tensors
# any(): True if at least one element is True; all(): True only if every element is True
print((t1>t2).any())
print((t1>t2).all())
# With an axis, the reduction is applied along that dimension only
print((t1>t2).any(axis=0))
print((t1>t2).any(axis=1))

# Method form and functional form (torch.any / torch.all) are equivalent
print((t2!=t3).any())
print((t2!=t3).all())
print(torch.any(t2!=t3))
print(torch.all(t2!=t3))

original t1:
tensor([[0.5677, 0.9232, 0.5008, 0.6983],
        [0.1115, 0.7905, 0.2378, 0.6900],
        [0.0913, 0.4656, 0.2822, 0.5729]])
original t2:
tensor([[0.3087, 0.7130, 0.0622, 0.4899],
        [0.4264, 0.3975, 0.2108, 0.4215],
        [0.6761, 0.8960, 0.2045, 0.2309]])
original t3:
tensor([[0.1661, 0.4105, 0.0384, 0.5277],
        [0.5078, 0.4221, 0.9035, 0.4630],
        [0.7092, 0.2448, 0.3571, 0.7840]])
t1>t2
tensor([[ True,  True,  True,  True],
        [False,  True,  True,  True],
        [False, False,  True,  True]])
t2!=t3
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])
tensor(True)
tensor(False)
tensor([True, True, True, True])
tensor([True, True, True])
tensor(True)
tensor(True)
tensor(True)
tensor(True)


### Vector/Matrix operations

vector - vector

In [None]:
tensor1 = torch.randn(3)
tensor2 = torch.randn(3)

print('tensor1',tensor1)
print(tensor1.size())
print('tensor2',tensor2)
print(tensor2.size())

# 1-D @ 1-D is the dot product: the result is a 0-dimensional (scalar) tensor.
# The `@` operator and torch.matmul are two spellings of the same operation.
print('torch.matmul(tensor1, tensor2)')
print((tensor1 @ tensor2),(tensor1 @ tensor2).size())
print(torch.matmul(tensor1, tensor2),torch.matmul(tensor1, tensor2).size())

tensor1 tensor([ 1.6212,  2.4210, -0.6053])
torch.Size([3])
tensor2 tensor([-1.4084,  0.9791,  1.5379])
torch.Size([3])
torch.matmul(tensor1, tensor2)
tensor(-0.8439) torch.Size([])
tensor(-0.8439) torch.Size([])


matrix - vector

In [None]:
tensor1 = torch.randn(3, 4)
tensor2 = torch.randn(4)

print('\ntensor1')
print(tensor1.size())
print('tensor2')
print(tensor2.size())

# Matrix-vector product: (3, 4) @ (4,) -> (3,)
print('torch.matmul(tensor1, tensor2)')
print(torch.matmul(tensor1, tensor2),torch.matmul(tensor1, tensor2).size())
print((tensor1 @ tensor2),(tensor1 @ tensor2).size())


tensor1
torch.Size([3, 4])
tensor2
torch.Size([4])
torch.matmul(tensor1, tensor2)
tensor([ 0.2525, -0.5445,  0.5629]) torch.Size([3])
tensor([ 0.2525, -0.5445,  0.5629]) torch.Size([3])


batched matrix - broadcasted vector

In [None]:
tensor1 = torch.randn(10, 3, 4)
tensor2 = torch.randn(4)

print('\ntensor1')
print(tensor1.size())
print('tensor2')
print(tensor2.size())

print('torch.matmul(tensor1, tensor2)')
# When multiplying an n-dimensional (batched) matrix by a 1-D vector, the vector is
# applied to every matrix in the batch: (10, 3, 4) @ (4,) -> (10, 3)
print(torch.matmul(tensor1, tensor2).size())
print((tensor1 @ tensor2).size())


tensor1
torch.Size([10, 3, 4])
tensor2
torch.Size([4])
torch.matmul(tensor1, tensor2)
torch.Size([10, 3])
torch.Size([10, 3])


batched matrix - batched matrix

In [None]:
tensor1 = torch.randn(10, 3, 4)
tensor2 = torch.randn(10, 4, 5)

print('\ntensor1')
print(tensor1.size())
print('tensor2')
print(tensor2.size())

# Both operands share the batch dimension of 10; each (3, 4) matrix is multiplied
# by its matching (4, 5) matrix: (10, 3, 4) @ (10, 4, 5) -> (10, 3, 5)
print('torch.matmul(tensor1, tensor2)')
print(torch.matmul(tensor1, tensor2).size())
print((tensor1 @ tensor2).size())


tensor1
torch.Size([10, 3, 4])
tensor2
torch.Size([10, 4, 5])
torch.matmul(tensor1, tensor2)
torch.Size([10, 3, 5])
torch.Size([10, 3, 5])


batched matrix - broadcasted matrix

In [None]:
tensor1 = torch.randn(10, 3, 4)
tensor2 = torch.randn(4, 5)

print('\ntensor1')
print(tensor1.size())
print('tensor2')
print(tensor2.size())

# The single (4, 5) matrix is broadcast across the batch of 10:
# (10, 3, 4) @ (4, 5) -> (10, 3, 5)
print('torch.matmul(tensor1, tensor2)')
print(torch.matmul(tensor1, tensor2).size())
print((tensor1 @ tensor2).size())


tensor1
torch.Size([10, 3, 4])
tensor2
torch.Size([4, 5])
torch.matmul(tensor1, tensor2)
torch.Size([10, 3, 5])
torch.Size([10, 3, 5])


## Autograd

$$d = (3a + 2b)^2$$
$$Loss = MSE(d, e)$$

In [None]:
# a and b are created directly by the user with requires_grad=True (leaf tensors)
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)
# c and d are computed from a and b, so each carries a grad_fn recording how it was produced
c = 3*a + 2*b
d = c**2

print("a: ", a)
print("b: ", b)
print("c: ", c)
print("d: ", d)

a:  tensor([2., 3.], requires_grad=True)
b:  tensor([6., 4.], requires_grad=True)
c:  tensor([18., 17.], grad_fn=<AddBackward0>)
d:  tensor([324., 289.], grad_fn=<PowBackward0>)


In [None]:
# our target
e = torch.tensor([300., 280.])

# calculate the loss and do the bp
# MSELoss() with its default settings averages the squared error over the elements;
# backward() then fills in .grad on the leaf tensors a and b.
loss = torch.nn.MSELoss()(d, e)
loss.backward()

In [None]:
# We could check the gradient function and gradient for each variable
# NOTE: c and d are intermediate (non-leaf) tensors, so their .grad prints as None here;
# reading .grad on a non-leaf tensor also makes PyTorch emit a warning.
print("Gradient func for Loss: {} | Loss: {}".format(loss.grad_fn, loss.item()))
print("Gradient func for c: {} | Gradient for c: {}".format(c.grad_fn, c.grad))
print("Gradient func for d: {} | Gradient for d: {}".format(d.grad_fn, d.grad))

Gradient func for Loss: <MseLossBackward0 object at 0x7aa95fc77a30> | Loss: 328.5
Gradient func for c: <AddBackward0 object at 0x7aa95fc76a70> | Gradient for c: None
Gradient func for d: <PowBackward0 object at 0x7aaa1b246c80> | Gradient for d: None


  print("Gradient func for c: {} | Gradient for c: {}".format(c.grad_fn, c.grad))
  print("Gradient func for d: {} | Gradient for d: {}".format(d.grad_fn, d.grad))


In [None]:
# Leaf tensors have no grad_fn (they were not produced by an operation), but they do
# receive a populated .grad after loss.backward().
print("Gradient func for a: {} | Gradient for a: {}".format(a.grad_fn, a.grad))
# Fixed: report b's own grad_fn (this line previously printed a.grad_fn under the "b" label)
print("Gradient func for b: {} | Gradient for b: {}".format(b.grad_fn, b.grad))

Gradient func for a: None | Gradient for a: tensor([2592.,  918.])
Gradient func for b: None | Gradient for b: tensor([1728.,  612.])


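It might be surprising that we can only see the gradients of $a$ and $b$ from the outside. This is expected: to save memory, PyTorch only preserves the gradients of leaf nodes such as $a$ and $b$ after backpropagation. To inspect the gradient of an intermediate tensor such as $c$ or $d$, call `.retain_grad()` on it before calling `backward()`.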



## Creating your own NN


1.   Dataset and Dataloaders
2.   Model
3.   Train, Test


In this example, we will use the Iris dataset to do a classification task (predicting one of three species) with our own NN.

In [None]:
!pip install torchsummaryX

Collecting torchsummaryX
  Downloading torchsummaryX-1.3.0-py3-none-any.whl (3.6 kB)
Installing collected packages: torchsummaryX
Successfully installed torchsummaryX-1.3.0


In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
# NOTE(review): `torch.functional` is a different module from `torch.nn.functional`, and this
# line rebinds the `F` that the padding cell imported via `from torch.nn import functional as F`.
# Neither this `F` nor `Variable` appears to be used in the cells below — TODO confirm and clean up.
import torch.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

from torchsummaryX import summary

In [None]:
# Check if GPU is available
# `device` is used below to place both the model and every batch on the same hardware
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Load Iris dataset
iris = load_iris()

# Split the dataset into training and testing sets
# 80/20 split; random_state fixes the shuffle so the split is reproducible.
# NOTE: no `stratify` argument is passed, so class proportions may differ slightly between the two sets.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

In [None]:
def count_samples_per_class(labels):
    """Return a dict mapping each distinct class label to its number of samples.

    Labels are taken from np.unique, so the dict is built in ascending label order
    and does not depend on set iteration order.
    """
    classes, counts = np.unique(labels, return_counts=True)
    return dict(zip(classes, counts))


def print_class_counts(title, class_counts):
    """Print `title` followed by one 'Class <label>: <count> samples' line per class."""
    print(title)
    for class_label, count in class_counts.items():
        print(f"Class {class_label}: {count} samples")


# Count the number of samples for each class in the training set
train_class_counts = count_samples_per_class(y_train)
print_class_counts("Training Set - Number of Samples for Each Class:", train_class_counts)

# Count the number of samples for each class in the test set
test_class_counts = count_samples_per_class(y_test)
print_class_counts("\nTest Set - Number of Samples for Each Class:", test_class_counts)

Training Set - Number of Samples for Each Class:
Class 0: 40 samples
Class 1: 41 samples
Class 2: 39 samples

Test Set - Number of Samples for Each Class:
Class 0: 10 samples
Class 1: 9 samples
Class 2: 11 samples


In [None]:
# Define a PyTorch dataset
class IrisDataset(Dataset):
    """In-memory dataset that pairs feature rows with integer class labels.

    Args:
        data: array-like of samples (indexed along the first axis); copied into a
            float32 tensor.
        targets: array-like of class indices, one per sample; copied into an int64
            (torch.long) tensor.

    Raises:
        ValueError: if `data` and `targets` do not hold the same number of samples.
    """

    def __init__(self, data, targets):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.long)
        # Fail early: __len__ is based on `data`, so a length mismatch would otherwise only
        # surface later as an out-of-range index (or as silently ignored targets).
        if len(self.data) != len(self.targets):
            raise ValueError(
                f"data and targets must have the same length, "
                f"got {len(self.data)} and {len(self.targets)}"
            )

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the (features, label) pair stored at position `idx`."""
        return self.data[idx], self.targets[idx]

# Create PyTorch datasets for training and testing
train_dataset = IrisDataset(X_train, y_train)
test_dataset = IrisDataset(X_test, y_test)

# Create PyTorch dataloaders
# Training batches are reshuffled every epoch. The test loader keeps the original order,
# which the evaluation code relies on when it compares predictions against y_test.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
class Network(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each input sample.
        hidden_size: width of the hidden layer.
        output_size: number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of inputs to raw (un-normalised) output scores."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
# One input unit per feature column, one output unit per distinct class label
input_size = X_train.shape[1]
hidden_size = 64
output_size = len(set(y_train))
model = Network(input_size, hidden_size, output_size)
# Pull one training batch to use as the sample input for summary().
# NOTE: `pred` holds the batch's target labels (the second item yielded by the dataloader), not predictions.
feat,pred = next(iter(train_dataloader))
# Check number of parameters of your network
summary(model, feat)

       Kernel Shape Output Shape Params Mult-Adds
Layer                                            
0_fc1       [4, 64]     [32, 64]  320.0     256.0
1_relu            -     [32, 64]      -         -
2_fc2       [64, 3]      [32, 3]  195.0     192.0
---------------------------------------------------
                      Totals
Total params           515.0
Trainable params       515.0
Non-trainable params     0.0
Mult-Adds              448.0


  df_sum = df.sum()


Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_fc1,"[4, 64]","[32, 64]",320.0,256.0
1_relu,-,"[32, 64]",,
2_fc2,"[64, 3]","[32, 3]",195.0,192.0


In [None]:
# Pushing model to device
# The model's parameters must be on the same device as the batches fed to it in the training loop
model=model.to(device)

In [None]:
# CrossEntropyLoss takes the raw model outputs together with integer class indices as targets
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the model with a learning rate of 1e-3
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training loop
epochs = 50
print_every = 5   # evaluate and report every `print_every` epochs (and after the first epoch)

for epoch in range(1, epochs + 1):
    # ---- one pass over the training data ----
    model.train()
    running_loss = 0.0   # sum of per-sample losses seen in this epoch
    num_samples = 0
    for inputs, targets in train_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()            # clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # criterion returns the mean loss of the batch; weight it by the batch size so the
        # (possibly smaller) last batch does not skew the epoch average.
        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        num_samples += batch_size

    # Mean training loss over the whole epoch. Previously the loss of the last mini-batch
    # alone was reported, which is noisy and not representative of the epoch.
    epoch_loss = running_loss / max(num_samples, 1)

    if epoch % print_every == 0 or epoch == 1:
        # Evaluate on the test set
        model.eval()
        with torch.no_grad():            # no gradients needed for inference
            all_preds = []
            # Labels are not needed on the device here: predictions are compared against
            # y_test below, which relies on test_dataloader iterating in the original order.
            for inputs, _ in test_dataloader:
                outputs = model(inputs.to(device))
                preds = torch.argmax(outputs, dim=1)   # class with the highest score
                all_preds.extend(preds.cpu().tolist())

        # Calculate metrics
        test_accuracy = accuracy_score(y_test, all_preds)
        f1 = f1_score(y_test, all_preds, average='weighted')

        print(f'Epoch {epoch}/{epochs}, Loss: {epoch_loss:.4f}, Test Accuracy: {test_accuracy * 100:.2f}%, F1 Score: {f1:.4f}')

Epoch 1/50, Loss: 0.9728, Test Accuracy: 70.00%, F1 Score: 0.5935
Epoch 5/50, Loss: 0.7778, Test Accuracy: 90.00%, F1 Score: 0.8992
Epoch 10/50, Loss: 0.6119, Test Accuracy: 70.00%, F1 Score: 0.5935
Epoch 15/50, Loss: 0.5228, Test Accuracy: 90.00%, F1 Score: 0.8960
Epoch 20/50, Loss: 0.5090, Test Accuracy: 90.00%, F1 Score: 0.8960
Epoch 25/50, Loss: 0.4029, Test Accuracy: 100.00%, F1 Score: 1.0000
Epoch 30/50, Loss: 0.3835, Test Accuracy: 100.00%, F1 Score: 1.0000
Epoch 35/50, Loss: 0.3545, Test Accuracy: 100.00%, F1 Score: 1.0000
Epoch 40/50, Loss: 0.2805, Test Accuracy: 100.00%, F1 Score: 1.0000
Epoch 45/50, Loss: 0.3358, Test Accuracy: 100.00%, F1 Score: 1.0000
Epoch 50/50, Loss: 0.2952, Test Accuracy: 100.00%, F1 Score: 1.0000
