<div style="display:block" direction=rtl align=right><br><br>
    <div  style="width:100%;margin:100;display:block"  display=block align=center>
        <img width=130 align=right src="https://i.ibb.co/yXKQmtZ/logo1.png" style="margin:0;" />
        <img width=170 align=left  src="https://i.ibb.co/wLjqFkw/logo2.png" style="margin:0;" />
        <span><br><font size=5>University of Tehran, School of ECE</font></span>
        <span><br><font size=3>Deep Learning</font></span>
        <span><br><font size=3>Spring 2023</font></span>
    </div><br><br><br>
    <div style="display:block" align=left display=block> 
        <font size=3>PyTorch tutorial - Model</font><br>
        <hr />
        <font size=3>TA: <a href="mailto:farshads7778@gmail.com">Farshad Sangari</a></font><br>
    </div>
</div>

# Import libraries

In [1]:
import os
import glob
import torch
from torchvision import datasets, transforms
import torch.nn as nn
from torch.utils.data import  Dataset
from PIL import Image
import torchvision

## Load data

In [2]:
# Root folders of the training and validation splits. The trailing "/" matters:
# later cells build glob patterns by plain string concatenation with these values.
DIR_TRAIN = "./data/CIFAR10/train/"
DIR_VAL = "./data/CIFAR10/val/"

In [3]:
# Discover the class folders. os.listdir() returns entries in arbitrary order and
# integer labels are later derived from positions in this list, so the list is
# sorted to keep the label mapping stable across runs and machines. Hidden
# entries (e.g. ".ipynb_checkpoints") and stray files are not classes and are skipped.
classes = sorted(
    entry for entry in os.listdir(DIR_TRAIN)
    if not entry.startswith(".") and os.path.isdir(os.path.join(DIR_TRAIN, entry))
)
print("Total Classes: ", len(classes))

train_imgs = []
val_imgs  = []
for _class in classes:
    # Each class folder is scanned in both splits; results are sorted so the
    # sample order is reproducible as well.
    train_imgs += sorted(glob.glob(DIR_TRAIN + _class + '/*.jpg'))
    val_imgs += sorted(glob.glob(DIR_VAL + _class + '/*.jpg'))

print("\nTotal train images: ", len(train_imgs))
print("Total test images: ", len(val_imgs))

Total Classes:  10

Total train images:  50000
Total test images:  10000


#### Method 1. Read images with the PIL library

In [4]:
# Per-channel mean / standard deviation handed to Normalize in both pipelines.
CIFAR10_MEAN = (0.49139968, 0.48215827, 0.44653124)
CIFAR10_STD = (0.24703233, 0.24348505, 0.26158768)

# Training pipeline: random augmentations first, then tensor conversion and
# normalisation.
_train_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
cifar_transforms_train = transforms.Compose(_train_steps)

# Validation pipeline: no augmentation, only tensor conversion and normalisation.
_val_steps = [
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
cifar_transforms_val = transforms.Compose(_val_steps)

In [5]:
class CIFAR10Dataset(Dataset):
    """Map-style dataset over image files stored as ``<root>/<class_name>/<file>``.

    Args:
        imgs_list: Sequence of image file paths. The name of each file's parent
            directory is used as its class name.
        classes: Sequence of class names; the position of a name in this
            sequence is the integer label returned for it.
        transforms: Optional callable applied to the loaded PIL image. When
            ``None``, the image is only converted with ``ToTensor``.
    """

    def __init__(self, imgs_list, classes, transforms=None):
        super().__init__()
        self.imgs_list = imgs_list
        self.class_to_int = {name: idx for idx, name in enumerate(classes)}
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        Raises:
            KeyError: If the parent directory name is not one of ``classes``.
        """
        image_path = self.imgs_list[index]

        # Reading image. Image.open() is lazy and keeps the file handle open, so
        # load inside a context manager. convert("RGB") forces the pixel data to
        # be read and guarantees three channels for the downstream transforms.
        with Image.open(image_path) as img:
            image = img.convert("RGB")

        # Retrieving class label from the parent directory name. os.path handles
        # the platform's separator, unlike splitting on a hard-coded "/".
        label_name = os.path.basename(os.path.dirname(image_path))
        label = self.class_to_int[label_name]

        # Applying transforms on image
        if self.transforms is not None:
            image = self.transforms(image)
        else:
            image = transforms.ToTensor()(image)
        return image, label

    def __len__(self):
        """Number of image paths in the dataset."""
        return len(self.imgs_list)

In [6]:
# Wrap the two file lists in datasets; only the training split gets augmentation.
train_dataset = CIFAR10Dataset(
    imgs_list=train_imgs,
    classes=classes,
    transforms=cifar_transforms_train,
)
val_dataset = CIFAR10Dataset(
    imgs_list=val_imgs,
    classes=classes,
    transforms=cifar_transforms_val,
)

In [7]:
# Both loaders share one batch size; only the training loader shuffles.
BATCH_SIZE = 1024

cifar_train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True
)
cifar_val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=BATCH_SIZE, shuffle=False
)

## Model

### Custom model

In [8]:
class ModelCNN(nn.Module):
    """CNN classifier written with individually named layers.

    Six 3x3 convolutions (padding 1, so spatial size is preserved) with a 2x2
    max-pool after every second one, followed by three fully connected layers
    that end in 10 outputs. ``linear1`` expects ``256*4*4`` features, i.e. the
    three poolings must reduce the input to 4x4 (a 32x32 input does).
    """

    def __init__(self):
        super().__init__()

        # Single source of truth for the LeakyReLU slope: the activation and
        # the weight-init gain below must agree on it.
        negative_slope = 0.2

        # Stateless layers, reused at several points in forward().
        self.relu = nn.ReLU()
        self.leaky_relu = nn.LeakyReLU(negative_slope, inplace=True)
        self.max_pool = nn.MaxPool2d(2, 2)

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        self.flat = nn.Flatten()

        # Registered in the order they are applied in forward().
        self.linear1 = nn.Linear(256*4*4, 1024)
        self.linear2 = nn.Linear(1024, 512)
        self.linear3 = nn.Linear(512, 10)

        # Example of custom initialisation: Xavier-normal on the first conv.
        # The gain is computed for the slope actually used by the activation
        # (calculate_gain would otherwise assume its default slope of 0.01).
        nn.init.xavier_normal_(
            self.conv1.weight,
            gain=nn.init.calculate_gain("leaky_relu", negative_slope),
        )

    def forward(self, x):
        """Return the 10 class scores (logits) for a batch of images ``x``."""
        # Extract features
        x = self.conv1(x)
        x = self.leaky_relu(x)
        x = self.max_pool(self.leaky_relu(self.conv2(x)))
        x = self.leaky_relu(self.conv3(x))
        x = self.max_pool(self.leaky_relu(self.conv4(x)))
        x = self.leaky_relu(self.conv5(x))
        x = self.max_pool(self.leaky_relu(self.conv6(x)))

        # Flatten the feature map
        x = self.flat(x)

        # Classifier
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.linear3(x)

        return x

In [10]:
# Smoke test: build the model and push one validation batch through it.
cnn_model = ModelCNN()
sample = next(iter(cifar_val_loader))  # first batch; sample[0] holds the images

# Only the output shape is displayed (one row of class scores per image).
cnn_model(sample[0]).shape

torch.Size([1024, 10])

In [11]:
class ModelCNN(nn.Module):
    """Same CNN classifier, with the layers grouped into ``nn.Sequential`` blocks.

    Sub-modules, in registration order: ``feature_extractor`` (convolutions and
    pooling), ``flat`` (flatten) and ``classifier`` (fully connected layers
    ending in 10 outputs). The classifier expects ``256*4*4`` input features.
    """

    def __init__(self):
        super().__init__()

        # Single source of truth for the LeakyReLU slope: the activations and
        # the weight-init gain below must agree on it.
        negative_slope = 0.2

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope, inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope, inplace=True),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope, inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope, inplace=True),
            nn.MaxPool2d(2, 2), # output: 128 x 8 x 8

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope, inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope, inplace=True),
            nn.MaxPool2d(2, 2) # output: 256 x 4 x 4
            )

        # Example of custom initialisation on the first convolution.
        first_conv = self.feature_extractor[0]

        # Weight --> Xavier-normal, with the gain computed for the slope that is
        # actually used (calculate_gain would otherwise assume 0.01).
        nn.init.xavier_normal_(
            first_conv.weight,
            gain=nn.init.calculate_gain("leaky_relu", negative_slope),
        )

        # Bias --> standard normal distribution
        nn.init.normal_(first_conv.bias)

        self.flat = nn.Flatten()

        self.classifier = nn.Sequential(
            nn.Linear(256*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 10))

    def forward(self, x):
        """Return the 10 class scores (logits) for a batch of images ``x``."""
        x = self.feature_extractor(x)
        x = self.flat(x)
        x = self.classifier(x)
        return x

In [12]:
# Same smoke test for the Sequential-based model: rebinding cnn_model here means
# the later cells inspect this version.
cnn_model = ModelCNN()
sample = next(iter(cifar_val_loader))  # first batch; sample[0] holds the images

# Only the output shape is displayed.
cnn_model(sample[0]).shape

torch.Size([1024, 10])

#### Functional vs Modular

In [24]:
# Modular form: the activation is an object that is created once and then called.
relu = nn.ReLU()

In [26]:
# Calling the module instance applies ReLU to the tensor.
relu(torch.tensor(-0.5))

tensor(0.)

In [29]:
# Functional form: the same operation as a plain function, no module object.
torch.nn.functional.relu(torch.tensor(-0.5))

tensor(0.)

In [None]:
# Functional form of cross-entropy. The target is a float tensor with the same
# shape as the input, so it is interpreted as per-class probabilities rather
# than as a class index.
# NOTE(review): [1, 1, 2] does not sum to 1, so it is not a valid probability
# distribution; fine for comparing the two APIs, not as a realistic loss value.
tensor1 = torch.tensor([1,1,2]).to(torch.float64)
tensor2 = torch.tensor([1,1,2]).to(torch.float64)
torch.nn.functional.cross_entropy(tensor1,tensor2)

tensor(4.2058, dtype=torch.float64)

In [None]:
# The modular counterpart is a class; instantiating it yields a loss object.
type(torch.nn.CrossEntropyLoss())

torch.nn.modules.loss.CrossEntropyLoss

In [None]:
# Modular form: build the loss object once, then call it like a function.
criterion = torch.nn.CrossEntropyLoss()

# Same inputs as the functional example, so the two results can be compared.
# NOTE(review): the float target [1, 1, 2] is treated as class probabilities but
# does not sum to 1.
tensor1 = torch.tensor([1,1,2]).to(torch.float64)
tensor2 = torch.tensor([1,1,2]).to(torch.float64)
criterion(tensor1,tensor2)

tensor(4.2058, dtype=torch.float64)

#### Access to submodules

##### Solution1

In [31]:
# children() yields only the direct sub-modules of the model.
sub_modules = list(cnn_model.children())

In [35]:
# Third direct child; for the Sequential-based ModelCNN this is the classifier.
sub_modules[2]

Sequential(
  (0): Linear(in_features=4096, out_features=1024, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): ReLU()
  (4): Linear(in_features=512, out_features=10, bias=True)
)

##### Solution2

In [48]:
# modules() walks the module tree recursively; the first entry is the model itself.
list(cnn_model.modules())

[ModelCNN(
   (feature_extractor): Sequential(
     (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (1): LeakyReLU(negative_slope=0.2, inplace=True)
     (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (3): LeakyReLU(negative_slope=0.2, inplace=True)
     (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (6): LeakyReLU(negative_slope=0.2, inplace=True)
     (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (8): LeakyReLU(negative_slope=0.2, inplace=True)
     (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (11): LeakyReLU(negative_slope=0.2, inplace=True)
     (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (13): LeakyReLU(negative_slope=0.2, inplace

#### Access to parameters

##### Solution1

In [36]:
# parameters() yields the learnable tensors without their names.
# NOTE(review): this displays every weight value, so the output is very long.
list(cnn_model.parameters())

[Parameter containing:
 tensor([[[[ 4.8499e-02,  5.1978e-03, -1.4092e-01],
           [-1.1446e-01,  1.7787e-01,  3.2464e-02],
           [ 6.5845e-02, -1.4161e-01,  1.5512e-02]],
 
          [[-6.0165e-02, -1.0797e-01, -6.7094e-02],
           [-1.1279e-01, -1.2810e-02,  1.8731e-01],
           [-4.0448e-02,  1.8412e-01, -4.3197e-02]],
 
          [[-8.4687e-02, -1.0731e-02,  3.5209e-02],
           [-1.2201e-01, -1.8440e-02, -6.3882e-02],
           [ 2.0623e-01,  6.1221e-02,  6.2314e-02]]],
 
 
         [[[-2.0538e-01,  4.6371e-02, -1.1592e-01],
           [ 5.9851e-03,  1.2135e-01, -4.8671e-02],
           [-3.3855e-02,  4.3064e-02,  1.5260e-01]],
 
          [[ 1.6182e-01, -1.6805e-01,  9.8460e-02],
           [ 1.3258e-02, -4.5338e-02, -1.4160e-01],
           [-3.5710e-02, -3.7878e-02,  9.6061e-02]],
 
          [[-1.3043e-01,  1.3205e-01,  1.8478e-01],
           [-8.6155e-02,  2.6419e-03, -8.6049e-02],
           [ 9.2245e-02, -8.5697e-02, -1.1221e-01]]],
 
 
         [[[ 8.39

##### Solution2

In [37]:
# named_parameters() yields (name, parameter) pairs instead of bare tensors.
my_parameters = list(cnn_model.named_parameters())

In [39]:
# Second entry: the bias of the first convolution ('feature_extractor.0.bias').
my_parameters[1]

('feature_extractor.0.bias',
 Parameter containing:
 tensor([-1.1961,  1.0618,  0.3683,  0.6038,  1.4859, -2.7757,  0.3962,  1.6257,
          0.1099,  1.2686,  1.2978, -0.0537,  0.3330, -0.9905,  1.3020,  0.8015,
          1.2489, -1.3119,  0.3674,  0.9707, -0.4065,  0.9372, -0.0996, -0.2430,
         -0.9713,  0.1794, -2.0213, -0.0494,  0.3262, -1.3914,  0.3019,  0.1949],
        requires_grad=True))

### Pretrained model

In [40]:
# Build AlexNet with its default pretrained weights; the bare name on the last
# line displays the architecture.
alexnet = torchvision.models.alexnet(weights="AlexNet_Weights.DEFAULT")
alexnet


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [42]:
# Start again from the pretrained network so this cell can be re-run safely.
alexnet = torchvision.models.alexnet(weights="AlexNet_Weights.DEFAULT")

# Replace the input convolution with a single-channel 2x2 one, and the last
# classifier layer with a 10-output one. Both replacements are freshly
# constructed layers, so they do not carry pretrained weights.
first_conv = torch.nn.Conv2d(1, 64, kernel_size=(2, 2), stride=1, padding=(1, 1))
alexnet.features[0] = first_conv

new_head = torch.nn.Linear(in_features=4096, out_features=10, bias=True)
alexnet.classifier[-1] = new_head

In [44]:
# Display the modified architecture; features[0] and classifier[-1] are the replaced layers.
alexnet

AlexNet(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
   