# **Importing Libraries**

In [1]:
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from  torch.utils.data import DataLoader, Dataset
from torch import nn, optim
import torchvision.transforms as transforms
import numpy as np
import pandas as pd
import copy
from torchvision.transforms import Normalize
from sklearn.model_selection import train_test_split
from torch  import is_tensor
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torchvision.models as models
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights
from PIL import Image

In [2]:
!git clone https://github.com/ahmedtarek1325/CNN-tutorial.git

Cloning into 'CNN-tutorial'...
remote: Enumerating objects: 43, done.[K
remote: Counting objects: 100% (43/43), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 43 (delta 9), reused 38 (delta 7), pack-reused 0[K
Unpacking objects: 100% (43/43), 7.13 KiB | 730.00 KiB/s, done.


In [3]:
import sys
sys.path.append('/kaggle/working/CNN-tutorial')
from tools import EarlyStopping, tune_model
from data import train_test_split_ , perPixel_mean_std, perChannel_mean_std, build_transforms

# **Some Functions that will be used below**

In [4]:
def train_epoch2(model,dataloader,loss,optimizer,device):
    model.train()
    acc = []
    lss_history = []
    for _ , (data,labels) in enumerate(dataloader):
        data = data.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        pred = model(data)
        lss = loss(pred,labels)

        lss.backward()
        optimizer.step()


        # acc calculations
        lss_history.append(lss.item())
        acc.append(((pred.argmax(axis = 1) == labels).type(torch.float)).mean().item())


    return np.mean(lss_history) ,np.mean(acc)

def validate_epoch2(model,dataloader,loss,device):
    model.eval()
    acc = []
    lss_history = []
    with torch.no_grad():
        for i , (data,labels) in enumerate(dataloader):
            data = data.to(device)
            labels = labels.to(device)

            pred = model(data)
            lss = loss(pred,labels)
            lss_history.append(lss.item())
            acc.append(((pred.argmax(axis = 1) == labels).type(torch.float)).mean().item())
    return np.mean(lss_history) ,np.mean(acc)

def tune_model2(num_epochs,model,train_dataloader_,test_dataloader_,\
               loss,optimizer,device,scheduler=None,earlystopping=None) :
    '''
    NOTE that the scheduler here takes the update after every epoch not step
    '''

    hist = {'train_loss': [],
            'train_acc':[],
            'test_loss': [],
            'test_acc':[]}

    last_lr = optimizer.state_dict()['param_groups'][0]['lr']
    f= 0
    for e in range(num_epochs):
        lss,acc= train_epoch2(model,train_dataloader_,loss,optimizer,device)
        test_lss,test_acc= validate_epoch2(model,test_dataloader_,loss,device)


        print(f"For epoch {e:3d} || Training Loss {lss:5.3f} || acc {acc:5.3f}",end='')
        print(f" || Testing Loss {test_lss:5.3f} || Test acc {test_acc:5.3f}")
        hist['train_loss'].append(lss)
        hist['train_acc'].append(acc)
        hist['test_loss'].append(test_lss)
        hist['test_acc'].append(test_acc)

        #
        if earlystopping:
            if earlystopping(model,test_lss): # should terminate
                print('Early Stopping Activated')
                return hist
        # if you have scheduler
        if scheduler:
            scheduler.step(test_lss)
            try:
        # applying manual verbose for the scheduler
                if last_lr != scheduler.get_last_lr()[0]:
                    print(f'scheduler update at Epoch {e+1}')
                    last_lr = scheduler.get_last_lr()[0]
            except:
                f+=1
    return hist

class MyDataset2(Dataset):
    def __init__(self,x,y,transforms = None):
        self.x = x
        self.y = y
        self.transforms = transforms
    def __len__(self):
        return len(self.y)
    def __getitem__(self,idx):
        img = Image.fromarray(self.x[idx])
        if self.transforms is not None:
            x= self.transforms(img)
            return x.type(torch.float32),self.y[idx]

        return self.x[idx],self.y[idx]

# **Defining CNNBlock**

In [5]:
class CNNBlock(nn.Module):
    def __init__(self, cin, cout, stride=1, groups=1):
        super().__init__()
        self.downsample = False
        self.groups = groups
        self.cnn = nn.Conv2d(cin,cout,3,padding=1,stride=stride,bias=False)

        if groups==1: # same implementation of normal ResNet
            self.cnn2 = nn.Conv2d(cout,cout,3,padding=1,bias=False)
            if stride !=1:
                self.projection  = nn.Conv2d(cin,cout,1,stride=stride,bias=False)
                self.downsample = True

        else: # implementation for ResNext (grouped convolutions) --> My colleague Lydia helped me in this part
            # Reduce dimension
            self.cnn1 = nn.Conv2d(cout, cout // 2, 1, bias=False)
            # Grouped convolutions
            self.cnn2 = nn.Conv2d(cout // 2, cout // 2, 3, padding=1, groups=groups, bias=False)
            # Increase dimension
            self.cnn3 = nn.Conv2d(cout // 2, cout, 1, bias=False)

            if stride != 1:
                self.projection = nn.Conv2d(cin, cout, 1, stride=stride, bias=False, groups=groups)
                self.downsample = True

        self.act1 = nn.ReLU()
        self.act2 = nn.ReLU()
        self.BN1 = nn.BatchNorm2d(cout)
        self.BN2 = nn.BatchNorm2d(cout)
    
    def forward(self, x):
        out1 = self.act1(self.BN1(self.cnn(x)))

        if self.groups == 1:
            out2 = self.act2(self.BN2(self.cnn2(out1)))
            if self.downsample:
                return out2 + self.projection(x)
            return out2 + x
        else:
            grouping = self.cnn3(self.cnn2(self.cnn1(out1))) 
            if self.downsample:
                return self.act2(self.BN2(grouping))
            return self.act2(self.BN2(grouping) + x)

# **Defining ResNext Architecture**

In [6]:
class ResNext(nn.Module):
    def __init__(self, layersPerStage, filtersPerStage, groups=1):
        super().__init__()
        f1, f2, f3 = filtersPerStage
        self.baseCNN = nn.Conv2d(3, f1, 3, padding=1)
        self.base = nn.Sequential(nn.BatchNorm2d(f1), nn.ReLU())

        self.stage1 = nn.Sequential(*[CNNBlock(f1, f1, groups=groups) for _ in range(layersPerStage[0])])
        
        layers = [CNNBlock(f1, f2, 2, groups)]
        layers.extend([CNNBlock(f2, f2, groups=groups) for _ in range(layersPerStage[1] - 1)])
        self.stage2 = nn.Sequential(*layers)

        layers = [CNNBlock(f2, f3, 2, groups)]
        layers.extend([CNNBlock(f3, f3, groups=groups) for _ in range(layersPerStage[2] - 1)])
        self.stage3 = nn.Sequential(*layers)

        self.head = nn.Sequential(
            nn.AvgPool2d(2,2),
            nn.Flatten(),
            nn.Linear(f3*4*4, 10)
        )

    def forward(self, x):
        o1 = self.base(self.baseCNN(x))
        o2 = self.stage1(o1)
        o3 = self.stage2(o2)
        o4 = self.stage3(o3)
        return self.head(o4)

# **Model Definitions**

In [7]:
model1 = ResNext([5]*3, [16, 32, 64], groups=1)

model2 = ResNext([5]*3, [16, 32, 64], groups=4)

model3 = ResNext([5]*3, [32, 64, 128], groups=4)

# A fourth model of my choice. More grouping
model4 = ResNext([6, 7, 8], [32, 64, 128], groups=8)

In [8]:
class MyDataset(Dataset):
    def __init__(self,x,y,transforms = None):
        self.x = x
        self.y = y
        self.transforms = transforms
    def __len__(self):
        return len(self.y)
    def __getitem__(self,idx):
        if self.transforms is not None:
            x= self.transforms(image= self.x[idx])['image']
            return x.type(torch.float32),self.y[idx]

        return self.x[idx],self.y[idx]

In [9]:
train_data  = CIFAR10(root='./data',train = True,download = True)
test_data = CIFAR10(root='/data',train=False,download = True)

# make a validation set
config= {'random_state' : 42,
          'shuffle':True,
         'train_size':0.9 }
X_train, X_val, y_train, y_val= train_test_split_(train_data.data,train_data.targets,config)
X_train.shape,X_val.shape

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:06<00:00, 28334219.72it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:08<00:00, 20162215.02it/s]


Extracting /data/cifar-10-python.tar.gz to /data


((45000, 32, 32, 3), (5000, 32, 32, 3))

In [10]:
# stratify to keep the ratio 0.9 for every class in the trainined and testing
uni, counts = np.unique(y_val, return_counts=True)
counts

array([500, 500, 500, 500, 500, 500, 500, 500, 500, 500])

In [11]:
mean_,std_ = perChannel_mean_std(X_train,'NHWC')
mean_,std_

(array([125.24002457, 122.93835269, 113.84449184]),
 array([62.95162973, 62.07468216, 66.70886545]))

In [12]:
train_tansforms,test_transforms = build_transforms('pipeline1',{'mean':mean_,
                                                                 'std':std_})

In [13]:
trainset = MyDataset(X_train,y_train,train_tansforms)
trainloader_ = DataLoader(trainset,batch_size=128, shuffle=True)

valset = MyDataset(X_val,y_val,test_transforms)
valloader_ = DataLoader(trainset,batch_size=1024, shuffle=False)

testset = MyDataset(test_data.data,test_data.targets,test_transforms)
testloader_ = DataLoader(testset,batch_size=512, shuffle=False)

device= 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [14]:
loss = nn.CrossEntropyLoss()
earlystopping = EarlyStopping()

# **Model 1**

In [81]:
model1.to(device)
optimizer = optim.Adam(model1.parameters(),lr= 0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,'min',patience = 5,factor = 0.1,verbose=True)

In [84]:
hist_= tune_model(100,model1,trainloader_,valloader_,\
               loss,optimizer,device,scheduler,earlystopping=earlystopping)

For epoch   4 || Training Loss 0.602 || acc 0.790 || Testing Loss 0.681 || Test acc 0.764
For epoch   9 || Training Loss 0.421 || acc 0.854 || Testing Loss 0.427 || Test acc 0.853
For epoch  14 || Training Loss 0.319 || acc 0.888 || Testing Loss 0.355 || Test acc 0.877
For epoch  19 || Training Loss 0.257 || acc 0.911 || Testing Loss 0.247 || Test acc 0.914
For epoch  24 || Training Loss 0.217 || acc 0.925 || Testing Loss 0.212 || Test acc 0.925
For epoch  29 || Training Loss 0.190 || acc 0.934 || Testing Loss 0.162 || Test acc 0.944
For epoch  34 || Training Loss 0.161 || acc 0.942 || Testing Loss 0.144 || Test acc 0.950
For epoch  39 || Training Loss 0.144 || acc 0.949 || Testing Loss 0.199 || Test acc 0.931
Early Stopping Activated


In [85]:
print('best loss is',earlystopping.best_loss)

best loss is 0.14418904618783432


In [87]:
acc=validate_epoch2(model1,testloader_,loss,device)[1] # though the function is called validate_epoch2, it aims to see the accuracy of whatever the passed dataloader
# Therefore, since I am passing testloader rather than valloader, it will give me the testing accuracy
print("Model 1 accuracy: ", acc*100)

Model 1 accuracy:  87.49195784330368


# **Model 2**

In [15]:
model2.to(device)
optimizer2 = optim.Adam(model2.parameters(),lr= 0.001)
scheduler2 = optim.lr_scheduler.ReduceLROnPlateau(optimizer2,'min',patience = 5,factor = 0.1,verbose=True)

In [16]:
hist_2= tune_model(100,model2,trainloader_,valloader_,\
               loss,optimizer2,device,scheduler2,earlystopping=earlystopping)

For epoch   4 || Training Loss 0.814 || acc 0.715 || Testing Loss 0.832 || Test acc 0.709
For epoch   9 || Training Loss 0.603 || acc 0.790 || Testing Loss 0.846 || Test acc 0.720
For epoch  14 || Training Loss 0.503 || acc 0.825 || Testing Loss 0.530 || Test acc 0.815
For epoch  19 || Training Loss 0.431 || acc 0.848 || Testing Loss 0.600 || Test acc 0.792
For epoch  24 || Training Loss 0.378 || acc 0.868 || Testing Loss 0.408 || Test acc 0.859
For epoch  29 || Training Loss 0.336 || acc 0.882 || Testing Loss 0.328 || Test acc 0.884
For epoch  34 || Training Loss 0.308 || acc 0.893 || Testing Loss 0.297 || Test acc 0.897
For epoch  39 || Training Loss 0.276 || acc 0.903 || Testing Loss 0.289 || Test acc 0.899
For epoch  44 || Training Loss 0.253 || acc 0.912 || Testing Loss 0.251 || Test acc 0.911
For epoch  49 || Training Loss 0.238 || acc 0.917 || Testing Loss 0.227 || Test acc 0.920
For epoch  54 || Training Loss 0.221 || acc 0.922 || Testing Loss 0.218 || Test acc 0.924
Early Stop

In [17]:
print('best loss is',earlystopping.best_loss)

best loss is 0.20463847843083469


In [20]:
acc2=validate_epoch2(model2,testloader_,loss,device)[1]
print('Model 2 accuracy: ', acc2*100)

Model 2 accuracy:  85.74850648641586


# **Model 3**

In [38]:
model3.to(device)
optimizer3 = optim.Adam(model3.parameters(),lr= 0.001)
scheduler3 = optim.lr_scheduler.ReduceLROnPlateau(optimizer3,'min',patience = 5,factor = 0.1,verbose=True)

In [39]:
hist_3= tune_model(100,model3,trainloader_,valloader_,\
               loss,optimizer3,device,scheduler3,earlystopping=earlystopping)

For epoch   4 || Training Loss 0.663 || acc 0.769 || Testing Loss 0.673 || Test acc 0.766
For epoch   9 || Training Loss 0.473 || acc 0.836 || Testing Loss 0.497 || Test acc 0.828
For epoch  14 || Training Loss 0.372 || acc 0.871 || Testing Loss 0.413 || Test acc 0.858
For epoch  19 || Training Loss 0.300 || acc 0.896 || Testing Loss 0.297 || Test acc 0.896
For epoch  24 || Training Loss 0.251 || acc 0.913 || Testing Loss 0.281 || Test acc 0.900
For epoch  29 || Training Loss 0.216 || acc 0.924 || Testing Loss 0.201 || Test acc 0.929
For epoch  34 || Training Loss 0.179 || acc 0.937 || Testing Loss 0.183 || Test acc 0.936
For epoch  39 || Training Loss 0.156 || acc 0.945 || Testing Loss 0.165 || Test acc 0.942
For epoch  44 || Training Loss 0.136 || acc 0.953 || Testing Loss 0.182 || Test acc 0.937
For epoch  49 || Training Loss 0.119 || acc 0.958 || Testing Loss 0.110 || Test acc 0.962
For epoch  54 || Training Loss 0.113 || acc 0.960 || Testing Loss 0.129 || Test acc 0.955
Early Stop

In [41]:
print('best loss is',earlystopping.best_loss)

best loss is 0.11035843921655958


In [42]:
acc3=validate_epoch2(model3,testloader_,loss,device)[1]
print('Model 3 accuracy: ', acc3*100)

Model 3 accuracy:  88.58053773641586


# **Model 4**

In [15]:
model4.to(device)
optimizer4 = optim.Adam(model4.parameters(),lr= 0.001)
scheduler4 = optim.lr_scheduler.ReduceLROnPlateau(optimizer4,'min',patience = 5,factor = 0.1,verbose=True)

In [16]:
hist_4= tune_model(100,model4,trainloader_,valloader_,\
               loss,optimizer4,device,scheduler4,earlystopping=earlystopping)

For epoch   4 || Training Loss 0.762 || acc 0.737 || Testing Loss 0.872 || Test acc 0.718
For epoch   9 || Training Loss 0.502 || acc 0.827 || Testing Loss 0.490 || Test acc 0.835
For epoch  14 || Training Loss 0.381 || acc 0.866 || Testing Loss 0.378 || Test acc 0.870
For epoch  19 || Training Loss 0.308 || acc 0.894 || Testing Loss 0.327 || Test acc 0.888
For epoch  24 || Training Loss 0.250 || acc 0.912 || Testing Loss 0.261 || Test acc 0.907
For epoch  29 || Training Loss 0.208 || acc 0.927 || Testing Loss 0.198 || Test acc 0.931
For epoch  34 || Training Loss 0.172 || acc 0.940 || Testing Loss 0.157 || Test acc 0.945
For epoch  39 || Training Loss 0.148 || acc 0.948 || Testing Loss 0.133 || Test acc 0.954
For epoch  44 || Training Loss 0.131 || acc 0.954 || Testing Loss 0.121 || Test acc 0.958
For epoch  49 || Training Loss 0.111 || acc 0.962 || Testing Loss 0.121 || Test acc 0.958
For epoch  54 || Training Loss 0.100 || acc 0.965 || Testing Loss 0.098 || Test acc 0.965
For epoch 

In [19]:
print('best loss is',earlystopping.best_loss)

best loss is 0.07060314570976929


In [20]:
acc4=validate_epoch2(model4,testloader_,loss,device)[1]
print('Model 4 accuracy: ', acc4*100)

Model 4 accuracy:  88.83444398641586


# **Part 3: Pre-trained architectures (MobileNetV2)**

**Freezing all layers except the classification layer**

In [35]:
frozen_model = models.mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1)

# Freeze all the parameters in the model
for parameter in frozen_model.parameters():
    parameter.requires_grad = False # this ensures that I am not updating anything

# Preparing the classifier layer to be updated
frozen_model.classifier[1] = nn.Linear(frozen_model.classifier[1].in_features, 10)

# updating only the parameters of the classifier
frozen_optimizer = optim.Adam(frozen_model.classifier.parameters(), lr=0.0001)

## **Pre-processing the images before using MobileNetV2. According to this link: https://pytorch.org/hub/pytorch_vision_mobilenet_v2/**

In [59]:
preprocess_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

preprocess_val_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

trainset_mobilenet = MyDataset2(X_train, y_train, transforms=preprocess_train)
valset_mobileenet = MyDataset2(X_val, y_val, transforms=preprocess_val_test)
testset_mobilenet = MyDataset2(test_data.data, test_data.targets, transforms=preprocess_val_test)

trainloader_mobilenet = DataLoader(trainset_mobilenet, batch_size=128, shuffle=True)
valloader_mobilenet = DataLoader(valset_mobileenet, batch_size=1024, shuffle=False)
testloader_mobilenet = DataLoader(testset_mobilenet, batch_size=512, shuffle=False)

In [61]:
frozen_model.to(device)
scheduler_frozen = optim.lr_scheduler.ReduceLROnPlateau(frozen_optimizer,'min',patience = 5,factor = 0.1,verbose=True)
loss = nn.CrossEntropyLoss()
earlystopping = EarlyStopping()
hist_frozen= tune_model2(100,frozen_model,trainloader_mobilenet,testloader_mobilenet,\
               loss,frozen_optimizer,device,scheduler_frozen,earlystopping=earlystopping)

# Notice I am here passing testloader and not valloader. That means I am directly testing on the test dataset and not on a validation dataset.

For epoch   0 || Training Loss 0.886 || acc 0.699 || Testing Loss 0.795 || Test acc 0.737
For epoch   1 || Training Loss 0.862 || acc 0.706 || Testing Loss 0.781 || Test acc 0.743
For epoch   2 || Training Loss 0.849 || acc 0.710 || Testing Loss 0.768 || Test acc 0.744
For epoch   3 || Training Loss 0.845 || acc 0.711 || Testing Loss 0.764 || Test acc 0.742
For epoch   4 || Training Loss 0.835 || acc 0.714 || Testing Loss 0.750 || Test acc 0.749
For epoch   5 || Training Loss 0.826 || acc 0.715 || Testing Loss 0.748 || Test acc 0.747
For epoch   6 || Training Loss 0.820 || acc 0.719 || Testing Loss 0.739 || Test acc 0.750
For epoch   7 || Training Loss 0.816 || acc 0.717 || Testing Loss 0.736 || Test acc 0.749
For epoch   8 || Training Loss 0.805 || acc 0.720 || Testing Loss 0.730 || Test acc 0.752
For epoch   9 || Training Loss 0.803 || acc 0.722 || Testing Loss 0.722 || Test acc 0.756
For epoch  10 || Training Loss 0.801 || acc 0.724 || Testing Loss 0.724 || Test acc 0.755
For epoch 

**Training all layers**

In [62]:
full_model = models.mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1)

for param in full_model.parameters():
    param.requires_grad = True

full_model.classifier[1] = nn.Linear(full_model.classifier[1].in_features, 10)

full_optimizer = optim.Adam(full_model.parameters(), lr=0.001) # notice here that I did not write ".classifier", which means I am updating everything

In [63]:
full_model.to(device)
scheduler_full = optim.lr_scheduler.ReduceLROnPlateau(full_optimizer,'min',patience = 5,factor = 0.1,verbose=True)
loss = nn.CrossEntropyLoss()
earlystopping = EarlyStopping()
hist_full= tune_model2(100,full_model,trainloader_mobilenet,testloader_mobilenet,\
               loss,full_optimizer,device,scheduler_full,earlystopping=earlystopping)

# Notice I am here passing testloader and not valloader. That means I am directly testing on the test dataset and not on a validation dataset.

For epoch   0 || Training Loss 0.507 || acc 0.826 || Testing Loss 0.415 || Test acc 0.861
For epoch   1 || Training Loss 0.324 || acc 0.889 || Testing Loss 0.331 || Test acc 0.889
For epoch   2 || Training Loss 0.266 || acc 0.910 || Testing Loss 0.332 || Test acc 0.889
For epoch   3 || Training Loss 0.229 || acc 0.921 || Testing Loss 0.331 || Test acc 0.892
For epoch   4 || Training Loss 0.215 || acc 0.927 || Testing Loss 0.296 || Test acc 0.906
For epoch   5 || Training Loss 0.191 || acc 0.935 || Testing Loss 0.257 || Test acc 0.914
For epoch   6 || Training Loss 0.179 || acc 0.939 || Testing Loss 0.279 || Test acc 0.910
For epoch   7 || Training Loss 0.160 || acc 0.944 || Testing Loss 0.264 || Test acc 0.916
For epoch   8 || Training Loss 0.153 || acc 0.947 || Testing Loss 0.243 || Test acc 0.923
For epoch   9 || Training Loss 0.143 || acc 0.950 || Testing Loss 0.261 || Test acc 0.917
For epoch  10 || Training Loss 0.137 || acc 0.952 || Testing Loss 0.251 || Test acc 0.921
For epoch 

# **Discussion**

The fully-trained model is definitely much better. This is because MobileNet is originally trained on ImageNet data, not CIFAR-10. So, I think that exposing it to training on the new data improves its accuracy when compared to only training the classification layer. I didn't experiment much with augmentation, changing the optimizer, etc. due to time, but I think the conclusion would remain the same.

# **Brief essay on the model**

MobileNetV2 is a major breakthrough in the world of deep learning. Building on the original MobileNet's success, MobileNetV2 offered a number of obvious enhancements. The usage of inverted residual blocks with linear bottleneck layers, which permits improved representation learning while preserving computational efficiency, is one noteworthy breakthrough.Specifically, MobileNetV2 makes use of a unique linear bottleneck topology that promotes more efficient information flow across the network. The introduction of a linear bottleneck layer between inverted residuals, which improves gradient flow during training, is an important addition that other models haven't tackled. As the name suggests, it is a very "mobile" model, which means it is light and very computationally efficient.