# Imports

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, random_split
from torchvision import models
from tqdm import tqdm
from torchsummary import summary

# Creating the MobileNet model

In [None]:
# Start from torchvision's MobileNetV2 with ImageNet-pretrained weights
base_model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

# Freeze every feature block except the last 10, so that only those
# top blocks keep receiving gradient updates during fine-tuning
for frozen_param in base_model.features[:-10].parameters():
    frozen_param.requires_grad_(False)

# Modify the classifier for CIFAR-10 (10 classes)
class MobileNetV2(nn.Module):
    """MobileNetV2 feature extractor followed by a custom MLP classification head.

    The ``features`` stack of ``base_model`` is reused as-is (the same module
    object, so its weights and ``requires_grad`` flags carry over). Its output
    is globally average-pooled, flattened to a 1280-dim vector and passed
    through three hidden blocks of Linear -> ReLU -> BatchNorm1d -> Dropout(0.5)
    before a final linear layer that produces the class logits.

    Args:
        base_model: Object exposing a ``features`` module whose output has
            1280 channels (e.g. torchvision's ``mobilenet_v2``).
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10).
    """

    def __init__(self, base_model, num_classes=10):
        super().__init__()
        self.features = base_model.features
        # Collapse each feature map to a single value per channel
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # NOTE: the layer order fixes the Sequential indices (0..12) and
        # therefore the state_dict keys; keep it stable so saved weights load.
        self.classifier = nn.Sequential(
            nn.Linear(1280, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.5),
            nn.Linear(64, num_classes),  # one logit per class
        )

    def forward(self, x):
        """Return the raw class logits for a batch of inputs.

        Args:
            x: Input batch accepted by ``self.features``
                (presumably ``(batch, 3, H, W)`` images; confirm against caller).

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalized scores.
        """
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dim, flatten the rest
        x = self.classifier(x)
        return x

# Wrap the partially frozen backbone with the new classification head
model = MobileNetV2(base_model)

## Match the input tensor's device (cuda or cpu) with the model's device.

In [None]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU,
# then move the model onto that device
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model.to(device)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

## Print model Summary

In [None]:
# Print a per-layer table of output shapes and parameter counts for a 3x224x224 input
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
             ReLU6-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
             ReLU6-6         [-1, 32, 112, 112]               0
            Conv2d-7         [-1, 16, 112, 112]             512
       BatchNorm2d-8         [-1, 16, 112, 112]              32
  InvertedResidual-9         [-1, 16, 112, 112]               0
           Conv2d-10         [-1, 96, 112, 112]           1,536
      BatchNorm2d-11         [-1, 96, 112, 112]             192
            ReLU6-12         [-1, 96, 112, 112]               0
           Conv2d-13           [-1, 96, 56, 56]             864
      BatchNorm2d-14           [-1, 96,

# Loading dataset

In [None]:
# Seed PyTorch's global RNG so that random operations are repeatable
torch.manual_seed(42)

# Spatial size every image is resized to before entering the network
input_size = (224, 224)

# Final steps shared by all splits: PIL image -> tensor, then normalize
# with mean 0.5 and std 0.5.
# NOTE(review): the backbone in this notebook is loaded with ImageNet
# weights; ImageNet mean/std normalization may suit it better - confirm.
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]

# Training pipeline: resize, then random augmentation (horizontal flip and
# rotation by up to 15 degrees), then the shared tensor conversion
train_transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    *to_normalized_tensor,
])

# Validation/test pipeline: deterministic, resize + tensor conversion only
test_transform = transforms.Compose([
    transforms.Resize(input_size),
    *to_normalized_tensor,
])

In [None]:
# Fetch CIFAR-10 into ./data (downloaded if missing); the train split gets
# the augmenting transform, the test split the deterministic one
data_root = './data'
train_dataset = torchvision.datasets.CIFAR10(
    root=data_root, train=True, download=True, transform=train_transform
)
test_data = torchvision.datasets.CIFAR10(
    root=data_root, train=False, download=True, transform=test_transform
)

# Randomly partition the official test split into test (6k) and dev (4k)
test_size, dev_size = 6000, 4000
test_dataset, dev_dataset = random_split(test_data, [test_size, dev_size])

# Batch each dataset; only the training loader shuffles its samples
loader_options = {'batch_size': 64, 'num_workers': 2}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
dev_loader = DataLoader(dev_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Report how many samples each split holds
print(f"Train set size: {len(train_dataset)}")
print(f"Dev set size: {len(dev_dataset)}")
print(f"Test set size: {len(test_dataset)}")

Files already downloaded and verified
Files already downloaded and verified
Train set size: 50000
Dev set size: 4000
Test set size: 6000


# Training the model

In [None]:
# Training loop
def train_model(model, train_loader, dev_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` and report train/dev loss and accuracy after every epoch.

    Each epoch runs one optimization pass over ``train_loader`` followed by a
    gradient-free evaluation pass over ``dev_loader``. Batches are moved to
    the module-level ``device`` before the forward pass.

    Reported losses are the mean of the per-batch loss values (the sum of
    ``loss.item()`` divided by the number of batches).

    Args:
        model: Network mapping a batch of inputs to per-class scores.
        train_loader: Iterable of ``(images, labels)`` training batches that
            supports ``len()``.
        dev_loader: Iterable of ``(images, labels)`` validation batches that
            supports ``len()``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer holding the parameters to update.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is shown on a tqdm bar and one summary line is printed
        per epoch.
    """
    for epoch in range(num_epochs):
        ### Training Phase ###
        model.train()  # Set model to training mode
        running_loss = 0.0
        correct, total = 0, 0

        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=True)
        for batches_done, (images, labels) in enumerate(loop, start=1):
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)            # Forward pass through the model
            loss = criterion(outputs, labels)  # Calculate the loss

            # Backward pass and optimization
            optimizer.zero_grad()  # Clear gradients from the previous iteration
            loss.backward()        # Compute gradients (backpropagation)
            optimizer.step()

            # Compute training metrics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Update progress bar. running_loss accumulates one loss value per
            # batch, so it is averaged over batches seen so far (not over
            # samples) to stay on the same scale as the epoch summary below.
            loop.set_postfix(train_loss=running_loss/batches_done, train_acc=100.*correct/total)

        train_loss = running_loss / len(train_loader)
        train_acc = 100. * correct / total

        ### Validation (Dev) Phase ###
        model.eval()  # Set model to evaluation mode
        dev_loss, dev_correct, dev_total = 0.0, 0, 0

        with torch.no_grad():  # Disable gradient calculations for efficiency
            for images, labels in dev_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                dev_loss += loss.item()
                _, predicted = outputs.max(1)
                dev_total += labels.size(0)
                dev_correct += predicted.eq(labels).sum().item()

        dev_loss /= len(dev_loader)
        dev_acc = 100. * dev_correct / dev_total

        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Dev Loss: {dev_loss:.4f}, Dev Acc: {dev_acc:.2f}%")

    print("Training complete!")

In [None]:
# Cross-entropy loss on the class logits, optimized with Adam (lr = 1e-3)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Fine-tune for 20 epochs, validating on the dev split after each one
train_model(
    model=model,
    train_loader=train_loader,
    dev_loader=dev_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=20,
)

Epoch 1/20: 100%|██████████| 782/782 [02:07<00:00,  6.12it/s, train_acc=66.9, train_loss=0.0157]


Epoch 1: Train Loss: 1.0041, Train Acc: 66.91%, Dev Loss: 0.5126, Dev Acc: 82.90%


Epoch 2/20: 100%|██████████| 782/782 [02:08<00:00,  6.09it/s, train_acc=81.7, train_loss=0.00945]


Epoch 2: Train Loss: 0.6042, Train Acc: 81.74%, Dev Loss: 0.4537, Dev Acc: 85.22%


Epoch 3/20: 100%|██████████| 782/782 [02:08<00:00,  6.09it/s, train_acc=84.3, train_loss=0.00812]


Epoch 3: Train Loss: 0.5192, Train Acc: 84.29%, Dev Loss: 0.4098, Dev Acc: 86.15%


Epoch 4/20: 100%|██████████| 782/782 [02:08<00:00,  6.07it/s, train_acc=86.1, train_loss=0.00725]


Epoch 4: Train Loss: 0.4633, Train Acc: 86.13%, Dev Loss: 0.3170, Dev Acc: 89.60%


Epoch 5/20: 100%|██████████| 782/782 [02:08<00:00,  6.09it/s, train_acc=87.1, train_loss=0.00665]


Epoch 5: Train Loss: 0.4253, Train Acc: 87.08%, Dev Loss: 0.3239, Dev Acc: 89.47%


Epoch 6/20: 100%|██████████| 782/782 [02:13<00:00,  5.88it/s, train_acc=88.1, train_loss=0.00616]


Epoch 6: Train Loss: 0.3938, Train Acc: 88.13%, Dev Loss: 0.3165, Dev Acc: 89.85%


Epoch 7/20: 100%|██████████| 782/782 [02:12<00:00,  5.90it/s, train_acc=88.9, train_loss=0.00575]


Epoch 7: Train Loss: 0.3677, Train Acc: 88.86%, Dev Loss: 0.3057, Dev Acc: 90.30%


Epoch 8/20: 100%|██████████| 782/782 [02:12<00:00,  5.88it/s, train_acc=89.4, train_loss=0.00547]


Epoch 8: Train Loss: 0.3497, Train Acc: 89.44%, Dev Loss: 0.2971, Dev Acc: 90.30%


Epoch 9/20: 100%|██████████| 782/782 [02:15<00:00,  5.77it/s, train_acc=90, train_loss=0.00518]


Epoch 9: Train Loss: 0.3313, Train Acc: 89.98%, Dev Loss: 0.2669, Dev Acc: 91.67%


Epoch 10/20: 100%|██████████| 782/782 [02:14<00:00,  5.81it/s, train_acc=90.4, train_loss=0.00502]


Epoch 10: Train Loss: 0.3207, Train Acc: 90.45%, Dev Loss: 0.2975, Dev Acc: 90.47%


Epoch 11/20: 100%|██████████| 782/782 [02:15<00:00,  5.78it/s, train_acc=90.7, train_loss=0.00476]


Epoch 11: Train Loss: 0.3045, Train Acc: 90.69%, Dev Loss: 0.2649, Dev Acc: 91.08%


Epoch 12/20: 100%|██████████| 782/782 [02:16<00:00,  5.71it/s, train_acc=91.6, train_loss=0.00438]


Epoch 12: Train Loss: 0.2798, Train Acc: 91.61%, Dev Loss: 0.2635, Dev Acc: 91.62%


Epoch 13/20: 100%|██████████| 782/782 [02:09<00:00,  6.04it/s, train_acc=91.6, train_loss=0.00434]


Epoch 13: Train Loss: 0.2776, Train Acc: 91.59%, Dev Loss: 0.2628, Dev Acc: 91.47%


Epoch 14/20: 100%|██████████| 782/782 [02:10<00:00,  6.01it/s, train_acc=92.1, train_loss=0.00409]


Epoch 14: Train Loss: 0.2616, Train Acc: 92.10%, Dev Loss: 0.2588, Dev Acc: 91.95%


Epoch 15/20: 100%|██████████| 782/782 [02:11<00:00,  5.93it/s, train_acc=92.3, train_loss=0.00391]


Epoch 15: Train Loss: 0.2501, Train Acc: 92.28%, Dev Loss: 0.2456, Dev Acc: 92.15%


Epoch 16/20: 100%|██████████| 782/782 [02:10<00:00,  6.01it/s, train_acc=92.6, train_loss=0.00381]


Epoch 16: Train Loss: 0.2438, Train Acc: 92.64%, Dev Loss: 0.2450, Dev Acc: 92.33%


Epoch 17/20: 100%|██████████| 782/782 [02:10<00:00,  5.98it/s, train_acc=92.8, train_loss=0.00366]


Epoch 17: Train Loss: 0.2340, Train Acc: 92.82%, Dev Loss: 0.2636, Dev Acc: 91.88%


Epoch 18/20: 100%|██████████| 782/782 [02:09<00:00,  6.02it/s, train_acc=93, train_loss=0.00356]


Epoch 18: Train Loss: 0.2274, Train Acc: 93.01%, Dev Loss: 0.2747, Dev Acc: 91.67%


Epoch 19/20: 100%|██████████| 782/782 [02:10<00:00,  6.01it/s, train_acc=93.4, train_loss=0.00337]


Epoch 19: Train Loss: 0.2154, Train Acc: 93.37%, Dev Loss: 0.2490, Dev Acc: 92.28%


Epoch 20/20: 100%|██████████| 782/782 [02:08<00:00,  6.06it/s, train_acc=93.5, train_loss=0.00327]


Epoch 20: Train Loss: 0.2090, Train Acc: 93.54%, Dev Loss: 0.2672, Dev Acc: 91.55%
Training complete!


# Evaluating the model's performance on the test dataset

In [None]:
def evaluate_model(model, test_loader, criterion, device):
    """Measure the loss and accuracy of ``model`` over ``test_loader``.

    Runs a gradient-free pass in evaluation mode, prints a one-line summary
    and returns the metrics.

    Args:
        model: Network mapping a batch of inputs to per-class scores.
        test_loader: Iterable of ``(images, labels)`` batches that supports
            ``len()``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        device: Device every batch is moved to before the forward pass.

    Returns:
        tuple: ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
        per-batch loss values and ``accuracy`` is a percentage in [0, 100].
    """
    model.eval()  # evaluation mode for the whole pass

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():  # no gradient tracking needed for evaluation
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            # Score the batch and accumulate its loss value
            logits = model(batch_images)
            loss_sum += criterion(logits, batch_labels).item()

            # The predicted class is the index of the highest score
            predictions = logits.max(1)[1]
            num_correct += (predictions == batch_labels).sum().item()
            num_seen += batch_labels.size(0)

    avg_loss = loss_sum / len(test_loader)
    accuracy = 100. * num_correct / num_seen

    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
    return avg_loss, accuracy

In [None]:
# Score the fine-tuned model on the held-out test split; returns (avg_loss, accuracy)
evaluate_model(model, test_loader, criterion, device)

Test Loss: 0.2781, Test Accuracy: 91.35%


(0.27812374422841885, 91.35)

# 🌐 Connect to google drive

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
# Make sure the Drive folder for saved PyTorch models exists,
# creating it (and any missing parents) on first run
models_dir = '/content/drive/My Drive/saved_models/pytorch_models'
if os.path.exists(models_dir):
    print("pytorch_models folder already exists skipping folder creation...")
else:
    os.makedirs(models_dir)
    print("pytorch_models folder is created successfully")

pytorch_models folder is created successfully


# Saving the model

In [None]:
# Destination for the weights-only checkpoint (state_dict)
model_save_path = '/content/drive/My Drive/saved_models/pytorch_models/mobilenet_cifar10.pth'
# Destination for the fully pickled model object
full_model_save_path = '/content/drive/My Drive/saved_models/pytorch_models/mobilenet_cifar10_full.pth'

## 1. Saving the weights only (recommended approach)
When loading the model later, we first have to instantiate the architecture and then load the saved weights into it, as in the following example:
```python
# Define (instantiate) the model architecture first
model = MobileNetV2(base_model)
# Load the saved weights
model.load_state_dict(torch.load('mobilenet_cifar10.pth'))
```

✅ Why is state_dict() the preferred method?
- Flexibility: to reuse the weights with different architectures.
- Compatibility: Avoids issues when saving/loading across PyTorch versions.
- Efficiency: Smaller file size and faster loading.

In [None]:
# Persist only the learned parameters and buffers (recommended approach)
state = model.state_dict()
torch.save(state, model_save_path)
print(f"Model weights saved successfully at: {model_save_path}")

Model weights saved successfully at: /content/drive/My Drive/saved_models/pytorch_models/mobilenet_cifar10.pth


## 2. Saving the entire model (including architecture)
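To load the model later, we can load it directly without re-instantiating the architecture. Note that the `MobileNetV2` class definition must still be available in the loading session, because the model object is pickled; on recent PyTorch versions where `torch.load` defaults to `weights_only=True`, pass `weights_only=False` as well.
```python
# Directly load the full model
model = torch.load('mobilenet_cifar10_full.pth')
```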

In [None]:
# Pickle the whole model object (optional; keeps the module structure
# alongside the weights)
torch.save(obj=model, f=full_model_save_path)
print(f"Full model saved successfully at: {full_model_save_path}")

Full model saved successfully at: /content/drive/My Drive/saved_models/pytorch_models/mobilenet_cifar10_full.pth
