In [None]:
# PyTorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.optim import lr_scheduler

In [None]:
# Preprocessing shared by the train and test splits:
# resize to 224x224, convert to a tensor, then normalise each channel.
channel_mean = (0.4914, 0.4822, 0.4465)
channel_std = (0.247, 0.243, 0.261)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# CIFAR-10 training split (downloaded into ./data if missing), shuffled every epoch
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

# CIFAR-10 test split, iterated in a fixed order
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

# Human-readable class names, indexed by label id
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:03<00:00, 42.9MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
class LBBlock(nn.Module):
    """Depthwise -> pointwise -> depthwise convolution block.

    Every convolution is followed by batch normalisation. ReLU is applied
    after the pointwise and the second depthwise stage; the first depthwise
    stage is followed by batch normalisation only.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Channel count produced by the pointwise convolution and
            kept by the second depthwise convolution.
        kernel_size: Kernel size of both depthwise convolutions.
        stride: Stride of both depthwise convolutions.
        padding: Padding of both depthwise convolutions.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()

        # Stage 1: per-channel (depthwise) convolution + BN
        self.depthwise1 = nn.Conv2d(
            in_channels, in_channels,
            kernel_size=kernel_size, stride=stride, padding=padding,
            groups=in_channels,
        )
        self.bn1 = nn.BatchNorm2d(in_channels)

        # Stage 2: 1x1 (pointwise) convolution that changes the channel count + BN
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Stage 3: second depthwise convolution on the widened map + BN
        self.depthwise2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=kernel_size, stride=stride, padding=padding,
            groups=out_channels,
        )
        self.bn3 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Run the three stages in order and return the activated feature map."""
        # No non-linearity after the first depthwise stage
        stage1 = self.bn1(self.depthwise1(x))
        stage2 = F.relu(self.bn2(self.pointwise(stage1)))
        return F.relu(self.bn3(self.depthwise2(stage2)))

class DLBBlock(nn.Module):
    """Depthwise -> pointwise -> depthwise block with additive shortcuts.

    The block input (projected by a 1x1 convolution when the channel count
    changes) is added after the pointwise stage, and then added again,
    together with that intermediate sum, to the output of the second
    depthwise stage.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Channel count of the output feature map.
        kernel_size: Kernel size of both depthwise convolutions.
        stride: Stride of both depthwise convolutions.
        padding: Padding of both depthwise convolutions.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()

        # Shortcut projection: only needed when the channel count changes;
        # the attribute is always defined so forward() can test it.
        self.match_channels = (
            nn.Conv2d(in_channels, out_channels, kernel_size=1)
            if in_channels != out_channels
            else None
        )

        # Stage 1: depthwise convolution + BN (no ReLU)
        self.depthwise1 = nn.Conv2d(
            in_channels, in_channels,
            kernel_size=kernel_size, stride=stride, padding=padding,
            groups=in_channels,
        )
        self.bn1 = nn.BatchNorm2d(in_channels)

        # Stage 2: pointwise convolution + BN + ReLU
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Stage 3: depthwise convolution + BN + ReLU
        self.depthwise2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=kernel_size, stride=stride, padding=padding,
            groups=out_channels,
        )
        self.bn3 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Apply the three stages and both shortcut additions."""
        # Input as seen by the shortcuts (projected if channels differ)
        shortcut = x if self.match_channels is None else self.match_channels(x)

        # Stages 1 and 2, then the first shortcut addition
        mid = F.relu(self.bn2(self.pointwise(self.bn1(self.depthwise1(x)))))
        mid = mid + shortcut

        # Stage 3 on the intermediate sum
        out = F.relu(self.bn3(self.depthwise2(mid)))

        # Final addition: block input first, then the intermediate sum
        return (out + shortcut) + mid

In [None]:
class EtinyNet(nn.Module):
    """Image classifier assembled from LBBlock and DLBBlock stages.

    Layout (spatial sizes assume a 224x224 input):
        3x3 stride-2 conv           -> 112x112, 32 channels
        max-pool + 4 LB blocks      -> 56x56,   32 channels
        max-pool + 4 LB blocks      -> 28x28,   128 channels
        max-pool + 3 DLB blocks     -> 14x14,   192 channels
        max-pool + 3 DLB blocks     -> 7x7,     256 -> 512 channels
        global average pool + linear classifier

    Args:
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10), which
            reproduces the original fixed-size classifier.
    """

    def __init__(self, num_classes=10):
        super(EtinyNet, self).__init__()

        # Initial 3x3 convolution with stride 2 to downsample
        self.initial_conv = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1)

        # 2x2 max-pool, reused between all stages (it has no parameters)
        self.pool = nn.MaxPool2d(2, 2)

        # Stage 1: four LB blocks at 32 channels
        self.lb1 = nn.Sequential(
            LBBlock(32, 32),
            LBBlock(32, 32),
            LBBlock(32, 32),
            LBBlock(32, 32)
        )

        # Stage 2: one LB block widening 32 -> 128, then three at 128 channels
        self.lb2 = nn.Sequential(
            LBBlock(32, 128),
            LBBlock(128, 128),
            LBBlock(128, 128),
            LBBlock(128, 128)
        )

        # Stage 3: DLB blocks, 128 -> 192 channels
        self.dlb1 = nn.Sequential(
            DLBBlock(128, 192),
            DLBBlock(192, 192),
            DLBBlock(192, 192)
        )

        # Stage 4: DLB blocks, 192 -> 256 -> 512 channels
        # (the last block widens to 512 to match the classifier input)
        self.dlb2 = nn.Sequential(
            DLBBlock(192, 256),
            DLBBlock(256, 256),
            DLBBlock(256, 512)
        )

        # Global average pooling collapses the spatial map to 1x1
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Linear classifier on the 512 pooled features
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch."""
        x = self.initial_conv(x)      # stride-2 convolution
        x = self.pool(x)

        x = self.lb1(x)               # first set of LB blocks
        x = self.pool(x)

        x = self.lb2(x)               # second set of LB blocks
        x = self.pool(x)

        x = self.dlb1(x)              # first set of DLB blocks
        x = self.pool(x)

        x = self.dlb2(x)              # second set of DLB blocks
        x = self.global_avg_pool(x)   # (batch, 512, 1, 1)

        # Flatten everything except the batch dimension. Unlike view(-1, 512)
        # this can never fold a channel mismatch into the batch dimension;
        # a mismatch surfaces as a shape error in the linear layer instead.
        x = torch.flatten(x, 1)
        return self.fc(x)

In [None]:
# Check if GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Training on:", device)

# Instantiate the model, define loss function, optimizer, and learning rate scheduler
net = EtinyNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
scheduler = lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)  # Multiply LR by 0.1 every 25 epochs

num_epochs = 50  # Start with 50 epochs
train_losses = []  # mean training loss per batch, one entry per epoch
val_losses = []    # mean validation loss per batch, one entry per epoch

# Training loop with validation
for epoch in range(num_epochs):
    net.train()  # Set model to training mode
    running_loss = 0.0  # sum over the current 100-batch logging window
    epoch_loss = 0.0    # sum over the whole epoch
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f'Epoch: {epoch + 1}, Batch: {i + 1}, Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

    # Track the epoch's mean training loss. running_loss cannot be used here:
    # it is reset every 100 batches and only holds the tail of the epoch.
    train_losses.append(epoch_loss / max(len(trainloader), 1))

    # Validation pass (evaluated on testloader)
    net.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            val_loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Report the same per-batch mean that is stored in val_losses
    avg_val_loss = val_loss / max(len(testloader), 1)
    val_losses.append(avg_val_loss)
    accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1}, Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%')

    # Step learning rate scheduler
    scheduler.step()

print('Finished Training')

Training on: cuda
Epoch: 1, Batch: 100, Loss: 12.7187
Epoch: 1, Batch: 200, Loss: 1.9465
Epoch: 1, Batch: 300, Loss: 1.7635
Epoch: 1, Batch: 400, Loss: 1.7809
Epoch: 1, Batch: 500, Loss: 1.7236
Epoch: 1, Batch: 600, Loss: 1.6880
Epoch: 1, Batch: 700, Loss: 1.6199
Epoch: 1, Batch: 800, Loss: 1.5958
Epoch: 1, Batch: 900, Loss: 1.6175
Epoch: 1, Batch: 1000, Loss: 1.5144
Epoch: 1, Batch: 1100, Loss: 1.5185
Epoch: 1, Batch: 1200, Loss: 1.4534
Epoch: 1, Batch: 1300, Loss: 1.4920
Epoch: 1, Batch: 1400, Loss: 1.4470
Epoch: 1, Batch: 1500, Loss: 1.4823
Epoch 1, Validation Loss: 481.9044, Validation Accuracy: 46.09%
Epoch: 2, Batch: 100, Loss: 1.4463
Epoch: 2, Batch: 200, Loss: 1.3798
Epoch: 2, Batch: 300, Loss: 1.3722
Epoch: 2, Batch: 400, Loss: 1.3547
Epoch: 2, Batch: 500, Loss: 1.3119
Epoch: 2, Batch: 600, Loss: 1.3057
Epoch: 2, Batch: 700, Loss: 1.3280
Epoch: 2, Batch: 800, Loss: 1.3199
Epoch: 2, Batch: 900, Loss: 1.3308
Epoch: 2, Batch: 1000, Loss: 1.2183
Epoch: 2, Batch: 1100, Loss: 1.2705

KeyboardInterrupt: 

In [None]:
# Measure classification accuracy of `net` over everything in `testloader`
net = net.to(device)
net.eval()  # evaluation mode

correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        # Predicted class = index of the largest logit in each row
        predicted = torch.max(outputs, dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

Accuracy of the network on the 10000 test images: 86.55%


The original implementation has 976K parameters. Because we train on CIFAR-10 instead of ImageNet, our fully connected layer has only 512 * 10 + 10 = 5,130 parameters instead of 512 * 1000 + 1000 = 513,000. With an ImageNet-sized fully connected layer, our model would therefore have 667,914 - 5,130 + 513,000 = 1,175,784 parameters, which is around 200,000 more than the original implementation.

In [None]:
def get_model_size(model):
    """Print the memory footprint and parameter counts of ``model``.

    Prints, in order: the combined size of all parameters and buffers in
    megabytes, the total number of parameters, a blank line, and one line per
    named parameter with its element count.

    Args:
        model: The ``nn.Module`` to inspect.

    Returns:
        None. The report is written to stdout.
    """
    # Calculate total parameter count
    total_params = sum(p.numel() for p in model.parameters())

    # Calculate total parameter size in bytes
    param_size = sum(p.numel() * p.element_size() for p in model.parameters())
    # Calculate total buffer size in bytes
    buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())

    # Convert to megabytes
    size_all_mb = (param_size + buffer_size) / (1024 ** 2)

    print(f"Model size: {size_all_mb:.2f} MB")
    print(f"Total number of parameters: {total_params}")
    print()
    # Iterate the model that was passed in, not a module-level variable
    for name, param in model.named_parameters():
        print(f"{name}: {param.numel()} parameters")
# Print the size report (total size, parameter count, per-parameter breakdown) for `net`
get_model_size(net)

Model size: 2.60 MB
Total number of parameters: 667914

initial_conv.weight: 864 parameters
initial_conv.bias: 32 parameters
lb1.0.depthwise1.weight: 288 parameters
lb1.0.depthwise1.bias: 32 parameters
lb1.0.bn1.weight: 32 parameters
lb1.0.bn1.bias: 32 parameters
lb1.0.pointwise.weight: 1024 parameters
lb1.0.pointwise.bias: 32 parameters
lb1.0.bn2.weight: 32 parameters
lb1.0.bn2.bias: 32 parameters
lb1.0.depthwise2.weight: 288 parameters
lb1.0.depthwise2.bias: 32 parameters
lb1.0.bn3.weight: 32 parameters
lb1.0.bn3.bias: 32 parameters
lb1.1.depthwise1.weight: 288 parameters
lb1.1.depthwise1.bias: 32 parameters
lb1.1.bn1.weight: 32 parameters
lb1.1.bn1.bias: 32 parameters
lb1.1.pointwise.weight: 1024 parameters
lb1.1.pointwise.bias: 32 parameters
lb1.1.bn2.weight: 32 parameters
lb1.1.bn2.bias: 32 parameters
lb1.1.depthwise2.weight: 288 parameters
lb1.1.depthwise2.bias: 32 parameters
lb1.1.bn3.weight: 32 parameters
lb1.1.bn3.bias: 32 parameters
lb1.2.depthwise1.weight: 288 parameters
lb1

In [None]:
# Colab-only cell: `google.colab` is used here to mount Google Drive.
import os
from google.colab import drive
# Show the current working directory
print(os.getcwd())
# Mount Google Drive at /content/drive
drive.mount('/content/drive')

/content
Mounted at /content/drive


In [None]:
# Move the network to the CPU before serialising it.
# NOTE(review): nn.Module.cpu() is documented to act in place, so `net` itself
# presumably ends up on the CPU too — move it back before further GPU use.
net_cpu = net.cpu()

# Destination files under the Google Drive mount point
full_model_path = '/content/drive/My Drive/ECE570/Project/EtinyNetModel.pth'
state_dict_path = '/content/drive/My Drive/ECE570/Project/EtinyNetDict.pth'

# Save the whole module object first, then only its state_dict
torch.save(net_cpu, full_model_path)
torch.save(net_cpu.state_dict(), state_dict_path)