<a href="https://colab.research.google.com/github/harryypham/MyMLPractice/blob/main/MobileNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils as utils
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

%matplotlib inline

In [2]:
# Per-channel normalization shared by the train and test pipelines.
# NOTE(review): these look like the commonly quoted CIFAR-10 mean/std — confirm.
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

# Training pipeline: upscale slightly past the target size, then take a random
# 224x224 crop and a random horizontal flip as augmentation.
train_transform = transforms.Compose([
    transforms.Resize((226, 226)),
    transforms.RandomCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
# Test pipeline: deterministic resize only, no augmentation.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

# NOTE: the two splits use different roots, so the same archive is downloaded
# once per root (see the two "Downloading ..." lines in the cell output).
trainset = torchvision.datasets.CIFAR10(root="/content/data/train", train=True, download=True, transform=train_transform)
testset = torchvision.datasets.CIFAR10(root="/content/data/test", train=False, download=True, transform=test_transform)

batch_size = 32

# Shuffle the training split so each epoch visits the samples in a new order;
# without it every epoch replays the dataset's fixed on-disk order.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
# Evaluation order does not matter, so the test split is left unshuffled.
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/data/train/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43529450.08it/s]


Extracting /content/data/train/cifar-10-python.tar.gz to /content/data/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/data/test/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 41006395.47it/s]


Extracting /content/data/test/cifar-10-python.tar.gz to /content/data/test


In [22]:
# class ConvBlock(nn.Module):
#   def __init__(self, in_chans, out_chans, kernel_size, stride=1, padding=1):
#     super().__init__()
#     self.block = nn.Sequential(
#         nn.Conv2d(in_chans, out_chans, kernel_size, stride, padding),
#         nn.BatchNorm2d(out_chans),
#         nn.ReLU()
#     )

#   def forward(self, x):
#     return self.block(x)

class DepthwiseSeperableBlock(nn.Module):
  """Depthwise 3x3 conv followed by a pointwise 1x1 conv, each with BatchNorm + ReLU.

  Args:
    in_chans: channels of the incoming feature map.
    out_chans: channels produced by the pointwise convolution.
    stride: stride of the depthwise convolution.
    padding: padding of the depthwise convolution (default 1).
  """

  def __init__(self, in_chans, out_chans, stride, padding=1):
    super(DepthwiseSeperableBlock, self).__init__()

    # Depthwise stage: groups == in_chans gives one 3x3 filter per input channel.
    depthwise_stage = [
        nn.Conv2d(
            in_chans, in_chans,
            kernel_size=3, stride=stride, padding=padding,
            groups=in_chans, bias=False,
        ),
        nn.BatchNorm2d(in_chans),
        nn.ReLU(inplace=True),
    ]
    # Pointwise stage: 1x1 conv mixes channels and sets the output width.
    pointwise_stage = [
        nn.Conv2d(in_chans, out_chans, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(out_chans),
        nn.ReLU(inplace=True),
    ]
    self.block = nn.Sequential(*depthwise_stage, *pointwise_stage)

  def forward(self, x):
    """Run `x` through the depthwise stage and then the pointwise stage."""
    features = self.block(x)
    return features


class MobileNet(nn.Module):
  """MobileNet-style classifier: a strided 3x3 stem conv, a stack of
  depthwise-separable blocks, global average pooling and a linear head.

  Args:
    num_classes: number of output logits. Defaults to 10, which reproduces
      the previously hard-coded head.
  """

  # Each entry is either `out_chans` (stride 1) or `(out_chans, stride)`.
  layer_params = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

  def __init__(self, num_classes=10):
    super().__init__()
    # Stem: 3 -> 32 channels, stride 2 halves the spatial resolution.
    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 32, kernel_size=3, padding=1, stride=2),
        nn.BatchNorm2d(32),
        nn.ReLU(inplace=True)
    )
    self.layers = self._make_layers()
    # Adaptive pooling collapses any spatial size to 1x1 before the head.
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    # Derive the classifier width from the last configured block instead of
    # repeating it as a literal, so the head stays in sync with layer_params.
    last = self.layer_params[-1]
    feat_chans = last if isinstance(last, int) else last[0]
    self.fc = nn.Linear(feat_chans, num_classes)


  def _make_layers(self):
    """Build the depthwise-separable stack described by `layer_params`."""
    layers = []
    in_chans = 32  # output width of the stem conv
    for layer in self.layer_params:
      out_chans = layer if isinstance(layer, int) else layer[0]
      stride = 1 if isinstance(layer, int) else layer[1]
      layers.append(DepthwiseSeperableBlock(in_chans, out_chans, stride))
      in_chans = out_chans

    return nn.Sequential(*layers)


  def forward(self, x):
    """Return class logits of shape (batch, num_classes) for an image batch `x`."""
    out = self.conv1(x)
    out = self.layers(out)
    out = self.avgpool(out)
    # Flatten (batch, C, 1, 1) -> (batch, C) for the linear head.
    out = out.view(out.size(0), -1)
    out = self.fc(out)
    return out

In [26]:
class Block(nn.Module):
    """Inverted-residual unit: 1x1 expand -> 3x3 depthwise -> 1x1 linear projection.

    Args:
        in_planes: channels of the incoming feature map.
        out_planes: channels of the projected output.
        expansion: multiplier applied to `in_planes` for the hidden width.
        stride: stride of the depthwise convolution; a residual connection
            is only added when it equals 1.
    """

    def __init__(self, in_planes, out_planes, expansion, stride):
        super().__init__()
        self.stride = stride
        hidden = in_planes * expansion

        # 1x1 expansion to the hidden width.
        self.conv1 = nn.Conv2d(in_planes, hidden, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden)
        # 3x3 depthwise conv (one filter per hidden channel); carries the stride.
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride,
                               padding=1, groups=hidden, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)
        # 1x1 projection down to out_planes; no activation follows it.
        self.conv3 = nn.Conv2d(hidden, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity shortcut unless the channel count changes at stride 1,
        # in which case a 1x1 conv + BN matches the widths.
        needs_projection = (stride == 1) and (in_planes != out_planes)
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the block; the shortcut is added only when stride is 1."""
        expanded = F.relu(self.bn1(self.conv1(x)))
        filtered = F.relu(self.bn2(self.conv2(expanded)))
        projected = self.bn3(self.conv3(filtered))
        if self.stride != 1:
            return projected
        return projected + self.shortcut(x)


class MobileNetV2(nn.Module):
    """MobileNetV2-style classifier built from inverted-residual `Block`s.

    The stem and the second stage use stride 1 (see the NOTEs below), so the
    network downsamples by a total factor of 8. Global pooling is adaptive,
    so any input resolution that survives the three stride-2 stages works.

    Args:
        num_classes: number of output logits (default 10).
    """
    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1,  16, 1, 1),
           (6,  24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
           (6,  32, 3, 2),
           (6,  64, 4, 2),
           (6,  96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        # 1x1 conv widening the last stage (320) to the 1280-d feature vector.
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        """Expand `cfg` into a sequential stack of `Block`s."""
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            # Only the first block of a stage uses the configured stride;
            # the remaining blocks keep the resolution.
            block_strides = [stride] + [1]*(num_blocks-1)
            for block_stride in block_strides:
                layers.append(Block(in_planes, out_planes, expansion, block_stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch `x`."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        # Global average pool to 1x1 regardless of input resolution. The old
        # fixed 4x4 pooling only gave a 1x1 map for 32x32 inputs; 224x224
        # inputs left a 7x7 map and the linear layer failed on the flattened
        # 1280*49 features. For 32x32 inputs the result is unchanged.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [27]:
# Instantiate MobileNetV2 and report how many scalar parameters it holds.
model = MobileNetV2()
total_params = 0
for weight in model.parameters():
    total_params += weight.numel()
print(total_params)

2296922


In [None]:
# Train MobileNet with Adam and cross-entropy loss, printing the running loss
# and accuracy every 100 batches.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
num_epochs = 100
model = MobileNet()
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

# One-off learning-rate drop. The original trigger was
# `epoch == 3 and (epoch+1)*batch_idx == 6000`, which is exactly
# epoch index 3, batch index 1500; it is spelled out here.
# If an epoch has fewer than 1501 batches the drop never fires.
lr_drop_epoch = 3
lr_drop_batch = 1500
lr_after_drop = 0.0001

for epoch in range(num_epochs):
  # Nothing inside the loop switches to eval mode, so setting train mode
  # once per epoch is equivalent to setting it on every batch.
  model.train()
  train_loss = 0
  correct = 0
  total = 0
  print(f"Epoch {epoch+1}:")
  for batch_idx, (inputs, targets) in enumerate(trainloader):
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()

    # Clamp every gradient element to [-0.1, 0.1] before the update.
    nn.utils.clip_grad_value_(model.parameters(), 0.1)

    optimizer.step()

    if epoch == lr_drop_epoch and batch_idx == lr_drop_batch:
      print("Learning rate adjust ...")
      # Must iterate the optimizer *instance*: `optim` is the torch.optim
      # module and has no `param_groups`, so the old code raised
      # AttributeError as soon as this branch ran.
      for param_group in optimizer.param_groups:
        param_group['lr'] = lr_after_drop

    # Running statistics for the progress line; no gradients needed.
    with torch.no_grad():
      train_loss += loss.item()
      _, predict = outputs.max(1)
      total += targets.size(0)
      correct += predict.eq(targets).sum().item()

    if (not batch_idx % 100) and batch_idx != 0:
      print ('Batch %03d | Cost: %.6f | Train Acc: %.4f'
             %(batch_idx, train_loss/(batch_idx+1), 100*correct/total))
  #43.5, 45.4, 52.4 (65-ish),
  # NOTE(review): the line above is presumably accuracy readings from earlier runs — confirm.

In [None]:
def compute_accuracy_test(model, dataloader, device):
    """
    Compute top-1 accuracy (in percent) of `model` over `dataloader`.

    The model is put in eval mode and gradient tracking is disabled for the
    pass, so BatchNorm/Dropout behave deterministically, running statistics
    are not updated from evaluation data, and no autograd graph is built.
    The model's previous train/eval mode is restored before returning.

    Args:
        model: an `nn.Module` mapping an input batch to per-class scores
            of shape (batch, num_classes).
        dataloader: iterable yielding `(inputs, targets)` tensor pairs.
        device: device the batches are moved to before the forward pass.

    Returns:
        Accuracy as a float in [0, 100]; 0.0 if the dataloader yields
        no samples (instead of raising ZeroDivisionError).
    """
    was_training = model.training
    model.eval()
    correct, total = 0, 0
    try:
        with torch.no_grad():
            for inputs, targets in dataloader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                outputs = model(inputs)
                # Predicted class = index of the largest score per sample.
                _, predict = outputs.max(1)

                total += targets.size(0)
                correct += predict.eq(targets).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        return 0.0
    return correct/total * 100


# Evaluate the current `model` on the test loader and print the accuracy in percent.
test_accuracy = compute_accuracy_test(model, testloader, device)
print(test_accuracy)