<a href="https://colab.research.google.com/github/harryypham/MyMLPractice/blob/main/MobileNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils as utils
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

%matplotlib inline

In [2]:
# Channel-wise mean / std handed to Normalize in both pipelines below.
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: resize slightly larger than the target (226x226), take a
# random 224x224 crop and a random horizontal flip as augmentation, then
# convert to a normalized tensor.
train_transform = transforms.Compose([
    transforms.Resize((226, 226)),
    transforms.RandomCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)
])
# Evaluation pipeline: deterministic resize straight to 224x224, no augmentation.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)
])

trainset = torchvision.datasets.CIFAR10(root="/content/data/train", train=True, download=True, transform=train_transform)
testset = torchvision.datasets.CIFAR10(root="/content/data/test", train=False, download=True, transform=test_transform)

batch_size = 16

# Reshuffle the training samples at every epoch so SGD does not see the same
# batch composition and order each time; the test loader keeps a fixed order.
trainloader = utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [28]:
# class ConvBlock(nn.Module):
#   def __init__(self, in_chans, out_chans, kernel_size, stride=1, padding=1):
#     super().__init__()
#     self.block = nn.Sequential(
#         nn.Conv2d(in_chans, out_chans, kernel_size, stride, padding),
#         nn.BatchNorm2d(out_chans),
#         nn.ReLU()
#     )

#   def forward(self, x):
#     return self.block(x)

class DepthwiseSeperableBlock(nn.Module):
  """Depthwise-separable convolution block.

  A 3x3 depthwise convolution (one filter per input channel) followed by a
  1x1 pointwise convolution that mixes channels. Each convolution is followed
  by BatchNorm2d and an in-place ReLU; neither convolution has a bias term.

  Args:
    in_chans: number of input channels.
    out_chans: number of channels produced by the pointwise convolution.
    stride: stride of the 3x3 depthwise convolution.
    padding: padding of the 3x3 depthwise convolution (default 1).
  """
  def __init__(self, in_chans, out_chans, stride, padding=1):
    super().__init__()
    self.block = nn.Sequential(
        # groups=in_chans is what makes this convolution depthwise: every
        # input channel gets its own 3x3 filter instead of a dense
        # in_chans x in_chans filter bank.
        nn.Conv2d(in_chans, in_chans, kernel_size=3, stride=stride, padding=padding,
                  groups=in_chans, bias=False),
        nn.BatchNorm2d(in_chans),
        nn.ReLU(inplace=True),
        # Pointwise 1x1 convolution: combines channels and sets the output width.
        nn.Conv2d(in_chans, out_chans, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(out_chans),
        nn.ReLU(inplace=True)
    )

  def forward(self, x):
    """Apply the depthwise and pointwise stages to `x` and return the result."""
    return self.block(x)


class MobileNet(nn.Module):
  """MobileNet-style classifier assembled from DepthwiseSeperableBlock layers.

  Layout: a stride-2 3x3 stem convolution (32 channels), a stack of 13
  depthwise-separable blocks that widens the features to 1024 channels,
  global average pooling, and a linear classifier.

  Args:
    num_classes: size of the output layer (default 10).
    in_chans: number of channels in the input images (default 3).
  """
  def __init__(self, num_classes=10, in_chans=3):
    super(MobileNet, self).__init__()
    stem_chans = 32
    self.conv1 = nn.Sequential(
        nn.Conv2d(in_chans, stem_chans, kernel_size=3, padding=1, stride=2, bias=False),
        nn.BatchNorm2d(stem_chans),
        nn.ReLU(inplace=True)
    )
    self.layers = self._make_layers(stem_chans)
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    # _make_layers doubles the channel count five times: 32 * 2**5 = 1024.
    self.fc = nn.Linear(stem_chans * 32, num_classes)


  def _make_layers(self, in_chans):
    """Build the stack of depthwise-separable blocks.

    Args:
      in_chans: number of channels entering the first block.

    Returns:
      nn.Sequential of 13 blocks whose output has `in_chans * 32` channels.
    """
    # (channel multiplier, stride) of every block, in execution order.
    plan = [(2, 1), (2, 2), (1, 1), (2, 2), (1, 1), (2, 2)]
    plan += [(1, 1)] * 5
    plan += [(2, 2), (1, 1)]

    layers = []
    curr_chans = in_chans
    for mult, stride in plan:
      layers.append(DepthwiseSeperableBlock(curr_chans, curr_chans * mult, stride))
      curr_chans *= mult

    return nn.Sequential(*layers)

  def forward(self, x):
    """Return the class scores, one row of `num_classes` values per input sample."""
    out = self.conv1(x)
    out = self.layers(out)

    out = self.avgpool(out)
    # Collapse the pooled 1x1 spatial dimensions, keeping the batch dimension.
    out = out.flatten(1)
    out = self.fc(out)
    return out

In [15]:
class MobileNetV1(nn.Module):
    """MobileNetV1-style classifier.

    A stride-2 3x3 convolution is followed by 13 depthwise-separable stages
    and global average pooling (all stored in `self.model`); a linear layer
    (`self.fc`) maps the 1024 pooled features to `n_classes` scores.

    Args:
        ch_in: number of channels in the input images.
        n_classes: number of output classes.
    """

    def __init__(self, ch_in, n_classes):
        super().__init__()

        def standard_conv(c_in, c_out, stride):
            """Dense 3x3 convolution -> batch norm -> ReLU."""
            return nn.Sequential(
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=stride, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            )

        def separable_conv(c_in, c_out, stride):
            """Depthwise 3x3 stage followed by a pointwise 1x1 stage."""
            depthwise = [
                nn.Conv2d(c_in, c_in, kernel_size=3, stride=stride, padding=1,
                          groups=c_in, bias=False),
                nn.BatchNorm2d(c_in),
                nn.ReLU(inplace=True),
            ]
            pointwise = [
                nn.Conv2d(c_in, c_out, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
            return nn.Sequential(*depthwise, *pointwise)

        # (output channels, stride) of each depthwise-separable stage, in order.
        stage_spec = [(64, 1), (128, 2), (128, 1), (256, 2), (256, 1), (512, 2)]
        stage_spec += [(512, 1)] * 5
        stage_spec += [(1024, 2), (1024, 1)]

        width = 32
        stages = [standard_conv(ch_in, width, 2)]
        for c_out, stride in stage_spec:
            stages.append(separable_conv(width, c_out, stride))
            width = c_out
        stages.append(nn.AdaptiveAvgPool2d(1))

        self.model = nn.Sequential(*stages)
        self.fc = nn.Linear(1024, n_classes)

    def forward(self, x):
        """Run the feature extractor, flatten to rows of 1024 values, and classify."""
        pooled = self.model(x)
        flat = pooled.view(-1, 1024)
        return self.fc(flat)

In [None]:
# Build the hand-written MobileNet and print its module hierarchy for inspection.
model = MobileNet()
print(model)

In [30]:
# Train on the first CUDA device when one is available, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
num_epochs = 100
# 3 input channels, 10 output classes.
model = MobileNetV1(3, 10)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

for epoch in range(num_epochs):
  # Nothing inside the batch loop leaves training mode, so enabling it once
  # per epoch is enough.
  model.train()
  # Running statistics restart at the beginning of every epoch.
  train_loss = 0
  correct = 0
  total = 0
  for batch_idx, (inputs, targets) in enumerate(trainloader):
      inputs, targets = inputs.to(device), targets.to(device)
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, targets)
      loss.backward()
      optimizer.step()

      # Bookkeeping only: no gradients are needed for the metrics.
      with torch.no_grad():
        train_loss += loss.item()
        _, predict = outputs.max(1)
        total += targets.size(0)
        correct += predict.eq(targets).sum().item()

      # Report the running mean loss and accuracy (in percent) every 50 batches.
      if (not batch_idx % 50) and batch_idx != 0:
            print ('Batch %03d | Cost: %.6f | Train Acc: %.4f'
                  %(batch_idx, train_loss/(batch_idx+1), 100*correct/total))

  # Per-epoch summary so the batch lines above can be attributed to an epoch.
  # Skipped when the loader yielded no samples (avoids dividing by zero).
  if total:
    print('Epoch %03d/%03d | Cost: %.6f | Train Acc: %.4f'
          % (epoch + 1, num_epochs, train_loss / (batch_idx + 1), 100 * correct / total))

Batch 050 | Cost: 2.332462 | Train Acc: 14.4608
Batch 100 | Cost: 2.355747 | Train Acc: 17.0792
Batch 150 | Cost: 2.311798 | Train Acc: 18.1705
Batch 200 | Cost: 2.275413 | Train Acc: 19.2475
Batch 250 | Cost: 2.212787 | Train Acc: 20.9661
Batch 300 | Cost: 2.165282 | Train Acc: 22.4875


KeyboardInterrupt: 