In [None]:
import torch
import torchvision
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
import os

def conv_3x3(in_channel, out_channel, stride):
    """Build a standard 3x3 convolution unit: Conv2d -> BatchNorm2d -> ReLU6.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels.
        stride: Stride of the 3x3 convolution.

    Returns:
        nn.Sequential holding the convolution, batch norm and activation.
    """
    return nn.Sequential(
        # bias=False: the following BatchNorm2d has its own learnable shift,
        # so a convolution bias only adds redundant parameters.
        nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(out_channel),
        nn.ReLU6(inplace=True),
    )

def conv_1x1(in_channel, out_channel):
    """Build a standard 1x1 convolution unit: Conv2d -> BatchNorm2d -> ReLU6.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels.

    Returns:
        nn.Sequential holding the convolution, batch norm and activation.
    """
    return nn.Sequential(
        # bias=False: the following BatchNorm2d has its own learnable shift,
        # so a convolution bias only adds redundant parameters.
        nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(out_channel),
        nn.ReLU6(inplace=True),
    )

def pointwise_conv(in_channel, out_channel):
    """Build the pointwise (1x1) projection: Conv2d -> BatchNorm2d, no activation.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels.

    Returns:
        nn.Sequential holding the convolution and batch norm.
    """
    return nn.Sequential(
        # bias=False: the following BatchNorm2d has its own learnable shift,
        # so a convolution bias only adds redundant parameters.
        nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(out_channel),
    )

def depthwise_conv(in_channel, stride):
    """Build the depthwise 3x3 unit: per-channel Conv2d -> BatchNorm2d -> ReLU6.

    The convolution uses ``groups=in_channel``, so every input channel is
    filtered independently and the channel count is unchanged.

    Args:
        in_channel: Number of input (and output) channels.
        stride: Stride of the 3x3 convolution.

    Returns:
        nn.Sequential holding the convolution, batch norm and activation.
    """
    return nn.Sequential(
        # bias=False: the following BatchNorm2d has its own learnable shift,
        # so a convolution bias only adds redundant parameters.
        nn.Conv2d(in_channel, in_channel, kernel_size=3, stride=stride,
                  groups=in_channel, padding=1, bias=False),
        nn.BatchNorm2d(in_channel),
        nn.ReLU6(inplace=True),
    )

class InvertedResidual(nn.Module):
    """Inverted residual block: optional 1x1 expansion -> 3x3 depthwise -> 1x1 pointwise.

    The input is added back to the output only when the block keeps the
    tensor shape, i.e. ``stride == 1`` and ``in_channel == out_channel``.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels.
        stride: Stride applied by the depthwise convolution.
        expand_ratio: Factor by which the channel count is widened inside the
            block (the original comment notes the paper uses t=6). A value of
            1 skips the expansion convolution entirely.
    """

    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        super().__init__()
        self.stride = stride

        # Channel width used inside the block, after expansion.
        hidden_dim = int(in_channel * expand_ratio)

        # The skip connection is only possible when input and output shapes match.
        self.use_res_connect = self.stride == 1 and in_channel == out_channel

        # The expanding 1x1 convolution is present only when the block widens its input.
        expansion = [] if expand_ratio == 1 else [conv_1x1(in_channel, hidden_dim)]

        # Expanded or not, the block always finishes with depthwise then pointwise convolution.
        self.layers = nn.Sequential(
            *expansion,
            depthwise_conv(hidden_dim, stride=stride),
            pointwise_conv(hidden_dim, out_channel),
        )

    def forward(self, x):
        """Apply the block, adding the input back when the skip connection is enabled."""
        out = self.layers(x)
        return x + out if self.use_res_connect else out


class MobileNetV2(nn.Module):
    """MobileNetV2 image classifier assembled from inverted residual blocks.

    Pipeline: 3x3 stem convolution (stride 2) -> stack of InvertedResidual
    blocks -> 1x1 convolution to 1280 channels -> global average pooling ->
    linear classifier.

    Args:
        img_channel: Number of channels of the input image.
        n_class: Number of output classes.
    """

    def __init__(self, img_channel, n_class):
        super().__init__()
        # Output channels of the stem convolution.
        input_channel = 32
        # Output channels of the final 1x1 convolution.
        last_channel = 1280

        # Per-stage settings of the inverted residual blocks:
        #   t: expansion ratio applied to the block's input channels
        #   c: output channels
        #   n: number of repeated blocks in the stage
        #   s: stride of the first block in the stage
        residual_setting = [
            # t, c,  n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # Stem layer.
        self.first_conv = conv_3x3(img_channel, input_channel, stride=2)

        # Bottleneck stages.
        layers = []
        for t, c, n, s in residual_setting:
            for i in range(n):
                # Only the first block of a stage uses the configured stride;
                # the rest use stride 1 so the stage downsamples at most once.
                stride = s if i == 0 else 1
                layers.append(InvertedResidual(input_channel, c, stride=stride, expand_ratio=t))
                input_channel = c

        self.layers = nn.Sequential(*layers)

        # Head: 320 -> 1280 channels, global pooling, classifier.
        self.last_conv = conv_1x1(input_channel, last_channel)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(last_channel, n_class)

    def forward(self, x):
        """Return the class logits, one row per input sample."""
        x = self.first_conv(x)
        x = self.layers(x)
        x = self.last_conv(x)
        # Flatten everything after the batch dimension instead of hard-coding
        # the channel count, so the batch size is always preserved.
        x = torch.flatten(self.avg_pool(x), 1)
        return self.classifier(x)

# Sanity check: build the 1000-class model and print its layer-by-layer summary on CPU.
if __name__ == "__main__":
    model = MobileNetV2(n_class=1000, img_channel=3)
    summary(model, (3, 224, 224), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             896
       BatchNorm2d-2         [-1, 32, 112, 112]              64
             ReLU6-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             320
       BatchNorm2d-5         [-1, 32, 112, 112]              64
             ReLU6-6         [-1, 32, 112, 112]               0
            Conv2d-7         [-1, 16, 112, 112]             528
       BatchNorm2d-8         [-1, 16, 112, 112]              32
  InvertedResidual-9         [-1, 16, 112, 112]               0
           Conv2d-10         [-1, 96, 112, 112]           1,632
      BatchNorm2d-11         [-1, 96, 112, 112]             192
            ReLU6-12         [-1, 96, 112, 112]               0
           Conv2d-13           [-1, 96, 56, 56]             960
      BatchNorm2d-14           [-1, 96,

In [None]:
# Build torchvision's MobileNetV2 with default arguments and print its summary
# for the same (3, 224, 224) input used for the custom model's summary.
from torchvision import models
mobilenet_v2 = models.mobilenet_v2()
summary(mobilenet_v2, (3,224,224), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
             ReLU6-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
             ReLU6-6         [-1, 32, 112, 112]               0
            Conv2d-7         [-1, 16, 112, 112]             512
       BatchNorm2d-8         [-1, 16, 112, 112]              32
  InvertedResidual-9         [-1, 16, 112, 112]               0
           Conv2d-10         [-1, 96, 112, 112]           1,536
      BatchNorm2d-11         [-1, 96, 112, 112]             192
            ReLU6-12         [-1, 96, 112, 112]               0
           Conv2d-13           [-1, 96, 56, 56]             864
      BatchNorm2d-14           [-1, 96,

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hyperparameters and dataset location.
batch_size = 64
epochs = 10
lr = 0.001
data_dir = '/data/paper_review/cifa10'

# Training preprocessing: resize to 224x224, random horizontal flip, then
# convert to a tensor and normalize each channel with mean 0.5 / std 0.5.
train_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((.5, .5, .5), (.5, .5, .5))
])

# Evaluation preprocessing: same as training but without the random flip.
test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize((.5, .5, .5), (.5, .5, .5))
])

# Instantiate the model for the 10 CIFAR-10 classes.
model = MobileNetV2(img_channel=3, n_class=10).to(device)

# Loss function and optimizer; best_acc tracks the best test accuracy seen so far.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
best_acc = 0

# Load the data. Both splits come from the same downloaded archive, so they
# share one root directory; separate roots fetched and extracted it twice.
train_dataset = CIFAR10(root=data_dir, train=True, download=True, transform=train_transform)
test_dataset = CIFAR10(root=data_dir, train=False, download=True, transform=test_transform)

# Batch iterators: training data is shuffled, test data keeps its order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

def train(epoch):
    """Run one training epoch over ``train_loader`` and print progress.

    Relies on the notebook-level globals ``model``, ``train_loader``,
    ``optimizer``, ``criterion``, ``device`` and ``epochs``.

    Args:
        epoch: Zero-based epoch index; used only in the progress message.
    """
    model.train()

    # Running counts used for the cumulative accuracy shown in the log.
    correct = 0
    total = 0

    for index, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        # The predicted class is the index of the largest logit.
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
        # Log every 20 batches: current batch loss and accuracy so far this epoch.
        if (index+1) % 20 == 0:
            print(f'[Train] | epoch: {epoch+1}/{epochs} | batch: {index+1}/{len(train_loader)}| loss: {loss.item():.4f} | Acc: {correct / total * 100:.4f}')


def test(epoch):
    """Evaluate the model on ``test_loader`` and checkpoint it on a new best accuracy.

    Relies on the notebook-level globals ``model``, ``test_loader``,
    ``criterion`` and ``device``, and updates the global ``best_acc``.
    When the accuracy beats ``best_acc``, the model state, accuracy and epoch
    are written to ``./checkpoint/ckpt.pth``.

    Args:
        epoch: Zero-based epoch index; printed and stored in the checkpoint.
    """
    global best_acc
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    num_batches = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            num_batches += 1
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # Report the mean of the per-batch losses rather than their sum, so the
    # value does not scale with the number of batches. An empty loader yields
    # zeros instead of a ZeroDivisionError.
    avg_loss = test_loss / num_batches if num_batches else 0.0
    acc = 100.*correct/total if total else 0.0
    print(f'[Test] epoch: {epoch+1} loss: {avg_loss:.4f} | Acc: {acc:.4f}')

    # Save a checkpoint whenever the accuracy improves.
    if acc > best_acc:
        print('Saving..')
        state = {
            'model': model.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        # exist_ok avoids the separate isdir check and its race window.
        os.makedirs('checkpoint', exist_ok=True)
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc


# Run the full schedule: one training pass followed by one evaluation pass per epoch.
for epoch in range(epochs):
    train(epoch)
    test(epoch)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /data/paper_review/cifa10/train/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 74536434.68it/s]


Extracting /data/paper_review/cifa10/train/cifar-10-python.tar.gz to /data/paper_review/cifa10/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /data/paper_review/cifa10/test/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 80998946.08it/s]


Extracting /data/paper_review/cifa10/test/cifar-10-python.tar.gz to /data/paper_review/cifa10/test




[Train] | epoch: 1/10 | batch: 20/782| loss: 1.9848 | Acc: 22.8906
[Train] | epoch: 1/10 | batch: 40/782| loss: 1.8061 | Acc: 27.0703
[Train] | epoch: 1/10 | batch: 60/782| loss: 1.7682 | Acc: 28.5156
[Train] | epoch: 1/10 | batch: 80/782| loss: 1.5413 | Acc: 30.2539
[Train] | epoch: 1/10 | batch: 100/782| loss: 1.5731 | Acc: 32.0625
[Train] | epoch: 1/10 | batch: 120/782| loss: 1.4772 | Acc: 33.2161
[Train] | epoch: 1/10 | batch: 140/782| loss: 1.5761 | Acc: 34.6763
[Train] | epoch: 1/10 | batch: 160/782| loss: 1.5509 | Acc: 35.6055
[Train] | epoch: 1/10 | batch: 180/782| loss: 1.5187 | Acc: 36.5972
[Train] | epoch: 1/10 | batch: 200/782| loss: 1.5779 | Acc: 37.4062
[Train] | epoch: 1/10 | batch: 220/782| loss: 1.3153 | Acc: 38.4162
[Train] | epoch: 1/10 | batch: 240/782| loss: 1.4225 | Acc: 39.2448
[Train] | epoch: 1/10 | batch: 260/782| loss: 1.4563 | Acc: 39.8738
[Train] | epoch: 1/10 | batch: 280/782| loss: 1.2093 | Acc: 40.6641
[Train] | epoch: 1/10 | batch: 300/782| loss: 1.3265