In [None]:
# 파이토치 (이번엔 파이토치에 대해서 mnist 훈련 X, 모델만 짰음)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
import argparse
import time
from copy import deepcopy
import matplotlib.pyplot as plt
from torchsummary import summary

In [1]:
class View(nn.Module):
    """Reshape layer that keeps the batch axis and views the rest as ``shape``.

    Used as a "flatten" step inside ``nn.Sequential``: ``View(-1)`` turns an
    input of shape (batch_size, channels, height, width) into
    (batch_size, channels * height * width).

    Args:
        *shape: target sizes for every axis after the batch axis; ``-1`` lets
            PyTorch infer that axis from the number of elements.
    """

    def __init__(self, *shape):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        # Keep the leading (batch) axis untouched and reinterpret the rest.
        batch_size = x.size(0)
        target_shape = (batch_size,) + tuple(self.shape)
        return x.view(target_shape)

class Sepconv(nn.Module):
    """Depthwise-separable 3x3 convolution followed by batch normalisation.

    Args:
        in_channel: number of channels of the incoming feature map.
        outchannel: number of channels produced by the pointwise convolution.

    The spatial size of the input is preserved (stride 1, padding 1).
    """

    def __init__(self, in_channel, outchannel):
        super().__init__()
        # Depthwise (spatial) step: groups == in_channel gives every input
        # channel its own 3x3 filter, so input and output channel counts match.
        depthwise = nn.Conv2d(in_channel, in_channel, kernel_size=3, stride=1,
                              padding=1, groups=in_channel, bias=False)
        # Pointwise step: a 1x1 convolution mixes channels and sets the width.
        pointwise = nn.Conv2d(in_channel, outchannel, kernel_size=1, stride=1,
                              bias=False)
        normalise = nn.BatchNorm2d(outchannel)

        self.layers = nn.Sequential(depthwise, pointwise, normalise)

    def forward(self, x):
        out = self.layers(x)
        return out

class Entry_flow(nn.Module):
    """Entry flow of Xception for single-channel (MNIST-style) images.

    A two-convolution stem (1 -> 32 -> 64 channels, the first convolution with
    stride 2) is followed by three residual stages that widen the features to
    128, 256 and 728 channels. Every stage halves the spatial size with a
    3x3 / stride-2 max-pool; its shortcut is a 1x1 / stride-2 convolution plus
    batch norm so that both branches have the same shape when they are summed.
    """

    def __init__(self):
        super(Entry_flow, self).__init__()

        # Stem: the input has a single channel because the data is MNIST.
        layer1 = [nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False),
                  nn.BatchNorm2d(32),
                  nn.ReLU(inplace=True),
                  nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
                  nn.BatchNorm2d(64),
                  nn.ReLU(True)]
        # Sepconv already ends with batch norm, so none is added after it.
        layer2 = [Sepconv(64, 128),
                  nn.ReLU(True),
                  Sepconv(128, 128),
                  nn.MaxPool2d(kernel_size=3, stride=2, padding=1)]
        # The leading ReLU of a residual stage must NOT be in-place: its input
        # is also read by the stage's shortcut convolution, and an in-place
        # ReLU would overwrite that tensor before the shortcut sees it.
        layer3 = [nn.ReLU(inplace=False),
                  Sepconv(128, 256),
                  nn.ReLU(True),
                  Sepconv(256, 256),
                  nn.MaxPool2d(3, 2, padding=1)]
        layer4 = [nn.ReLU(inplace=False),
                  Sepconv(256, 728),
                  nn.ReLU(True),
                  Sepconv(728, 728),
                  nn.MaxPool2d(3, 2, padding=1)]

        self.layer1 = nn.Sequential(*layer1)
        self.layer2 = nn.Sequential(*layer2)
        self.layer3 = nn.Sequential(*layer3)
        self.layer4 = nn.Sequential(*layer4)
        # Tensors can only be added when all their dimensions agree, so each
        # shortcut projects channels (1x1 conv) and halves the size (stride 2).
        self.skip_con1 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),  # shortcut from layer1's output
                                       nn.BatchNorm2d(128))
        self.skip_con2 = nn.Sequential(nn.Conv2d(128, 256, 1, 2, bias=False),  # shortcut from layer2's output
                                       nn.BatchNorm2d(256))
        self.skip_con3 = nn.Sequential(nn.Conv2d(256, 728, 1, 2, bias=False),  # shortcut from layer3's output
                                       nn.BatchNorm2d(728))

    def forward(self, x):
        """Run the stem and the three residual stages; returns 728-channel features."""
        x1 = self.layer1(x)

        x2 = self.layer2(x1)
        skip = self.skip_con1(x1)
        x2 = x2+skip

        x3 = self.layer3(x2)
        skip = self.skip_con2(x2)
        x3 = x3+skip

        x4 = self.layer4(x3)
        skip = self.skip_con3(x3)
        return x4+skip

class Middle_flow(nn.Module):
    """One middle-flow residual block of Xception.

    Applies three (ReLU -> Sepconv(728, 728)) units and adds the block input
    back as an identity shortcut. Input and output both have 728 channels and
    the same spatial size.
    """

    def __init__(self):
        super(Middle_flow, self).__init__()

        # The first ReLU must not be in-place: it would overwrite ``x`` itself,
        # so the identity shortcut in forward() would add relu(x) instead of x
        # and the caller's tensor would be silently modified.
        layer = [nn.ReLU(inplace=False),
                 Sepconv(728, 728),
                 nn.ReLU(True),
                 Sepconv(728, 728),
                 nn.ReLU(True),
                 Sepconv(728, 728)]

        self.layers = nn.Sequential(*layer)

    def forward(self, x):
        """Return ``layers(x) + x`` (residual branch plus identity shortcut)."""
        out = self.layers(x)
        return out+x

class Exit_flow(nn.Module):
    """Exit flow of Xception: final residual stage plus classifier head.

    Args:
        num_classes: width of the final linear layer. Defaults to 1000, the
            value that was previously hard-coded; pass 10 for MNIST.

    The residual stage widens 728 -> 1024 channels and halves the spatial size;
    the head widens to 2048 channels, global-average-pools to 1x1, flattens and
    applies the linear classifier. The output is raw scores (no softmax).
    """

    def __init__(self, num_classes=1000):
        super(Exit_flow, self).__init__()

        # The leading ReLU must not be in-place: the block input is also read
        # by the shortcut convolution in forward(), and an in-place ReLU would
        # overwrite it before the shortcut is computed.
        layer1 = [nn.ReLU(inplace=False),
                  Sepconv(728, 728),
                  nn.ReLU(True),
                  Sepconv(728, 1024),
                  nn.MaxPool2d(3, 2, padding=1)]

        layer2 = [Sepconv(1024, 1536),
                  nn.ReLU(True),
                  Sepconv(1536, 2048),
                  nn.ReLU(True),
                  nn.AdaptiveAvgPool2d((1, 1)),
                  View(-1),  # flatten (batch, 2048, 1, 1) -> (batch, 2048)
                  nn.Linear(2048, num_classes)]

        self.layer1 = nn.Sequential(*layer1)
        self.layer2 = nn.Sequential(*layer2)
        # Shortcut: 1x1 / stride-2 projection so it matches layer1's output shape.
        self.skip_con = nn.Conv2d(in_channels=728, out_channels=1024, kernel_size=1, stride=2, bias=False)
        self.bn = nn.BatchNorm2d(1024)

    def forward(self, x):
        """Return class scores of shape (batch_size, num_classes)."""
        out1 = self.layer1(x)
        skip = self.skip_con(x)
        skip = self.bn(skip)
        output = self.layer2(out1+skip)
        return output
    
class Xception(nn.Module):
    """Full network: one entry flow, eight middle-flow blocks, one exit flow."""

    def __init__(self):
        super().__init__()

        # Build the stages in network order so that sub-module indices
        # (layers.0 ... layers.9) and initialisation order stay the same.
        stages = [Entry_flow()]
        for _ in range(8):
            stages.append(Middle_flow())
        stages.append(Exit_flow())

        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        # The whole model is a plain chain of the stages above.
        return self.layers(x)

if __name__ == '__main__':
    # Print a per-layer summary of the model for one 1x28x28 (MNIST) image.
    from torchsummary import summary
    model = Xception()
    # torchsummary defaults to device="cuda" and then feeds CUDA tensors
    # whenever a GPU is visible. The model built here lives on the CPU, so ask
    # for CPU tensors explicitly to avoid an input/weight device mismatch.
    summary(model, (1, 28, 28), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 14, 14]             288
       BatchNorm2d-2           [-1, 32, 14, 14]              64
              ReLU-3           [-1, 32, 14, 14]               0
            Conv2d-4           [-1, 64, 14, 14]          18,432
       BatchNorm2d-5           [-1, 64, 14, 14]             128
              ReLU-6           [-1, 64, 14, 14]               0
            Conv2d-7           [-1, 64, 14, 14]             576
            Conv2d-8          [-1, 128, 14, 14]           8,192
       BatchNorm2d-9          [-1, 128, 14, 14]             256
          Sepconv-10          [-1, 128, 14, 14]               0
             ReLU-11          [-1, 128, 14, 14]               0
           Conv2d-12          [-1, 128, 14, 14]           1,152
           Conv2d-13          [-1, 128, 14, 14]          16,384
      BatchNorm2d-14          [-1, 128,

In [6]:
# Keras
#
# Builds the same Xception-style network with the Keras functional API for
# 28x28x1 MNIST images, prints its summary and compiles it for training.

# With strides=2 and padding='same' the output size is ceil(input_size / 2):
# e.g. a 10x10 image with a 3x3 kernel comes out as 5x5.

from keras.layers import Conv2D, MaxPooling2D, SeparableConv2D, BatchNormalization, Dense, GlobalAveragePooling2D, Activation, add, Input
from keras.models import Model

# Entry flow (see the architecture figure)

inputs = Input(shape=(28, 28, 1)) # MNIST-sized single-channel input (not ImageNet)

x = Conv2D(32, kernel_size=3, strides=2, padding='same', use_bias=False)(inputs)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(64, kernel_size=3, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
# (None, 14, 14, 64) for the 28x28 input used here
x2 = x

# Three residual stages (see the architecture figure)

for size in [128, 256, 728]:
    
    # Residual branch: two separable convolutions, then a stride-2 max-pool.
    x = Activation('relu')(x)
    x = SeparableConv2D(size, kernel_size=3, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(size, kernel_size=3, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(3, strides=2, padding='same')(x)
    
    # Shortcut branch: 1x1 / stride-2 projection of the stage input (x2) so
    # that its shape matches the residual branch before the two are added.
    residual_block = Conv2D(size, kernel_size=1, strides=2, padding='same', use_bias=False)(x2)
    residual_block = BatchNormalization()(residual_block)
    x = add([x, residual_block])
    x2 = x # keep the stage output in x2 as the input of the next shortcut
    
    # For the 28x28 input: (None, 7, 7, 128) -> (None, 4, 4, 256) -> (None, 2, 2, 728)

# Middle flow: 8 residual blocks, each with 3 x (ReLU -> SeparableConv -> BN)
# and an identity shortcut from the block input (x2).

for _ in range(8):
    for _ in range(3):
        x = Activation('relu')(x)
        x = SeparableConv2D(728, kernel_size=3, padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
    x = add([x, x2])
    x2 = x
    
# (None, 2, 2, 728) for the 28x28 input

# Exit flow

x = Activation('relu')(x)
x = SeparableConv2D(728, kernel_size=3, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = SeparableConv2D(1024, kernel_size=3, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)
x = MaxPooling2D(3, strides=2, padding='same')(x)

# Shortcut for the exit-flow residual stage, projected from the last
# middle-flow output (x2).
residual_block = Conv2D(1024, kernel_size=1, strides=2, padding='same', use_bias=False)(x2)
residual_block = BatchNormalization()(residual_block)
x = add([x, residual_block])

# (None, 1, 1, 1024) for the 28x28 input

x = SeparableConv2D(1536, kernel_size=3, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = SeparableConv2D(2048, kernel_size=3, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = GlobalAveragePooling2D()(x)

# 10-way softmax classifier (one output per MNIST digit).
outputs = Dense(10, activation='softmax')(x)

model = Model(inputs, outputs)

model.summary()

# categorical_crossentropy expects one-hot encoded labels.
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_13 (Conv2D)              (None, 14, 14, 32)   288         input_3[0][0]                    
__________________________________________________________________________________________________
batch_normalization_81 (BatchNo (None, 14, 14, 32)   128         conv2d_13[0][0]                  
__________________________________________________________________________________________________
activation_73 (Activation)      (None, 14, 14, 32)   0           batch_normalization_81[0][0]     
____________________________________________________________________________________________

activation_93 (Activation)      (None, 2, 2, 728)    0           add_31[0][0]                     
__________________________________________________________________________________________________
separable_conv2d_87 (SeparableC (None, 2, 2, 728)    536536      activation_93[0][0]              
__________________________________________________________________________________________________
batch_normalization_104 (BatchN (None, 2, 2, 728)    2912        separable_conv2d_87[0][0]        
__________________________________________________________________________________________________
activation_94 (Activation)      (None, 2, 2, 728)    0           batch_normalization_104[0][0]    
__________________________________________________________________________________________________
separable_conv2d_88 (SeparableC (None, 2, 2, 728)    536536      activation_94[0][0]              
__________________________________________________________________________________________________
batch_norm

In [None]:
# Show the model graph inline as an SVG image.

from IPython.display import SVG
from keras.utils import model_to_dot

# Convert the Keras model to a Graphviz graph, then render it with `dot`.
model_graph = model_to_dot(model, dpi=50)
SVG(model_graph.create(prog='dot', format='svg'))

In [None]:
# Prepare the MNIST data: scale pixels to [0, 1], add a channel axis and
# one-hot encode the labels.

import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils

width = 28
height = 28
num_classes = 10  # digits 0-9; must match the Dense(10) output layer

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# -1 lets NumPy infer the number of samples instead of hard-coding
# 60000 / 10000, so the cell also works on a subset of the data.
x_train = x_train.reshape(-1, width, height, 1).astype('float32')/255.0
x_test = x_test.reshape(-1, width, height, 1).astype('float32')/255.0

# Pass num_classes explicitly: otherwise the one-hot width is inferred from the
# largest label present, which could differ between the train and test splits.
y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

Train on 54000 samples, validate on 6000 samples
Epoch 1/1


In [None]:
# Train the model, holding out the last 10% of the training set for validation.

model.fit(
    x_train,
    y_train,
    batch_size=1000,
    epochs=10,
    validation_split=0.1,
    verbose=2,
)

# evaluate() returns [loss, acc] because the model was compiled with
# metrics=['accuracy'].
score = model.evaluate(x_test, y_test)

print('loss: ', score[0])
print('acc: ', score[1])