In [2]:
# 파이토치

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
import argparse
import time
from copy import deepcopy
import matplotlib.pyplot as plt
from torchsummary import summary

In [8]:
class View(nn.Module):
    
    def __init__(self, *shape): 
        super(View, self).__init__() 
        self.shape = shape
        
    def forward(self, x):
        return x.view(x.shape[0], *self.shape) # x.shape = [batch_size, channel, width, height]

class Residual_Block(nn.Module):  # Residual Block 만들기 
    
    def __init__(self, n_ch): # pre activation 적용한 것 (2번째 논문 5번 그림)
        super(Residual_Block, self).__init__() 
        layers = []
        layers += [nn.BatchNorm2d(num_features=n_ch),
                  nn.ReLU(inplace=True), 
                  nn.Conv2d(in_channels=n_ch, out_channels=n_ch, kernel_size=3, stride=1, padding=1, bias=False),
                  nn.BatchNorm2d(num_features=n_ch),
                  nn.ReLU(inplace=True),
                  nn.Conv2d(in_channels=n_ch, out_channels=n_ch, kernel_size=3, stride=1, padding=1, bias=False)]
        self.layers = nn.Sequential(*layers)
        
    def forward(self,x):
        out = self.layers(x)
        return x + out
    
class ResNet(nn.Module):
    
    def __init__(self):
        super(ResNet, self).__init__()
        
        # mnist니까 in_channels=1, 논문에서 out_channels=64, kernel_size=7, stride=2 (논문에선 사이즈 줄이기 위해 - 28*28이 14*14로)
        # 사이즈 안 줄게 하는 padding 공식 = (kernel_size-1)/2
        
        # 우리는 Residual_Block 2개씩만 하고 (논문은 3개씩), in_channel은 64랑 256만.
        
        layers = []
        layers += [nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3), # batch * 64 * 14 * 14
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1), # batch * 64 * 7 * 7
                   Residual_Block(n_ch=64),
                   Residual_Block(n_ch=64),
                   nn.BatchNorm2d(64), # Residual_Block 거치면 x가 새롭게 추가 되니까 그것에 대해서도 BN (논문에선 안함)
                   nn.Conv2d(in_channels=64, out_channels=256, kernel_size=3, padding=1),
                   Residual_Block(n_ch=256),
                   Residual_Block(n_ch=256), # batch * 256 * 7 * 7
                   nn.AdaptiveAvgPool2d((1,1)), # batch * 256 * 1 * 1: (1,1)은 입력되는 커널 사이즈가 아니라 그렇게 나가도록 하라는 것
                   View(-1), # batch * 256 * 1 * 1 는 우리가 보기에 1차원이지만 컴퓨터가 보기엔 4차원이므로 그걸 해결하기 위함
                   nn.Linear(in_features=256, out_features=10)]
                      
        self.layers = nn.Sequential(*layers)
        
    def forward(self,x):
        return self.layers(x)
    
if __name__ == '__main__': # 이 파일이 메인으로 돌아갈때만 아래를 실행한다
    model = ResNet()
    summary(model, (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 14, 14]           3,200
         MaxPool2d-2             [-1, 64, 7, 7]               0
       BatchNorm2d-3             [-1, 64, 7, 7]             128
              ReLU-4             [-1, 64, 7, 7]               0
            Conv2d-5             [-1, 64, 7, 7]          36,864
       BatchNorm2d-6             [-1, 64, 7, 7]             128
              ReLU-7             [-1, 64, 7, 7]               0
            Conv2d-8             [-1, 64, 7, 7]          36,864
    Residual_Block-9             [-1, 64, 7, 7]               0
      BatchNorm2d-10             [-1, 64, 7, 7]             128
             ReLU-11             [-1, 64, 7, 7]               0
           Conv2d-12             [-1, 64, 7, 7]          36,864
      BatchNorm2d-13             [-1, 64, 7, 7]             128
             ReLU-14             [-1, 6

In [None]:
# MNIST 데이터에 대해서 훈련 및 검증

# initial parameters

seed = 6 # weight 초깃값 설정: 이렇게 설정해주면 돌릴때마다 weight 안변함
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args('')
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

args.batch_size = 256
args.model = ResNet()
args.loss_fn = nn.CrossEntropyLoss()
args.batch_size = 1024
args.lr = 0.01
args.epoch = 5

# 데이터 준비

train_datasets = MNIST(root='./datasets', train=True, transform=ToTensor(), download=True)
validation_datasets = MNIST(root='./datasets', train=False, transform=ToTensor(), download=True)

def acc(y_pred, y_true, batch_size):
    accuracy = torch.sum(torch.eq(torch.argmax(y_pred, dim=1), y_true)).item() / batch_size * 100 
    return accuracy

def train(model, datasets, optimizer, loss_fn, args):
    trainloader = DataLoader(dataset=datasets,
                             batch_size=args.batch_size,
                             shuffle=True,
                             drop_last=True) # drop_last: 데이터는 6만개인데 배치사이즈가 256이면 딱 안나눠지니까 나머지는 떨구는 것
    model.train() # 이번 모델은 train에 쓰겠다
    model.zero_grad()
    optimizer.zero_grad()

    train_acc = 0.0

    for i, (X, y) in enumerate (trainloader): # 데이터를 만들어주는 것
        X = X.to(args.device)
        y_true = y.to(args.device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y_true)
        accuracy = acc(y_pred=y_pred, y_true= y_true, batch_size=args.batch_size)

        model.zero_grad() # 초기화하라
        optimizer.zero_grad() # 초기화하라
        loss.backward() # 로스 (오류) 역전파해라
        optimizer.step() # 옵티마이저는 가중치를 갱신하라

        train_acc += accuracy

    train_acc = train_acc / len(trainloader)

    return train_acc

def validate(model, datasets, args):
    valloader = DataLoader(dataset=datasets,
                             batch_size=args.batch_size,
                             shuffle=False,
                             drop_last=True)
    model.eval() # 이번 모델은 val에 쓰겠다

    val_acc = 0.0
    
    with torch.no_grad():
        for i, (X, y) in enumerate(valloader):
            X = X.to(args.device)
            y_true = y.to(args.device)
            y_pred = model(X)

            accuracy = acc(y_pred=y_pred, y_true=y_true, batch_size=args.batch_size)
            val_acc += accuracy

    val_acc = val_acc / len(valloader)

    return val_acc

def training(train_datasets, val_datasets, args):
    model = args.model
    model.to(args.device)

    print('Device : ', args.device)

    loss_fn = args.loss_fn
    optimizer = optim.SGD(model.parameters(), lr=args.lr) # 모델의 parameter들과 learnig rate를 참고해라

    epoch_list = []
    train_acc_list = []
    val_acc_list = []

    for epoch in range(args.epoch):
        ts = time.time()

        train_acc = train(model=model, datasets=train_datasets, optimizer=optimizer, loss_fn=loss_fn, args=args)
        val_acc = validate(model=model, datasets=val_datasets, args=args)

        te = time.time()

        train_acc_list.append(train_acc)
        val_acc_list.append(val_acc)
        epoch_list.append(epoch)
        print('Epoch {}, acc(train/val) {:2.5f}/{:2.5f}. Took {:2.2f} sec'.format(epoch,train_acc,val_acc,te - ts))

    plt.title('ResNet')
    plt.plot(epoch_list, train_acc_list)
    plt.plot(epoch_list, val_acc_list)
    plt.xlabel('epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Validation'])
    plt.show()

    torch.save(model.state_dict(), 'CNN.pt') # 모델의 가중치 상태를 dict형태로 저장

# 모델 훈련시키기    
    
training(train_datasets, validation_datasets, deepcopy(args))

# model = ResNet()
# model.load_stat_dict(torch.load('CNN.pt'))를 쓰면 원래 사용하던 모델을 불러와서 이어서 쓸 수 있다

In [11]:
# 케라스

from keras.layers import Input, Conv2D, MaxPooling2D, Dropout, BatchNormalization, Dense, Activation, add, Flatten, GlobalAveragePooling2D
from keras.models import Model
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils

width = 28
height = 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, width, height, 1).astype('float32')/255.0
x_test = x_test.reshape(10000, width, height, 1).astype('float32')/255.0

num_classes = 10
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

Using TensorFlow backend.


In [15]:
def Residual_Block(x, n_ch):
    skip_connection = x # 초기의 x
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(n_ch, kernel_size=(3,3), strides=1, padding='same')(x) # 크기 안변하게 하는 패딩이 케라스에선 same
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(n_ch, kernel_size=(3,3), strides=1, padding='same')(x)
    x = add([x, skip_connection])
    
    return x

inputs = Input(shape=(28,28,1))
x = Conv2D(64, kernel_size=7, strides=2, padding='valid')(inputs) # 크기가 변해야 하므로 (반으로 줄어야 하므로) 일단 valid
x = MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(x)
x = Residual_Block(x, 64) # x = add([x, skip_connection])에 이미 x가 들어가 있기 때문에 x가 연결될 필요가 없다
x = Residual_Block(x, 64)
x = BatchNormalization()(x)
x = Conv2D(256, kernel_size=3, strides=2, padding='valid')(x)
x = Residual_Block(x, 256)
x = Residual_Block(x, 256)
x = GlobalAveragePooling2D()(x) # 빈 괄호는 option 없는 것

outputs = Dense(10, activation = 'softmax')(x)

model = Model(inputs=inputs, outputs=outputs)

model.summary()

model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_18 (Conv2D)              (None, 11, 11, 64)   3200        input_4[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_4 (MaxPooling2D)  (None, 6, 6, 64)     0           conv2d_18[0][0]                  
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 6, 6, 64)     256         max_pooling2d_4[0][0]            
____________________________________________________________________________________________

In [None]:
# 모델 훈련

model.fit(x_train, y_train, batch_size=540, epochs=10, verbose = 2, validation_split=0.1) 
score = model.evaluate(x_test, y_test) # [loss, acc]

print('loss: ', score[0])
print('acc: ', score[1])