# [2-3 CNN(ResNet)]

*KU LeeDongGyu*

### Note :

This code was used during the workshop at Kyungpook National University. <br>
Also, the content of this material is what I learned while moving Pycham to Jupyter.

Reference : TensorFlow 2.1 Quick Start Guide (by Holdroyd)

### Modules

In [6]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, Activation
from tensorflow.keras.layers import AveragePooling2D, Input, Flatten
from tensorflow.keras.layers import add
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import to_categorical
import numpy as np
import os

## 1. ResNet v2
---

In [4]:
"""
참고교재: Advanced Deep Learning with Keras

예제 2-1-3. ResNet classification on the CIFAR10 dataset  
  
ResNet v1
[a] Deep Residual Learning for Image Recognition
https://arxiv.org/pdf/1512.03385.pdf

ResNet v2
[b] Identity Mappings in Deep Residual Networks
https://arxiv.org/pdf/1603.05027.pdf
"""

# training parameters
batch_size = 32 # orig paper trained all networks with batch_size=128
epochs = 2 # 200
data_augmentation = True
num_classes = 10

# subtracting pixel mean improves accuracy
subtract_pixel_mean = True

In [3]:
# Model parameter
# ----------------------------------------------------------------------------
#           |      | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
# Model     |  n   | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
#           |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
# ----------------------------------------------------------------------------
# ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
# ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
# ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
# ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
# ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
# ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
# ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
# ---------------------------------------------------------------------------

In [9]:
n = 3 #영역의 수. 한 영역은 6개의 층으로 구성되어있다. 그리고 2개의 층이 1개의 residual block으로 구성됨.

# model version
# orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)

version = 2
depth = n * 9 + 2

In [10]:
print(depth)

29


In [11]:
# model name, depth and version
model_type = 'ResNet%dv%d' % (depth, version)

In [12]:
model_type

'ResNet29v2'

In [13]:
# load the CIFAR10 data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# input image dimensions.
input_shape = x_train.shape[1:]

# normalize data.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [14]:
x_test.shape

(10000, 32, 32, 3)

In [15]:
# if subtract pixel mean is enabled  (데이터를 중심화 하기 위해서 뺀다 - stddev로 나눠주면 z정규화임.)
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0) # 데이터들에 대한 각 픽셀별 평균
    x_train -= x_train_mean #중심화
    x_test -= x_train_mean #중심화

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape) #2차원 텐서임을 확인하자.


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
y_train shape: (50000, 1)


In [16]:
# convert class vectors to binary class matrices.
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

### 1) Learning Schedule

In [17]:
# learning rate는 적당한 epoch를 돌리고나면, 급격히 줄임으로써 좀더 세밀한 지역의 최적화를 이어나간다.
# 고차원 loss function의 모습은 fractal처럼 맞물려있기 때문.
def lr_schedule(epoch):
    """Learning Rate Schedule

    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
    Called automatically every epoch as part of callbacks during training.

    # Arguments
        epoch (int): The number of epochs

    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr

### 2) Resnet Layer

In [18]:
# 우리가 쓸 resnet함수 정의.
# 옵션으로 여러개 설정해준다.
# 이 옵션들은 결국 tf.keras안에 있는 함수들로 연결될 것이다.
# 편의상 아래와 같은 층의 구성이름을 resnet_layer이라 하자.
# !!!! conv-bn-activation     or    bn-activation-conv  에 대한 함수를 만드는거임 !!!!
# 당연하게, bn은 activation function 앞에 있어야함. 이에따라 위의 두가지 경우가 생김.

def resnet_layer(inputs,
                 num_filters=16, #필터 개수
                 kernel_size=3, #필터사이즈
                 strides=1, #필터 strdie
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    """2D Convolution-Batch Normalization-Activation stack builder

    # Arguments
        inputs (tensor): input tensor from input image or previous layer
        num_filters (int): Conv2D number of filters
        kernel_size (int): Conv2D square kernel dimensions
        strides (int): Conv2D square stride dimensions
        activation (string): activation name
        batch_normalization (bool): whether to include batch normalization
        conv_first (bool): conv-bn-activation (True) or   # <----------- 이런 옵션이 있음을 확인하자.
            bn-activation-conv (False)

    # Returns
        x (tensor): tensor as input to the next layer
    """
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4)) #정규화는 이렇게 사용함을 확인.
    # tf.tensorflow.keras.regularizers.l2 임. l1, l2, l1_l2 세종류가 있음. 그밖에는 사용자정의로 만들어야함.
    # 이 필터들(conv 층)에서 적용할 규제의 타입을 결정짓는다.
    # tf.tensorflow.keras.activity_regularizer 도 있으며, activation function 의 모수 규제화를 진행한다. (ex. maxout)

    x = inputs
    if conv_first: # conv_first가 맞으면, 첫번째 conv는 규제화 진행. (위에서 옵션 정의한걸 보자.)
        x = conv(x)
        if batch_normalization: #batch normailzation이 있으면(True), 그걸 받아 사용한다.
            x = BatchNormalization()(x) # tf.keras.layers.BatchNormalization
        if activation is not None: # activation이 있으면(not None), 그걸 받아 사용한다.
            x = Activation(activation)(x) # tf.keras.layers.Activation
                                          # conv-bn-activatio
    else: #나머지 conv층이라면 아래를 진행.
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x) #bn-activation-conv
    return x

### 3) Resnet v2 Structure

In [23]:
## resnet version1 에 대해 알아보자.
# Identity Mappings in Deep Residual Networks 논문의 conv shortcut 버전임을 잊지말자.

In [21]:
## resnet version2 에 대해 알아보자.
# 기본적으로 (1 x 1)-(3 x 3)-(1 x 1)의 bottleneck layer이다.
# bottleneck layer를 사용하므로써, 층의 뻥튀기 효과도 있음.

def resnet_v2(input_shape, depth, num_classes=10):

    if (depth - 2) % 9 != 0:
        raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')

    num_filters_in = 16
    num_res_blocks = int((depth - 2) / 9) 

    inputs = Input(shape=input_shape)


    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in, 
                     conv_first=True) 
    
    for stage in range(3): 
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0: 
                num_filters_out = num_filters_in * 4
                if res_block == 0:  
                    activation = None
                    batch_normalization = False
            else: 
                num_filters_out = num_filters_in * 2
                if res_block == 0:  
                    strides = 2  

            y = resnet_layer(inputs=x, # 1x1
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides,
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False)
            y = resnet_layer(inputs=y, # 3x3
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y, # 1x1
                             num_filters=num_filters_out,
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0:

                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides, 
                                 activation=None,
                                 batch_normalization=False)
            x = add([x, y])

        num_filters_in = num_filters_out

    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)


    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    model = Model(inputs=inputs, outputs=outputs)
    return model

    

In [25]:
"""
## resnet version2 에 대해 알아보자.
# 기본적으로 (1 x 1)-(3 x 3)-(1 x 1)의 bottleneck layer이다.
# bottleneck layer를 사용하므로써, 층의 뻥튀기 효과도 있음.
def resnet_v2(input_shape, depth, num_classes=10):
    ResNet Version 2 Model builder [b]

    Stacks of {(1 x 1)-(3 x 3)-(1 x 1)} BN-ReLU-Conv2D or also known as
    bottleneck layer
    First shortcut connection per layer is 1 x 1 Conv2D.  #이 사실에 주의
    Second and onwards shortcut connection is identity.   #이 사실에 주의
    At the beginning of each stage, the feature map size is halved (downsampled)
    by a convolutional layer with strides=2, while the number of filter maps is
    doubled. Within each stage, the layers have the same number filters and the
    same filter map sizes.
    Features maps sizes:
    conv1  : 32x32,  16
    stage 0: 32x32,  64
    stage 1: 16x16, 128
    stage 2:  8x8,  256

    # Arguments
        input_shape (tensor): shape of input image tensor
        depth (int): number of core convolutional layers
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance
    
    if (depth - 2) % 9 != 0: # 이번엔 한 영역당 9개의 층이 있다.
        raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
    # Start model definition.
    num_filters_in = 16
    num_res_blocks = int((depth - 2) / 9) # 즉 56층(depth = 56) 이라면, 6개의 residual_block이 있게 되는것.

    inputs = Input(shape=input_shape)
    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths

    # 아래처럼 층 1개를 쌓음으로써 시작.
    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in, # num_filters_i n =16
                     conv_first=True) # c - b - a (convolution - bn - activation)

    # Instantiate the stack of residual units
    for stage in range(3): # 역시나 영역은 3번
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0: # 영역이 첫번째라면 num_filters_out 변수에 필터 4배 저장.
                num_filters_out = num_filters_in * 4
                if res_block == 0:  # first layer and first stage -> activation x , bn x
                                    # 즉, 첫 영역의 첫 블럭에서는 convolution층만 쌓음.
                    activation = None
                    batch_normalization = False
            else: # 영역이 두번째, 세번째라면  num_filters_out 변수에 필터 2배 저장.
                num_filters_out = num_filters_in * 2
                if res_block == 0:  # first layer but not first stage
                    strides = 2    # downsample

            # bottleneck residual unit
            y = resnet_layer(inputs=x, # 1x1
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides, # downsampling 에 영향을 받게끔
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False) # 기본적으로 우리는 b - a - c 구조를 쌓는다.
            y = resnet_layer(inputs=y, # 3x3
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y, # 1x1
                             num_filters=num_filters_out, # 최종 out filter가 되게끔.
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0: #첫 층이라면, 기존으로 가는 x역시나 그대로 가지고 간다.
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides, # 얘도 downsampling
                                 activation=None,
                                 batch_normalization=False)
            x = add([x, y])

        num_filters_in = num_filters_out

    # add classifier on top.
    # v2 has BN-ReLU before Pooling
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)

    #마지막 한층으로 마무리.
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model

"""

print("This is an explanation code.")

This is an explanation code.


In [22]:
resnet_v2(input_shape=input_shape, depth=20).summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 16)   448         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 16)   64          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 16)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [23]:
model = resnet_v2(input_shape=input_shape, depth=depth)

In [24]:
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=lr_schedule(0)), # lr_schedule은 우리가 만든 함수로, argument로 epoch를 받음.
              metrics=['accuracy'])

Learning rate:  0.001


### 4) Path Setting for Data and Resnet v1 Structure

In [27]:
# prepare model model saving directory.

save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type #모델의 이름을 출력

if not os.path.isdir(save_dir): # save_dir경로가 없으면,
    os.makedirs(save_dir) #폴더를 추가 시켜라.
    
filepath = os.path.join(save_dir, model_name) #경로에 모형을 저장할거임.

### 5) Callback

In [33]:
# prepare callbacks for model saving and for learning rate adjustment.
# 모형이 epoch를 도는 순간에는, model.save를 사용할수가 없다.
# 이를 위해, callback이라는 기능을 제공한다. (자세한건, https://hwiyong.tistory.com/108 참고)
# 정한 epoch, 혹은 1 epoch가 끝날때마다 함수를 저장하고, 그 저장한 함수를 다시 불러서 사용하는 방식을 취하게 된다.
# 아래는 모델 저장하는 코드.### 4) Path Setting for Data and Resnet v1 Structure

In [28]:
checkpoint = ModelCheckpoint(filepath=filepath, # tensorflow.keras.callback.ModelCheckpoint
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)

In [29]:
#함수를 argument로 사용한다. (우리가 만든 lr_schedule)

lr_scheduler = LearningRateScheduler(lr_schedule) # tensorflow.keras.callback.LearningRateScheduler

# learning rate(lr) decay을 행하되, 위는 '정적'에 관련된 사항이라면
# 아래는 '동적'에 관련된 사항이다. validation loss가 더 이상 감소하지 않을 때 lr을 감소시켜주는 경우가 이에 해당하며,
# 이 경우에도 역시 callback을 통해 더욱 진행할수 있다고 한다.

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), # tensorflow.keras.callback.ReduceLROnPlateau
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

# callbacks 에 대한 내용을 아래 세개로 하자.

callbacks = [checkpoint, lr_reducer, lr_scheduler]


### 6) Data augmentation

In [30]:
# run training, with or without data augmentation.
# 데이터 augmentation이 적용이 된경우와 아닌경우에 모델을 적합시키는데,
# 결국 같은 모형을 쓰는 것이다.
if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True,
              callbacks=callbacks)

else:
    print('Using real-time data augmentation.')
    # this will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator( #tensorflow.keras.preprocessing.image.ImageDataGenerator 함수로 flipping, cropping을 한다.
        # set input mean to 0 over the dataset
        featurewise_center=False,
        # set each sample mean to 0
        samplewise_center=False,
        # divide inputs by std of dataset
        featurewise_std_normalization=False,
        # divide each input by its std
        samplewise_std_normalization=False,
        # apply ZCA whitening
        zca_whitening=False,
        # randomly rotate images in the range (deg 0 to 180)
        rotation_range=0,
        # randomly shift images horizontally
        width_shift_range=0.1,
        # randomly shift images vertically
        height_shift_range=0.1,
        # randomly flip images
        horizontal_flip=True,
        # randomly flip images
        vertical_flip=False)

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # fit the model on the batches generated by datagen.flow().
    
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        validation_data=(x_test, y_test),
                        epochs=epochs, verbose=1, workers=4,
                        steps_per_epoch=10, # 돌아가는지만 확인하기 위해, 임의로 숫자를 바꿈. 원래 코드는 아래와 같다.
                        callbacks=callbacks)
    
    """
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        validation_data=(x_test, y_test),
                        epochs=epochs, verbose=1, workers=4,
                        steps_per_epoch=len(x_train)//batch_size,
                        callbacks=callbacks)
    """


Using real-time data augmentation.
Instructions for updating:
Please use Model.fit, which supports generators.
  ...
    to  
  ['...']
Train for 10 steps, validate on 10000 samples
Learning rate:  0.001
Epoch 1/2
Learning rate:  0.001
Epoch 2/2


In [None]:
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

#결과는 당연하다... 10개이기때문



## 2. Reference
---

- [Resnet1](https://arxiv.org/pdf/1512.03385.pdf)
- [Resnet2](https://arxiv.org/pdf/1603.05027.pdf)
- [Callback](https://hwiyong.tistory.com/108)