# Training Logic

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
np.random.seed(7777)
tf.random.set_seed(7777)

In [3]:
class Cifar10DataLoader():
    def __init__(self):
        # data load
        (self.train_x, self.train_y), \
            (self.test_x, self.test_y) = tf.keras.datasets.cifar10.load_data()
        self.input_shape = self.train_x.shape[1:]

    def scale(self, x):

        return (x / 255.0).astype(np.float32)

    def preprocess_dataset(self, dataset):

        (feature, target) = dataset

        # scaling #
        scaled_x = np.array([self.scale(x) for x in feature])

        # label encoding #
        ohe_y = np.array([tf.keras.utils.to_categorical(
            y, num_classes=10) for y in target])
        
        return scaled_x, ohe_y.squeeze(1)

    def get_train_dataset(self):
        return self.preprocess_dataset((self.train_x, self.train_y))

    def get_test_dataset(self):
        return self.preprocess_dataset((self.test_x, self.test_y))

cifar10_loader = Cifar10DataLoader()
train_x, train_y = cifar10_loader.get_train_dataset()

print(train_x.shape, train_x.dtype)
print(train_y.shape, train_y.dtype)

test_x, test_y = cifar10_loader.get_test_dataset()

print(test_x.shape, test_x.dtype)
print(test_y.shape, test_y.dtype)

(50000, 32, 32, 3) float32
(50000, 10) float32
(10000, 32, 32, 3) float32
(10000, 10) float32


In [4]:
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, Add

def build_resnet(input_shape):
    inputs = Input(input_shape)

    net = Conv2D(32, kernel_size=3, strides=2,
                 padding='same', activation='relu')(inputs)
    net = MaxPool2D()(net)
    
    net1 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net)
    net2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(net1)
    net3 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net2)
    
    net1_1 = Conv2D(64, kernel_size=1, padding='same')(net)
    net = Add()([net1_1, net3])
    
    net1 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net)
    net2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(net1)
    net3 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net2)
    
    net = Add()([net, net3])
    
    net = MaxPool2D()(net)
    
    net = Flatten()(net)
    net = Dense(10, activation="softmax")(net)

    model = tf.keras.Model(inputs=inputs, outputs=net, name='resnet')
    
    return model

model = build_resnet((32, 32, 3))
model.summary()

Model: "resnet"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 32, 32, 3)]          0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 16, 16, 32)           896       ['input_1[0][0]']             
                                                                                                  
 max_pooling2d (MaxPooling2  (None, 8, 8, 32)             0         ['conv2d[0][0]']              
 D)                                                                                               
                                                                                                  
 conv2d_1 (Conv2D)           (None, 8, 8, 64)             2112      ['max_pooling2d[0][0]']  

## 학습하는 과정을 직접 만들어보자!

`.fit()` 대신 직접 하드 코딩해보는 시간.

자유도가 높은 모델을 생성하게 된다면 이렇게 학습을 할 필요가 있다!

In [5]:
learning_rate = 0.03
# optimizer
opt = tf.keras.optimizers.Adam(learning_rate) # 클래스
# loss 
loss_fn = tf.keras.losses.categorical_crossentropy # 함수로 만든 것
# metrics
train_loss = tf.keras.metrics.Mean(name='train_loss') # 클래스 객체로 만든 것
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

In [None]:
# 학습 과정을 수도 코드로 만들어보자면 아래와 같다.

'''
for e in epochs:
    for batch_x, batch_y in dataset:
        pred = model(batch_x)
        loss_fn(batch_y, pred)

        gradients
        weight_update

        print
'''

In [6]:
# 학습 과정(pred, loss gradient update까지)
def train_step(x, y) :
    with tf.GradientTape() as tape:
        pred = model(x) # 예측값
        loss = loss_fn(y, pred) # loss 계산
        
    gradients = tape.gradient(loss, model.trainable_variables) # gradient 계산
    opt.apply_gradients(zip(gradients, model.trainable_variables)) # 업데이트
    
    # metric
    train_loss(y)
    train_accuracy(y, pred)

In [8]:
batch_size = 64

for epoch in range(1): # epoch을 도는데 
    
    for i in range(train_x.shape[0] // batch_size): # 배치 데이터마다 계산
        idx = i * batch_size
        x, y = train_x[idx:idx+batch_size], train_y[idx:idx+batch_size] # 배치 데이터 추출
        train_step(x, y)  # 학습 과정
        print("\r {} / {}".format(i, train_x.shape[0] // batch_size), end='\r') # 진행률 출력
    
    # 한 epoch에 대한 loss와 acc 출력
    fmt = 'epoch {} loss: {}, accuracy: {}'
    print(fmt.format(epoch+1, 
                          train_loss.result(), # .result() : 객체의 값을 반환해줌 
                          train_accuracy.result() * 100)
         )
    
    # 초기화
    # 매 epoch마다 새로운 metric 값을 계산해야지,
    # 만약 초기화를 해주지 않으면 이전 epoch의 metric 값이 계속 누적된 채로 계산됨
    train_loss.reset_states()
    train_accuracy.reset_states()

epoch 1 loss: 0.10000000149011612, accuracy: 9.99119758605957


In [None]:
@tf.function
def train_step(x, y) :
    with tf.GradientTape() as tape:
        pred = model(x)
        loss = loss_fn(y, pred)
        
    gradients = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))
    
    train_loss(loss)
    train_accuracy(y, pred)

# @tf.function으로 함수 위에 붙여주면 GPU로 학습시켰을 때 학습 속도가 향상되는 효과를 볼 수 있음
# 그냥 함수로 정의만 해두면 매 배치 데이터를 돌 때마다 계속 함수를 새로 불러오는 형식으로 진행되는데
# @tf.function를 해두면 한 번 불러오고 저장해두므로 불러 쓰기가 더 쉬워진다.

In [None]:
batch_size = 64
num_of_batch_train = train_x.shape[0] // batch_size

for epoch in range(1):
    for i in range(num_of_batch_train):
        idx = i * batch_size
        x, y = train_x[idx:idx+batch_size], train_y[idx:idx+batch_size]
        train_step(x, y)
        print("\r {} / {}".format(i, num_of_batch_train), end='\r')
    
    fmt = 'epoch {} loss: {}, accuracy: {}'
    print(fmt.format(epoch+1, 
                          train_loss.result(),
                          train_accuracy.result() * 100)
         )
    # Reset metrics every epoch
    train_loss.reset_states()
    train_accuracy.reset_states()