# 1. 신경망 복습

## Trainer class 구현

In [1]:
# coding: utf-8
import sys
sys.path.append('..')
import numpy
import time
import matplotlib.pyplot as plt
from common.np import *  # import numpy as np
from common.util import clip_grads

In [3]:
class Trainer:
    """Run mini-batch training of a model and record the averaged loss.

    The model must provide ``forward(x, t)`` returning the loss,
    ``backward()``, and the ``params`` / ``grads`` lists; the optimizer must
    provide ``update(params, grads)``.
    """

    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.loss_list = []        # averaged losses, one per evaluation point
        self.eval_interval = None  # set by fit(); used in the plot's x label
        self.current_epoch = 0     # epochs finished over all fit() calls

    def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20):
        """Train the model on ``x`` / ``t`` for ``max_epoch`` epochs.

        Args:
            x: Input data; must support ``len``, integer-array indexing and
                slicing (e.g. a NumPy array).
            t: Targets, indexed the same way as ``x``.
            max_epoch: Number of passes over the data.
            batch_size: Samples per mini-batch. Samples left over after
                ``len(x) // batch_size`` full batches are skipped each epoch.
            max_grad: If not None, gradients are passed through
                ``clip_grads(grads, max_grad)`` before the update.
            eval_interval: Print and record the average loss every this many
                iterations; None disables reporting.
        """
        data_size = len(x)
        max_iters = data_size // batch_size
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            # Shuffle the samples at the start of every epoch.
            idx = numpy.random.permutation(numpy.arange(data_size))
            x = x[idx]
            t = t[idx]

            for iters in range(max_iters):
                batch_x = x[iters * batch_size:(iters + 1) * batch_size]
                batch_t = t[iters * batch_size:(iters + 1) * batch_size]

                # Compute the gradients and update the parameters.
                loss = model.forward(batch_x, batch_t)
                model.backward()
                # Merge shared weights so each one is updated only once.
                params, grads = remove_duplicate(model.params, model.grads)
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # Periodic evaluation: report the loss averaged since the
                # previous report, then reset the accumulators.
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    avg_loss = total_loss / loss_count
                    elapsed_time = time.time() - start_time
                    print(f"| 에폭 {self.current_epoch + 1} | 반복 {iters + 1} | 시간 {elapsed_time}[초] | 손실 {avg_loss}")
                    self.loss_list.append(float(avg_loss))
                    total_loss, loss_count = 0, 0
            self.current_epoch += 1

    def plot(self, ylim=None):
        """Plot the recorded average losses against the evaluation index.

        Args:
            ylim: Optional sequence unpacked into ``plt.ylim`` to fix the
                y-axis range.
        """
        x = numpy.arange(len(self.loss_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.loss_list, label='train')
        plt.xlabel("반복 (x" + str(self.eval_interval) + ")")
        plt.ylabel("loss")
        plt.show()



In [2]:
def remove_duplicate(params, grads):
    """Collapse shared weights into one entry and sum their gradients.

    Two entries count as shared when they are the same object, or when both
    are 2-D and one equals the transpose of the other (weight tying). The
    gradient of the later entry is added in place to the gradient of the
    earlier one (transposed for the tied case) and the later entry is
    dropped. The input lists themselves are not modified; shallow copies
    are returned.
    """
    params, grads = params[:], grads[:]  # work on shallow copies

    merged = True
    while merged:
        # Restart the scan after every merge because the lists just shrank.
        merged = False
        count = len(params)

        for a in range(count - 1):
            for b in range(a + 1, count):
                first, second = params[a], params[b]
                if first is second:
                    # Same weight object used in two places.
                    grads[a] += grads[b]
                    merged = True
                elif (first.ndim == 2 and second.ndim == 2
                      and first.T.shape == second.shape
                      and np.all(first.T == second)):
                    # Weight shared as its transpose (weight tying).
                    grads[a] += grads[b].T
                    merged = True

                if merged:
                    del params[b]
                    del grads[b]
                    break
            if merged:
                break

    return params, grads

In [None]:
def clip_grads(grads, max_norm):
    """Scale the gradients in place so their global L2 norm is at most max_norm.

    The norm is taken over all arrays in ``grads`` together. Nothing is
    changed when the norm is already within the limit. Returns None.
    """
    squared_sum = sum(np.sum(g ** 2) for g in grads)
    global_norm = np.sqrt(squared_sum)

    # The small epsilon keeps the division defined when the norm is zero.
    scale = max_norm / (global_norm + 1e-6)
    if scale < 1:
        for g in grads:
            g *= scale