In [1]:
# implemented and written by Yeoreum Lee in AI HnV Lab @ Sahmyook University in 2023
__author__ = 'leeyeoreum02'

In [2]:
from typing import Tuple, Callable

import numpy as np

In [3]:
x_data = np.array([1, 2, 3, 4, 5]).reshape(5, 1)
t_data = np.array([1, 2, 3, 4, 5]).reshape(5, 1)

print(x_data.shape, t_data.shape)

(5, 1) (5, 1)


### 1. 데이터 나누기(split)


In [4]:
def split_data(x_data: np.ndarray, t_data: np.ndarray, split_rate: float) -> Tuple[np.ndarray]:
    test_x_data = x_data[:int(split_rate * len(x_data))]
    test_t_data = t_data[:int(split_rate * len(t_data))]
    train_x_data = x_data[int(split_rate * len(x_data)):]
    train_t_data = t_data[int(split_rate * len(t_data)):]
    
    return train_x_data, train_t_data, test_x_data, test_t_data


train_x_data, train_t_data, test_x_data, test_t_data = split_data(x_data, t_data, split_rate=0.2)
print(train_x_data.shape, train_t_data.shape, test_x_data.shape, test_t_data.shape,)

(4, 1) (4, 1) (1, 1) (1, 1)


### 2. 선형 회귀(linear regression) 모델

$$\boldsymbol{y} = f(W, b)(\boldsymbol{x}) =  W\boldsymbol{x} + b$$

In [5]:
class LinearRegression:
    def __init__(self, n_input: int, n_output: int) -> None:
        self.W = np.random.rand(n_input, n_output)
        self.b = np.random.rand(n_output)
        
    def forward(self, x: np.ndarray) -> np.ndarray:
        y = x @ self.W + self.b
        return y

    def __call__(self, x: np.ndarray) -> np.ndarray:
        return self.forward(x)
    
    
model = LinearRegression(n_input=1, n_output=1)

### 3. 오차 함수 (error function, loss function)
- N은 데이터 개수 (행 개수)
- $y$는 정답(label) $\hat{y}$은 예측값(prediction)

$$MSE = \frac{1} {N}\sum_{i=1} ^N (\boldsymbol{y_{i}} - \boldsymbol{\hat{y_{i}}})^2$$

In [6]:
def mean_square_error(y_data: np.ndarray, t_data: np.ndarray) -> np.ndarray:
    return np.sum((t_data - y_data) ** 2) / len(y_data)

### 4. 수치 미분 (numerical derivative)

$$\frac{df(\boldsymbol{x})} {d\boldsymbol{x}} = \lim_{h \to 0} \frac{f(\boldsymbol{x} + h) - f(\boldsymbol{x} - h)} {2h}$$

In [7]:
def numerical_derivative(f: Callable, x: np.ndarray) -> np.ndarray:
    h = 1e-4
    grad = (f(x + h) - f(x - h)) / (2 * h)
    return grad

### 5. 모델 학습 (train)

메인 교재와 서브 강의만을 이용하여 다음 순서를 가지는 학습 코드를 구현하시오. (구글링 금지)

1. 모델 순전파 (forward)
2. 오차 계산 (loss)
3. 모델 파라미터(가중치 + 편향) 별 오차 함수의 편미분값 계산 (numerical derivative)
4. 가중치(weight), 편향(bias) 갱신 (경사 하강법, gradient descent)

In [8]:
def train() -> None:
    lr = 1e-2
    
    for epoch in range(10000):
        y_data = model(train_x_data)
        loss = mean_square_error(y_data, train_t_data)

        f = lambda W, b: train_x_data @ W + b  # y = f(W, b)(x) = Wx + b
        
        E_w = lambda W: mean_square_error(f(W, model.b), train_t_data)
        E_b = lambda b: mean_square_error(f(model.W, b), train_t_data)
        
        model.W -= lr * numerical_derivative(E_w, model.W)
        model.b -= lr * numerical_derivative(E_b, model.b)
        
        print(f'Epoch: {epoch}, loss {loss}, w: {model.W}, b: {model.b}')
                

train()

Epoch: 0, loss 0.15399006279127692, w: [[0.82123382]], b: [0.4083702]
Epoch: 1, loss 0.08717094508417308, w: [[0.84091477]], b: [0.41133876]
Epoch: 2, loss 0.05279361188263792, w: [[0.85507407]], b: [0.4132568]
Epoch: 3, loss 0.03508738851899511, w: [[0.8652761]], b: [0.41442233]
Epoch: 4, loss 0.0259498661485679, w: [[0.87264199]], b: [0.41504895]
Epoch: 5, loss 0.021217820704086524, w: [[0.87797522]], b: [0.4152897]
Epoch: 6, loss 0.01875172678156182, w: [[0.88185163]], b: [0.4152543]
Epoch: 7, loss 0.017451805597754534, w: [[0.88468389]], b: [0.41502134]
Epoch: 8, loss 0.01675255712051952, w: [[0.88676775]], b: [0.41464717]
Epoch: 9, loss 0.016363074648795148, w: [[0.88831515]], b: [0.41417216]
Epoch: 10, loss 0.016133616051941337, w: [[0.88947801]], b: [0.41362526]
Epoch: 11, loss 0.015987036324957096, w: [[0.89036518]], b: [0.41302719]
Epoch: 12, loss 0.015883544076359293, w: [[0.89105468]], b: [0.41239282]
Epoch: 13, loss 0.015802581064012514, w: [[0.89160242]], b: [0.41173279]
E

### 6. 모델 추론 (evaluate)

메인 교재와 서브 강의만을 이용하여 추론 코드를 구현하시오. (구글링 금지)

In [9]:
def test():
    y_data = model(test_x_data)
    print(y_data, test_t_data)
    
    
test()

[[1.00000001]] [[1]]
