<a href="https://colab.research.google.com/github/havihaviplants/Artbox/blob/main/%EC%8B%AC%EC%B8%B5%EC%8B%A0%EA%B2%BD%EB%A7%9D_%EC%8B%A4%EC%8A%B5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

# Add the code that mounts your own Google Drive before this line.
# np.load on an .npz archive returns a lazily-loading object: every
# `mnist[key]` access re-reads that array from disk. Pull each array out
# exactly once instead of re-reading x_train for every statistic.
mnist = np.load('/content/drive/MyDrive/MY Drive/mnist.npz')
x_train_raw = mnist['x_train']
x_test_raw = mnist['x_test']
y_train = mnist['y_train']
y_test = mnist['y_test']

# Standardize both splits with the TRAINING mean/std only, so no test-set
# statistics are used for normalization.
train_mean = np.mean(x_train_raw)
train_std = np.std(x_train_raw)
x_train = (x_train_raw - train_mean) / train_std
x_test = (x_test_raw - train_mean) / train_std
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [None]:
def to_onehot(labels, num_classes):
    """Return one-hot rows for integer class labels.

    Args:
        labels: integer array of class indices.
        num_classes: length of each one-hot vector.

    Returns:
        Float array of shape ``labels.shape + (num_classes,)`` where row
        ``i`` is all zeros except for a 1.0 at position ``labels[i]``.
    """
    # Row k of the identity matrix is exactly the one-hot vector for class k,
    # so indexing the matrix with the labels selects the right rows.
    identity = np.identity(num_classes)
    return identity[labels]
# Convert the integer labels of both splits into one-hot vectors (10 classes).
y_train_onehot, y_test_onehot = (
    to_onehot(split_labels, 10) for split_labels in (y_train, y_test)
)
print(y_train_onehot.shape, y_test_onehot.shape)

(60000, 10) (10000, 10)


In [None]:
# Down-size each (28, 28) image to (7, 7) by keeping every 4th pixel along
# both image axes (for computational efficiency), then flatten each (7, 7)
# image into a (49,) vector.
_every_fourth_pixel = (slice(None), slice(None, None, 4), slice(None, None, 4))
x_train_small = x_train[_every_fourth_pixel].reshape(-1, 7*7)
x_test_small = x_test[_every_fourth_pixel].reshape(-1, 7*7)
print(x_train_small.shape, x_test_small.shape)

(60000, 49) (10000, 49)


In [None]:
class Linear:
    """Fully connected layer computing ``x @ weight + bias``.

    ``forward`` caches its input so that ``backward`` can compute the
    parameter gradients, which are stored in ``grad_w`` / ``grad_b``.
    """

    def __init__(self, in_features, out_features):
        """Create the layer parameters and their gradient buffers.

        Args:
            in_features: size of each input row.
            out_features: size of each output row.
        """
        # He (Kaiming) initialization: zero-mean normal weights scaled by
        # sqrt(2 / fan_in). Drawing weights uniformly from [0, 1) makes every
        # weight positive and lets the pre-activations grow with fan_in,
        # which stalls training.
        fan_in = max(in_features, 1)  # guard against division by zero
        self.weight = np.random.randn(in_features, out_features) * np.sqrt(2.0 / fan_in)
        # Biases start at zero so the layer has no initial offset.
        self.bias = np.zeros(out_features)

        self.grad_w = np.zeros_like(self.weight)  # gradient of the weight
        self.grad_b = np.zeros_like(self.bias)  # gradient of the bias

    def forward(self, x):
        """Return ``x @ weight + bias`` and remember ``x`` for ``backward``."""
        self.input_x = x
        return np.dot(x, self.weight) + self.bias

    def backward(self, grad_output):
        """Store the parameter gradients and return the input gradient.

        Args:
            grad_output: gradient with respect to this layer's output, with
                the same shape as the value returned by ``forward``.

        Returns:
            Gradient with respect to the input given to the last ``forward``.
        """
        self.grad_w = np.dot(self.input_x.T, grad_output)  # dL/dW = x^T . g
        self.grad_b = np.sum(grad_output, axis=0)  # dL/db = sum over the batch
        # Gradient handed to the previous layer: dL/dx = g . W^T
        return np.dot(grad_output, self.weight.T)

    def zero_grad(self):
        """Reset both gradient buffers to zero in place."""
        self.grad_w.fill(0.)
        self.grad_b.fill(0.)


In [None]:
class ReLU:
    """Rectified linear unit: ``max(x, 0)`` applied element-wise."""

    def forward(self, x):
        """Return ``max(x, 0)`` and remember ``x`` for ``backward``."""
        self.input = x
        return np.maximum(x, 0)

    def backward(self, grad_output):
        """Return the gradient with respect to the input of ``forward``.

        The gradient passes through unchanged where the input was strictly
        positive and is zero everywhere else, including at exactly 0.
        ``grad_output`` itself is not modified.
        """
        grad_input = grad_output.copy()
        # `<=` rather than `<`: the derivative is 1 only for inputs greater
        # than 0; an input of exactly 0 must not let gradient through.
        grad_input[self.input <= 0] = 0
        return grad_input

In [None]:
class Softmax:
    """Row-wise softmax over a 2-D array of scores."""

    def forward(self, x):
        """Return row-wise softmax probabilities and cache them.

        The row maximum is subtracted before exponentiating so that
        ``np.exp`` cannot overflow; this does not change the result.
        """
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        self.probs = exps / np.sum(exps, axis=1, keepdims=True)
        return self.probs

    def backward(self, grad_output):
        """Return the gradient with respect to the input of ``forward``.

        Args:
            grad_output: gradient with respect to the probabilities returned
                by the last ``forward`` call, one row per sample.

        Returns:
            Array with one gradient row for EVERY sample in the batch.
        """
        # For one sample the softmax Jacobian is J = diag(p) - p p^T, so
        #   g . J = g * p - (g . p) * p = p * (g - sum(g * p)).
        # Evaluating this for all rows at once replaces the per-sample loop
        # (whose early `return` left every row after the first at zero) and
        # avoids building a Jacobian matrix per sample.
        probs = self.probs
        weighted_sum = np.sum(grad_output * probs, axis=1, keepdims=True)
        return probs * (grad_output - weighted_sum)

In [None]:
class MLP:
  """Two-layer perceptron: Linear -> ReLU -> Linear -> Softmax."""

  def __init__(self, input_size, hidden_size, output_size):
    """Build the layers.

    Args:
      input_size: number of features in each input row.
      hidden_size: number of hidden units.
      output_size: number of output classes.
    """
    self.layer1 = Linear(input_size, hidden_size)
    self.relu = ReLU()
    self.layer2 = Linear(hidden_size, output_size)
    self.softmax = Softmax()

  def forward(self, x):
    """Return the class probabilities for a batch of input rows."""
    x = self.layer1.forward(x)
    x = self.relu.forward(x)
    x = self.layer2.forward(x)
    x = self.softmax.forward(x)
    return x

  def backward(self, y_true, y_pred):
    """Backpropagate the mean cross-entropy loss through the network.

    Fills ``grad_w`` / ``grad_b`` of ``layer1`` and ``layer2``.

    Args:
      y_true: one-hot targets, one row per sample.
      y_pred: probabilities returned by ``forward`` for the same batch.
    """
    # For softmax followed by mean cross-entropy, the gradient with respect
    # to the LOGITS (the output of layer2) simplifies to (p - y) / N.
    # That is already past the softmax, so it is fed directly into layer2;
    # routing it through softmax.backward as well would apply the softmax
    # Jacobian a second time and train on a different objective than the
    # reported cross-entropy loss.
    grad_logits = (y_pred - y_true) / len(y_true)

    grad_hidden = self.layer2.backward(grad_logits)
    grad_hidden = self.relu.backward(grad_hidden)
    _ = self.layer1.backward(grad_hidden)

  def zero_grad(self):
    """Reset the gradients of both linear layers."""
    self.layer1.zero_grad()
    self.layer2.zero_grad()


In [None]:
class SGD:
    """Plain gradient-descent optimizer over a list of layers.

    Each element of ``params`` must expose ``weight``, ``bias``, ``grad_w``
    and ``grad_b`` attributes.
    """

    def __init__(self, params, lr):
        """Remember the layers to update and the learning rate."""
        self.lr = lr
        self.params = params

    def step(self):
        """Move every layer's weight and bias one step against its gradient.

        The updated arrays are rebound on the layer; the previous arrays are
        not modified in place.
        """
        step_size = self.lr
        for layer in self.params:
            layer.weight = layer.weight - step_size * layer.grad_w
            layer.bias = layer.bias - step_size * layer.grad_b
# Loss function
def CrossEntropyLoss(y_true, y_pred):
    """Return the mean cross-entropy between one-hot targets and predictions.

    Args:
        y_true: one-hot targets, one row per sample.
        y_pred: predicted class probabilities, one row per sample.

    Returns:
        Mean over the batch of ``-log(p + 1e-8)``, where ``p`` is the
        probability predicted for each sample's true class.
    """
    # One-hot rows -> integer class labels.
    labels = y_true.argmax(axis=-1)
    # Pick, for every sample, the probability assigned to its true class.
    sample_index = np.arange(labels.shape[0])
    true_class_prob = y_pred[sample_index, labels]

    # The small epsilon keeps log() finite when a probability is exactly 0.
    log_likelihood = np.log(true_class_prob + 1e-8)
    return -np.mean(log_likelihood)

In [None]:
# Build the network (49 inputs = flattened 7x7 image, 128 hidden units,
# 10 output classes) and an SGD optimizer over its two linear layers.
model = MLP(input_size=49, hidden_size=128, output_size=10)
opti = SGD([model.layer1, model.layer2], lr=0.05)
epoch = 5000
losses = []
# Every iteration runs one forward/backward pass over the whole training set.
for i in range(epoch):
  y_pred = model.forward(x_train_small) # forward pass: predicted class probabilities
  loss = CrossEntropyLoss(y_train_onehot, y_pred)
  losses.append(loss)

  model.zero_grad() # reset the gradients to zero
  model.backward(y_train_onehot, y_pred) # compute the gradients via backpropagation
  opti.step() # SGD update of the weights & biases

  if i % 50 == 0:
    print(f"[{i:4d}] {loss:.8f}")

losses = np.array(losses)
# Author's note: moving from PyTorch to TensorFlow is feasible, but the reverse is hard, so learn PyTorch first.

[   0] 8.78975401
[  50] 8.78922741
[ 100] 8.78871239
[ 150] 8.78820882
[ 200] 8.78771656
[ 250] 8.78723546
[ 300] 8.78676535
[ 350] 8.78630604
[ 400] 8.78585735
[ 450] 8.78541908
[ 500] 8.78499103
[ 550] 8.78457301
[ 600] 8.78416479
[ 650] 8.78376617
[ 700] 8.78337694
[ 750] 8.78299687
[ 800] 8.78262575
[ 850] 8.78226336
[ 900] 8.78190950
[ 950] 8.78156393
[1000] 8.78122646
[1050] 8.78089687
[1100] 8.78057495
[1150] 8.78026050
[1200] 8.77995331
[1250] 8.77965319
[1300] 8.77935996
[1350] 8.77907341
[1400] 8.77879336
[1450] 8.77851964
[1500] 8.77825206
[1550] 8.77799046
[1600] 8.77773466
[1650] 8.77748450
[1700] 8.77723982
[1750] 8.77700047
[1800] 8.77676630
[1850] 8.77653716
[1900] 8.77631290
[1950] 8.77609340
[2000] 8.77587852
[2050] 8.77566813
[2100] 8.77546210
[2150] 8.77526032
[2200] 8.77506265
[2250] 8.77486900
[2300] 8.77467924
[2350] 8.77449327
[2400] 8.77431099
[2450] 8.77413228
[2500] 8.77395706
[2550] 8.77378523
[2600] 8.77361670
[2650] 8.77345139
[2700] 8.77328919
[2750] 8.7