<a href="https://colab.research.google.com/github/hakmin1015/ArtificialIntelligence/blob/main/lab5_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Vanilla Code

In [1]:
import numpy as np
from numpy.random import randn

# Seed the global NumPy RNG so the random data, the initial weights and the
# printed loss curve are identical on every Restart & Run All.
np.random.seed(0)

# Problem sizes: batch size, input dimension, hidden units, output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10
x, y = randn(N, D_in), randn(N, D_out)
w1, w2 = randn(D_in, H), randn(H, D_out)

lr = 1e-4   # learning rate; constant, so it is set once outside the loop

for t in range(2000):
  # Forward pass: sigmoid hidden layer followed by a linear output layer.
  h = 1/(1 + np.exp(-x.dot(w1)))
  y_pred = h.dot(w2)
  loss = np.square(y_pred-y).sum()    # summed squared error (L2 loss)
  print(t, loss)

  # Backward pass: chain rule through the output layer, then the sigmoid.
  grad_y_pred = 2.0 * (y_pred - y)
  grad_w2 = h.T.dot(grad_y_pred)
  grad_h = grad_y_pred.dot(w2.T)
  grad_w1 = x.T.dot(grad_h * h * (1-h))   # d(sigmoid)/dz = h * (1 - h)

  # Plain gradient-descent step (updates the weights in place).
  w1 -= lr * grad_w1
  w2 -= lr * grad_w2

0 35488.21178358674
1 21732.14538430787
2 16334.344079349861
3 13765.666967556532
4 12355.260794591906
5 11455.80234432788
6 10836.273808662032
7 10367.82215105464
8 9998.064907322198
9 9671.628837045788
10 9352.935983502723
11 9052.918413090632
12 8802.826815072884
13 8590.0035193496
14 8391.765714738101
15 8197.556776298137
16 8007.338580665599
17 7827.071342629375
18 7654.68880813109
19 7484.020464014636
20 7316.687200183665
21 7159.709208868066
22 7012.614906319963
23 6877.751287188393
24 6752.024467662666
25 6630.887446996972
26 6512.591457971162
27 6397.992821633415
28 6287.633555669501
29 6180.2723227612605
30 6075.2684320062945
31 5972.779700655547
32 5871.736808826623
33 5771.922916062651
34 5675.204771217473
35 5582.629583916224
36 5493.915698347954
37 5407.837556683673
38 5322.908740853315
39 5237.3958146886935
40 5150.962882181497
41 5068.9741782655265
42 4993.184508297129
43 4920.5665501251715
44 4849.753790699944
45 4780.193214167125
46 4711.626292495023
47 4643.956352124

# CIFAR-10 Classifier로 변경
1. Dataset 읽고 저장하기

In [2]:
import numpy as np
from numpy.random import randn

def unpickle(file):
  """Load a pickled object (here: one CIFAR-10 batch file) from disk.

  Args:
    file: Path of the pickle file; it is opened in binary read mode.

  Returns:
    The unpickled object. Because ``encoding='bytes'`` is used, strings
    pickled by Python 2 come back as ``bytes``, which is why the batch
    dictionaries are indexed with keys such as ``b'data'`` and ``b'labels'``.

  Note:
    ``pickle.load`` can execute arbitrary code while loading; only call this
    on files from a trusted source.
  """
  import pickle
  with open(file, 'rb') as fo:
    # Named `batch` rather than `dict` so the built-in type is not shadowed.
    batch = pickle.load(fo, encoding='bytes')
  return batch

from google.colab import drive
# Mount Google Drive inside the Colab runtime and change into the folder that
# holds the batch files (this path is specific to the author's Drive layout).
drive.mount('/content/drive/')
%cd /content/drive/MyDrive/AI/lab5/

# Load the five training batches and the test batch from the current directory.
bat1 = unpickle('data_batch_1')
bat2 = unpickle('data_batch_2')
bat3 = unpickle('data_batch_3')
bat4 = unpickle('data_batch_4')
bat5 = unpickle('data_batch_5')
bat_test = unpickle('test_batch')

# Concatenate the b'data' and b'labels' entries of all five training batches.
# NOTE(review): the next cell (In[3]) trains on bat1 only and reassigns
# `labels`, so these combined arrays appear unused in the visible cells.
data = np.concatenate([bat1[b'data'], bat2[b'data'],
bat3[b'data'],bat4[b'data'], bat5[b'data']])
labels = np.concatenate([bat1[b'labels'],
bat2[b'labels'], bat3[b'labels'],bat4[b'labels'],
bat5[b'labels']])

Mounted at /content/drive/
/content/drive/MyDrive/AI/lab5


2. Training 준비하기
- 10,000개의 이미지만 사용
- Neuron 수 500개로 증가
- t_label을 one hot vector로 변경

In [3]:
# Training set: the first batch only, cast to float for the matrix arithmetic.
t_data = bat1[b'data'].astype("float")

# Optional preprocessing experiment (disabled): shift the data to zero mean
# and scale it by 255.
# NOTE(review): if this is enabled, the same transform has to be applied to
# test_data in the evaluation cells as well.
# t_data -= np.mean(t_data)
# t_data /= 255

labels = bat1[b'labels']

# Layer sizes: N samples of D_in features, H hidden units, D_out classes.
N, D_in = np.shape(t_data)
H, D_out = 500, 10

# One-hot encode the labels in a single vectorized assignment:
# row i gets a 1 in column labels[i].
t_label = np.zeros((N,D_out))
t_label[np.arange(N), labels] = 1

w1, w2 = randn(D_in, H)*0.01, randn(H, D_out)*0.01      # weight initialization

3. 안전한 sigmoid 함수
- 일반 sigmoid와 비교
- exp 함수의 크기 증가에 대비

In [4]:
def sigmoid(x : np.ndarray) -> np.ndarray:    # numerically safe version
  """Logistic sigmoid that never overflows in ``exp``.

  For x >= 0 it evaluates 1 / (1 + exp(-x)); for x < 0 it evaluates
  exp(x) / (1 + exp(x)). Both forms only exponentiate a non-positive
  number, so large-magnitude inputs cannot overflow.

  Args:
    x: Array of pre-activations. Lists and integer/boolean arrays are
      accepted and are computed in float64; floating-point arrays keep
      their dtype.

  Returns:
    A new array with the same shape as ``x`` holding sigmoid(x).
  """
  x = np.asarray(x)
  # An integer result buffer would truncate every sigmoid value to 0, so
  # non-floating input is promoted to float64 before computing.
  if not np.issubdtype(x.dtype, np.floating):
    x = x.astype(np.float64)
  # exp(-|x|) equals exp(-x) for x >= 0 and exp(x) for x < 0; it is always <= 1.
  exp_neg_abs = np.exp(-np.abs(x))
  return np.where(x >= 0,
                  1 / (1 + exp_neg_abs),
                  exp_neg_abs / (1 + exp_neg_abs))

def sigmoid_fn(x):
  """Plain sigmoid, 1 / (1 + exp(-x)), without any overflow protection."""
  return 1/(1+np.exp(-x))


def relu_fn(x):
  """ReLU expressed as x multiplied by the boolean mask (x > 0)."""
  return x*(x>0)

4. W optimization에 따른 loss 관찰
- lr 변경해보기
- data의 mean을 zero로 해보기
- weight initialization 변경 해보기

In [5]:
# L2 Loss

lr = 1e-4   # learning rate; loop-invariant, so it is set once up front

for t in range(20):
  # Forward pass: hidden pre-activations, sigmoid, linear output layer.
  z = np.dot(t_data,w1)
  # NOTE(review): unlike softmax, sigmoid is not shift-invariant. Subtracting
  # the global maximum forces every pre-activation to be <= 0, so every hidden
  # activation is <= 0.5. The evaluation cell (In[6]) applies the same shift,
  # so the two must be changed together. The flat loss recorded for this cell
  # may be a symptom of this -- TODO confirm.
  z -= np.max(z)
  # h = sigmoid_fn(z)     # plain sigmoid (kept for comparison)
  h = sigmoid(z)        # numerically safe sigmoid
  y_pred = h.dot(w2)
  loss = np.square(y_pred - t_label).sum()    # summed squared error
  print(t, loss)

  # Backward pass: chain rule through the output layer, then the sigmoid.
  grad_y_pred = 2.0 * (y_pred - t_label)
  grad_w2 = h.T.dot(grad_y_pred)
  grad_h = grad_y_pred.dot(w2.T)
  grad_w1 = t_data.T.dot(grad_h * h * (1-h))   # d(sigmoid)/dz = h * (1 - h)

  # Gradient-descent step (updates the weights in place).
  w1 -= lr * grad_w1
  w2 -= lr * grad_w2

0 9999.990878680273
1 9999.990985269013
2 9999.99428595346
3 9999.996507370523
4 9999.997977643912
5 9999.99799422591
6 9999.998200951188
7 9999.99830849064
8 9999.998435235579
9 9999.99853422709
10 9999.998630795206
11 9999.998714844793
12 9999.998793406558
13 9999.998864347117
14 9999.998929961834
15 9999.99899005147
16 9999.999045506564
17 9999.999096549387
18 9999.999143613959
19 9999.999186953792


In [6]:
# Evaluate the L2-trained network on the test batch.
# Cast to float, matching the training data and the softmax evaluation cell.
test_data = bat_test[b'data'].astype("float")
# Convert the labels to an array so the comparison below is explicitly
# element-wise.
test_label = np.asarray(bat_test[b'labels'])

# Forward pass; it mirrors the training forward pass, including the
# global-max shift applied before the sigmoid.
z = np.dot(test_data, w1)
z -= np.max(z)
h = sigmoid(z)
ti = h.dot(w2)

# Predicted class = index of the largest output score in each row.
Y_predict = np.argmax(ti, axis=1)
print(Y_predict)
acc = np.mean(Y_predict == test_label) *100
print("The accuracy is %f " % acc)

[8 8 8 ... 8 8 8]
The accuracy is 10.050000 


5. Softmax loss로 변경
- Gradient of softmax function

In [7]:
# Softmax loss

w1, w2 = randn(D_in, H)*0.01, randn(H, D_out)*0.01      # weight initialization

lr = 1e-4                             # learning rate; loop-invariant
sample_idx = np.arange(len(labels))   # row index used to pick each sample's true-class score

for t in range(20):
  # Hidden layer.
  z = np.dot(t_data,w1)
  # NOTE(review): sigmoid is not shift-invariant, so this global-max shift
  # changes the activations (all pre-activations become <= 0). It is kept
  # because the evaluation cell (In[8]) applies the same shift; change both
  # together.
  z -= np.max(z)
  # h = sigmoid_fn(z)     # plain sigmoid (kept for comparison)
  h = sigmoid(z)        # numerically safe sigmoid

  # Output layer: subtract the row-wise maximum so exp() cannot overflow.
  scores = h.dot(w2)
  scores -= np.max(scores, axis=1, keepdims=True)
  exp_s = np.exp(scores)
  sum_exp = np.sum(exp_s, axis=1, keepdims=True)    # computed once, reused below
  ti = exp_s / sum_exp                              # softmax probabilities

  # Cross-entropy in log-sum-exp form: log(sum_j exp(s_j)) - s_y. This equals
  # -log(softmax probability of the true class) but cannot produce log(0)
  # when that probability underflows.
  loss_i = np.log(sum_exp[:, 0]) - scores[sample_idx, labels]
  loss = np.sum(loss_i)
  print(t, loss)

  # Backward pass: d(loss)/d(scores) = softmax - one-hot target.
  grad_s = ti-t_label
  grad_w2 = h.T.dot(grad_s)
  grad_h = grad_s.dot(w2.T)
  grad_w1 = t_data.T.dot(grad_h * h * (1-h))   # d(sigmoid)/dz = h * (1 - h)

  # Gradient-descent step (updates the weights in place).
  w1 -= lr * grad_w1
  w2 -= lr * grad_w2

0 23025.848851436025
1 23025.848496831477
2 23025.8484333909
3 23025.84826381022
4 23025.84753529905
5 23025.848207179253
6 23025.848806673363
7 23025.848805450092
8 23025.8487843181
9 23025.84876222532
10 23025.84874006717
11 23025.848717886325
12 23025.84869568572
13 23025.848673466455
14 23025.84865122953
15 23025.848628975895
16 23025.848606706477
17 23025.84858442217
18 23025.84856212382
19 23025.848539812258


In [8]:
# Score the softmax-trained network on the test batch.
test_data = bat_test[b'data'].astype("float")
test_label = bat_test[b'labels']

# Hidden layer: same global-max shift and sigmoid as in the training loop.
z = test_data.dot(w1)
z = z - z.max()
h = sigmoid(z)

# Output layer: row-wise max-shifted softmax.
z = np.dot(h, w2)
z_max = z.max(axis=1)
z = z - z_max.reshape(-1, 1)
exp_s = np.exp(z)
ti = exp_s / exp_s.sum(axis=1, keepdims=True)

# Predicted class is the most probable one; accuracy is reported in percent.
Y_predict = ti.argmax(axis=1)
acc = 100 * np.mean(Y_predict == test_label)
print("The accuracy is %f " % acc)

The accuracy is 10.000000 
