# Single Layer Neural Network

이번 시간에는 딥러닝 알고리즘의 가장 기본이 되는 인공신경망(artificial neural network, ANN), 그 중에서도 single-layer neural network 모델을 구현합니다. 오늘은 크게 세 가지 방식, 1) Random Search, 2) h-step Search, 3) Gradient Descent 로 모델을 학습하는 법을 배우며, 이 중에 어떤 것이 가장 좋고 어떤 것을 선택해야 하는지를 배웁니다.


In [1]:
import numpy as np

## Case 1 - 0.3 x X1 + 0.5 x X2 + 0.1

### Load Dataset

In [2]:
# First input feature: 100 samples drawn uniformly from [0, 1).
x1 = np.random.uniform(low=0.0, high=1.0, size=100)

# Show the array shape, then peek at the first ten samples.
print(np.shape(x1))
x1[0:10]

(100,)


array([0.55273264, 0.89421105, 0.73699131, 0.23789136, 0.08832845,
       0.19622518, 0.3062424 , 0.26894921, 0.23964539, 0.80209015])

In [3]:
# Second input feature: 100 samples drawn uniformly from [0, 1).
x2 = np.random.uniform(low=0.0, high=1.0, size=100)

# Show the array shape, then peek at the first ten samples.
print(np.shape(x2))
x2[0:10]

(100,)


array([0.27074956, 0.71918693, 0.6369782 , 0.50843926, 0.68968431,
       0.73483579, 0.26951577, 0.86781141, 0.23921792, 0.46951943])

In [4]:
# Target values: a noise-free linear combination of the two features
# (weights 0.3 and 0.5) plus a bias of 0.1.
y = x1 * 0.3 + x2 * 0.5 + 0.1

# Show the array shape, then peek at the first ten targets.
print(np.shape(y))
y[0:10]

(100,)


array([0.40119457, 0.72785678, 0.6395865 , 0.42558704, 0.47134069,
       0.52628545, 0.32663061, 0.61459047, 0.29150258, 0.57538676])

### First Idea - Random Search

In [5]:
# Random search: on every epoch sample (w1, w2, b) independently from
# [-1, 1) and remember the candidate with the lowest mean absolute error.
num_epoch = 10000

best_error = np.inf
best_epoch = best_w1 = best_w2 = best_b = None

for epoch in range(num_epoch):
    # A fresh candidate each epoch; nothing is carried over from the last one.
    w1 = np.random.uniform(low=-1.0, high=1.0)
    w2 = np.random.uniform(low=-1.0, high=1.0)
    b = np.random.uniform(low=-1.0, high=1.0)

    y_predict = w1 * x1 + w2 * x2 + b
    error = np.mean(np.abs(y_predict - y))

    # Not better than the best candidate seen so far: try the next sample.
    if not error < best_error:
        continue

    best_error, best_epoch = error, epoch
    best_w1, best_w2, best_b = w1, w2, b

    # Log only the epochs that set a new best.
    print("{0:4} w1 = {1:.5f}, w2 = {2:.5f}, b = {3:.5f}, error = {4:.5f}".format(epoch, w1, w2, b, error))

# Summary: the best candidate found over the whole run.
print("----" * 15)
print("{0:4} w1 = {1:.5f}, w2 = {2:.5f}, b = {3:.5f}, error = {4:.5f}".format(best_epoch, best_w1, best_w2, best_b, best_error))

   0 w1 = 0.16720, w2 = 0.10272, b = -0.72859, error = 1.09869
   1 w1 = 0.82804, w2 = 0.53150, b = 0.84809, error = 1.03204
   2 w1 = 0.80962, w2 = 0.07287, b = -0.77298, error = 0.83242
   4 w1 = -0.18429, w2 = 0.12042, b = 0.37839, error = 0.19042
   7 w1 = 0.05444, w2 = 0.00767, b = 0.40372, error = 0.14693
  21 w1 = -0.14689, w2 = 0.21581, b = 0.45108, error = 0.13260
  60 w1 = 0.03061, w2 = 0.57022, b = 0.07129, error = 0.13072
  96 w1 = -0.06154, w2 = 0.79950, b = 0.06346, error = 0.12450
 216 w1 = 0.65567, w2 = 0.26152, b = 0.12255, error = 0.12167
 264 w1 = 0.12405, w2 = 0.74461, b = 0.15717, error = 0.10481
 409 w1 = 0.58461, w2 = 0.44052, b = -0.07694, error = 0.08539
 785 w1 = 0.55593, w2 = 0.31329, b = 0.09222, error = 0.07776
 806 w1 = 0.28493, w2 = 0.56734, b = 0.13940, error = 0.06612
1001 w1 = 0.20557, w2 = 0.39686, b = 0.16333, error = 0.04413
1464 w1 = 0.43189, w2 = 0.49839, b = 0.01952, error = 0.03420
5299 w1 = 0.28513, w2 = 0.58166, b = 0.06983, error = 0.02158
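------------------------------------------------------------
5299 w1 = 0.28513, w2 = 0.58166, b = 0.06983, error = 0.02158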

### Second Idea - h-step Search

In [6]:
# h-step search: start from a random (w1, w2, b) and, one parameter at a
# time, try moving it by +h and then by -h. A move is kept only when it
# lowers the mean absolute error of the parameters as they currently stand.
num_epoch = 15000

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

# Fixed step size shared by all three parameters.
h = 0.01

for epoch in range(num_epoch):
    # Baseline error of the current parameters.
    y_predict = x1 * w1 + x2 * w2 + b
    current_error = np.abs(y_predict - y).mean()

    # Stop as soon as the fit is good enough.
    if current_error < 0.005:
        break

    # Every accepted move below also refreshes current_error. Without that,
    # the w2 and b trials would be compared against the error measured
    # before w1 moved, and the value printed after the loop could describe
    # parameters that are no longer the current ones.

    # --- w1: try +h first, fall back to -h ---
    y_predict = x1 * (w1 + h) + x2 * w2 + b
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        w1 = w1 + h
        current_error = h_plus_error
    else:
        y_predict = x1 * (w1 - h) + x2 * w2 + b
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            w1 = w1 - h
            current_error = h_minus_error

    # --- w2: same procedure, using the (possibly updated) w1 ---
    y_predict = x1 * w1 + x2 * (w2 + h) + b
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        w2 = w2 + h
        current_error = h_plus_error
    else:
        y_predict = x1 * w1 + x2 * (w2 - h) + b
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            w2 = w2 - h
            current_error = h_minus_error

    # --- b: same procedure, using the (possibly updated) w1 and w2 ---
    y_predict = x1 * w1 + x2 * w2 + (b + h)
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        b = b + h
        current_error = h_plus_error
    else:
        y_predict = x1 * w1 + x2 * w2 + (b - h)
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            b = b - h
            current_error = h_minus_error

# current_error here is the error of the printed (w1, w2, b), whether the
# loop stopped early or ran through all epochs.
print("{0} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, current_error))

109 w1 = 0.29811, w2 = 0.51574 b = 0.09606 error = 0.00472


### Third Idea - Gradient Descent

In [7]:
# Gradient descent: start from a random (w1, w2, b) and repeatedly step each
# parameter against the gradient of half the mean squared error.
num_epoch = 100
learning_rate = 1.1

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

for epoch in range(num_epoch):
    y_predict = w1 * x1 + w2 * x2 + b

    # Mean absolute error is used only for monitoring and the stop test.
    error = np.mean(np.abs(y_predict - y))
    if error < 0.005:
        break

    # All three steps use the same y_predict, computed before any update.
    w1 -= learning_rate * np.mean((y_predict - y) * x1)
    w2 -= learning_rate * np.mean((y_predict - y) * x2)
    b -= learning_rate * np.mean(y_predict - y)

    # Progress log every 10 epochs. The weights shown are the updated ones,
    # while the error shown was measured before this epoch's update.
    if epoch % 10 == 0:
        print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

# Final state once the loop has stopped.
print("----" * 15)
print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

 0 w1 = 0.63523, w2 = 0.44607 b = 0.85886 error = 1.25571
10 w1 = 0.29730, w2 = 0.34217 b = 0.21379 error = 0.05609
20 w1 = 0.28380, w2 = 0.42229 b = 0.15153 error = 0.02204
30 w1 = 0.28724, w2 = 0.46289 b = 0.12695 error = 0.01067
40 w1 = 0.29163, w2 = 0.48202 b = 0.11422 error = 0.00536
------------------------------------------------------------
42 w1 = 0.29202, w2 = 0.48326 b = 0.11334 error = 0.00468


In [8]:
# Gradient descent, run again from a new random starting point with the same
# epoch budget and learning rate.
num_epoch = 100
learning_rate = 1.1

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

for epoch in range(num_epoch):
    y_predict = w1 * x1 + w2 * x2 + b

    # Monitor the mean absolute error and stop once it is small enough.
    error = np.mean(np.abs(y_predict - y))
    if error < 0.005:
        break

    # Each parameter moves against the mean of (prediction error * its
    # input); the bias has an implicit input of 1.
    w1 -= learning_rate * np.mean((y_predict - y) * x1)
    w2 -= learning_rate * np.mean((y_predict - y) * x2)
    b -= learning_rate * np.mean(y_predict - y)

    # Progress log every 10 epochs: updated weights, pre-update error.
    if epoch % 10 == 0:
        print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

# Final state once the loop has stopped.
print("----" * 15)
print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

 0 w1 = -0.28433, w2 = 0.17431 b = 1.00120 error = 0.54307
10 w1 = -0.06276, w2 = 0.24165 b = 0.44684 error = 0.12318
20 w1 = 0.11247, w2 = 0.35496 b = 0.27956 error = 0.06483
30 w1 = 0.20332, w2 = 0.42073 b = 0.19483 error = 0.03415
40 w1 = 0.24989, w2 = 0.45708 b = 0.15014 error = 0.01801
50 w1 = 0.27391, w2 = 0.47691 b = 0.12651 error = 0.00950
60 w1 = 0.28636, w2 = 0.48763 b = 0.11401 error = 0.00502
------------------------------------------------------------
61 w1 = 0.28636, w2 = 0.48763 b = 0.11401 error = 0.00471
