<a href="https://colab.research.google.com/github/danbom/ai/blob/main/mlpxor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# numpy 라이브러리 사용
import numpy as np

# Problem size: 2 inputs, 1 output, 4 training samples
num_input, num_output, num_sample = 2, 1, 4

# Training samples
## X is the input of this neural network, Ot is its target output
## (see "Example: Learning XOR (7)"); one column per sample
X = np.reshape([0, 0, 1, 1, 0, 1, 0, 1], (num_input, num_sample))
Ot = np.reshape([0, 1, 1, 0], (num_output, num_sample))

print("input>\nX = \n", X)
print("\n")
print("output>\nOt = \n", Ot)

In [None]:
# Random initialisation of the layer weights and biases
# (see "Example: Learning XOR (4)")

def init_params(num_input=2, num_hidden=2, output_size=None):
    """Create random initial parameters for the two-layer network.

    Args:
        num_input: Number of input units (columns of ``W1``).
        num_hidden: Number of hidden units (rows of ``W1``, columns of ``W2``).
        output_size: Number of output units. When ``None`` (the default) the
            module-level ``num_output`` is used, which is what this function
            always did before the parameter existed.

    Returns:
        ``(W1, B1, W2, B2)`` with shapes ``(num_hidden, num_input)``,
        ``(num_hidden, 1)``, ``(output_size, num_hidden)`` and
        ``(output_size, 1)``. All values come from ``np.random.rand``.
    """
    if output_size is None:
        # Backward-compatible default: fall back to the global output size.
        output_size = num_output

    # The draw order (W1, B1, W2, B2) is kept so a seeded run reproduces
    # the same parameters as before.
    # First layer: weights and bias are both initialised randomly.
    W1 = np.random.rand(num_hidden, num_input)
    B1 = np.random.rand(num_hidden, 1)

    # Second layer
    W2 = np.random.rand(output_size, num_hidden)
    B2 = np.random.rand(output_size, 1)

    return W1, B1, W2, B2

# Run: draw one set of parameters and display both layers
W1, B1, W2, B2 = init_params()
print(
    "Layer 1 >\nW1 =\n", W1,
    "\nB1 =\n", B1,
    "\n\nLayer 2 >\nW2 =\n", W2,
    "\nB2 =\n", B2,
)

In [None]:
# Affine step of a layer: multiply the weight matrix by the input, add the bias
## 'weight' x 'input' + 'bias'
def wxpb(W, X, B):
    """Return ``np.dot(W, X) + B``."""
    weighted_input = np.dot(W, X)
    return weighted_input + B

# Activation function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``."""
    denominator = 1 + np.exp(-1 * x)
    return 1. / denominator

# Run one forward pass by hand with the freshly initialised parameters
## first layer: pre-activation
Z1 = wxpb(W1, X, B1)
## its output (the hidden layer in Example (4))
Y = sigmoid(Z1)
## second layer: pre-activation
Z2 = wxpb(W2, Y, B2)
## final network output
O = sigmoid(Z2)

# Print the hidden activations and the final output
print("Y =\n", Y)
print("O =\n", O)

In [None]:
# Loss function: the logistic-regression (binary cross-entropy) formula
def loss_func(O, Ot):
    """Return the mean binary cross-entropy between outputs and targets.

    Args:
        O: Network outputs in ``[0, 1]``, laid out with one column per sample
            (the last axis is the sample axis).
        Ot: Target values with the same layout as ``O``.

    Returns:
        ``sum(-(Ot*log(O) + (1-Ot)*log(1-O)))`` divided by the number of
        samples, i.e. the length of the last axis of ``O``.

    Notes:
        * Outputs are clipped to ``[eps, 1 - eps]`` before taking logs. Without
          this, an output of exactly 0 or 1 produces ``0 * log(0) = nan`` (or
          ``inf``), which poisons the loss trace.
        * The sample count is taken from the data instead of the module-level
          ``num_sample``. For the (1, 4) XOR arrays used in this file the
          result is unchanged.
    """
    outputs = np.asarray(O, dtype=float)
    targets = np.asarray(Ot)

    eps = np.finfo(float).eps
    safe = np.clip(outputs, eps, 1 - eps)

    # A 0-d input has no sample axis; treat it as a single sample.
    sample_count = outputs.shape[-1] if outputs.ndim else 1

    per_sample = -1 * (targets * np.log(safe) + (1 - targets) * np.log(1 - safe))
    return np.sum(per_sample) / sample_count

# Loss of the current output O against the targets Ot
loss = loss_func(O, Ot)
print("Loss =\n", loss)

In [None]:
# Forward propagation
def forward(W1, B1, W2, B2):
    """Run both layers on the module-level inputs ``X`` and compute the loss.

    Args:
        W1, B1: Weights and bias of the first layer.
        W2, B2: Weights and bias of the second layer.

    Returns:
        ``(Z1, Y, Z2, O, loss)``: first-layer pre-activation, first-layer
        sigmoid output, second-layer pre-activation, final sigmoid output, and
        ``loss_func(O, Ot)`` against the module-level targets ``Ot``.
    """
    # First layer
    hidden_pre = wxpb(W1, X, B1)
    hidden_act = sigmoid(hidden_pre)

    # Second layer
    output_pre = wxpb(W2, hidden_act, B2)
    output_act = sigmoid(output_pre)

    return hidden_pre, hidden_act, output_pre, output_act, loss_func(output_act, Ot)

# Back propagation (see "Backpropagation in DNN (2)")
def backprop(W1, B1, W2, B2, Z1, Y, Z2, O, Ot, inputs=None):
    """Compute the gradients of the averaged loss for both layers.

    Args:
        W1, B1: First-layer parameters. Not read here; kept so the call
            signature stays the same.
        W2: Second-layer weights, used to push the error back to layer 1.
        B2: Second-layer bias. Not read here.
        Z1, Z2: Pre-activations. Not read here; kept for the call signature.
        Y: First-layer sigmoid output, one column per sample.
        O: Final network output, one column per sample.
        Ot: Target output with the same layout as ``O``.
        inputs: Network inputs, one column per sample. When ``None`` (the
            default) the module-level ``X`` is used, as before.

    Returns:
        ``(dW1, dB1, dW2, dB2)``, each averaged over the samples.

    Notes:
        Previously only the bias gradients were divided by the sample count
        (hard-coded as 4); the weight gradients were plain sums. Because the
        loss is an average over samples, the weight gradients need the same
        1/m factor. The sample count is now read from ``O``.
    """
    if inputs is None:
        inputs = X  # module-level training inputs

    sample_count = np.shape(O)[-1]

    # Second layer first (work backwards from the output)
    dZ2 = O - Ot
    dW2 = np.dot(dZ2, Y.T) / sample_count
    dB2 = np.sum(dZ2, axis=1, keepdims=True) / sample_count
    dY = np.dot(W2.T, dZ2)

    # First layer: Y * (1 - Y) is the sigmoid derivative written via its output
    dZ1 = np.multiply(dY, Y * (1 - Y))
    dW1 = np.dot(dZ1, inputs.T) / sample_count
    dB1 = np.sum(dZ1, axis=1, keepdims=True) / sample_count

    return dW1, dB1, dW2, dB2

In [None]:
# Training loop
## lr = learning rate
## loss_trace : list that records how the loss changes over the training iterations
## the returned loss is the loss seen in the last iteration
## the returned O is the output seen in the last iteration
def train(W1, B1, W2, B2, lr=0.1, iteration=1000):
    """Train the network with plain gradient descent for a fixed number of steps.

    Args:
        W1, B1, W2, B2: Initial parameters. They are rebound, not modified in
            place, so the caller's arrays are left untouched.
        lr: Learning rate applied to every gradient.
        iteration: Number of update steps to run.

    Returns:
        ``(W1, B1, W2, B2, loss_trace, loss, O)``. ``loss_trace`` holds one
        loss per step. ``loss`` and ``O`` come from the forward pass of the
        last step, i.e. they were computed just before the final update. If no
        step runs (``iteration <= 0``), they come from a single forward pass
        on the unchanged parameters, instead of raising UnboundLocalError.
    """
    loss_trace = []

    # `_` instead of `iter`: the index is unused and `iter` shadows a builtin.
    for _ in range(iteration):
        ## Forward propagation (see "Training / Inference")
        Z1, Y, Z2, O, loss = forward(W1, B1, W2, B2)
        ## Backpropagation
        dW1, dB1, dW2, dB2 = backprop(W1, B1, W2, B2, Z1, Y, Z2, O, Ot)
        ## Update weights/biases
        W1 = W1 - lr * dW1
        B1 = B1 - lr * dB1
        W2 = W2 - lr * dW2
        B2 = B2 - lr * dB2

        loss_trace.append(loss)

    if not loss_trace:
        # No step was taken: report the state of the initial parameters.
        _, _, _, O, loss = forward(W1, B1, W2, B2)

    return W1, B1, W2, B2, loss_trace, loss, O

In [None]:
# Actually run the training
## initialize
W1, B1, W2, B2 = init_params()
## run: learning rate 0.1 for 50000 iterations
W1, B1, W2, B2, loss_trace, loss, O = train(W1, B1, W2, B2, 0.1, 50000)

In [None]:
# loss_trace 그래프
import matplotlib.pyplot as plt

# Draw the loss curve, then report the final output and loss
plt.plot(loss_trace)
plt.xlabel('iterations')
plt.ylabel('loss')
plt.show()
print("최종 output =\n", O)
print("최종 loss =\n", loss)

In [None]:
# Train five times, each from a fresh random initialisation, and print each final loss
for i in range(5):
    W1, B1, W2, B2, loss_trace, loss, O = train(*init_params(), 0.1, 4000)
    print(i + 1, "번째 최종 loss =\n", loss)

In [None]:
import matplotlib.pyplot as plt

def test_train(W1, B1, W2, B2, lr, iteration):
    """Train like ``train`` but stop as soon as the loss drops below 0.02.

    Args:
        W1, B1, W2, B2: Initial parameters. They are rebound, not modified in
            place, so the caller's arrays are left untouched.
        lr: Learning rate applied to every gradient.
        iteration: Maximum number of update steps.

    Returns:
        ``(W1, B1, W2, B2, loss_trace, loss, O)``. ``loss`` and ``O`` come
        from the forward pass of the last executed step, computed before that
        step's update. If no step runs (``iteration <= 0``), they come from a
        single forward pass on the unchanged parameters, instead of raising
        UnboundLocalError.

    Side effects:
        When the threshold is reached, prints the learning rate, the
        zero-based index of the step that reached it, and the loss there.
    """
    loss_trace = []

    # `step` instead of `iter`, which shadows a builtin. The old `flag`
    # guard is gone: the `break` already makes the report fire only once.
    for step in range(iteration):
        ## Forward propagation (see "Training / Inference")
        Z1, Y, Z2, O, loss = forward(W1, B1, W2, B2)
        ## Backpropagation
        dW1, dB1, dW2, dB2 = backprop(W1, B1, W2, B2, Z1, Y, Z2, O, Ot)
        ## Update weights/biases
        W1 = W1 - lr * dW1
        B1 = B1 - lr * dB1
        W2 = W2 - lr * dW2
        B2 = B2 - lr * dB2

        loss_trace.append(loss)

        if loss < 0.02:
            print("Learning rate가 ", lr, "인 경우 >")
            print("0.02 미만에 도달하는 iteration 횟수 : ", step)
            print("이 때의 loss 값 : ", loss, "\n")
            break

    if not loss_trace:
        # No step was taken: report the state of the initial parameters.
        _, _, _, O, loss = forward(W1, B1, W2, B2)

    return W1, B1, W2, B2, loss_trace, loss, O
    
def _plot_loss_trace(trace):
    """Draw one wide loss-vs-iteration figure and print a blank separator."""
    plt.figure(figsize=(12, 3))
    plt.plot(trace)
    plt.ylabel('loss')
    plt.xlabel('iterations')
    plt.show()
    print("\n")


# NOTE: the snapshot lines used to read `fixW1, fixB2, fixW2, fixB2 = ...`,
# which bound `fixB2` twice and never defined `fixB1`. The lr = 0.1 and
# lr = 0.01 runs therefore received the second-layer bias as the first-layer
# bias. The snapshot now keeps all four initial parameters under their own names.

# learning rate = 1

W1, B1, W2, B2 = init_params()
fixW1, fixB1, fixW2, fixB2 = W1, B1, W2, B2
W1, B1, W2, B2, loss_trace, loss, O = test_train(W1, B1, W2, B2, 1, 100000)
_plot_loss_trace(loss_trace)


# learning rate = 0.5

W1, B1, W2, B2 = init_params()
fixW1, fixB1, fixW2, fixB2 = W1, B1, W2, B2
W1, B1, W2, B2, loss_trace, loss, O = test_train(W1, B1, W2, B2, 0.5, 100000)
_plot_loss_trace(loss_trace)

# learning rate = 0.1
## starts from the initial parameters saved for the lr = 0.5 run

W1, B1, W2, B2, loss_trace, loss, O = test_train(fixW1, fixB1, fixW2, fixB2, 0.1, 100000)
_plot_loss_trace(loss_trace)

# learning rate = 0.01
## starts from the same saved initial parameters

W1, B1, W2, B2, loss_trace, loss, O = test_train(fixW1, fixB1, fixW2, fixB2, 0.01, 100000)
_plot_loss_trace(loss_trace)


# learning rate = 0.0001

W1, B1, W2, B2 = init_params()
fixW1, fixB1, fixW2, fixB2 = W1, B1, W2, B2
W1, B1, W2, B2, loss_trace, loss, O = test_train(W1, B1, W2, B2, 0.0001, 100000)
_plot_loss_trace(loss_trace)

In [None]:
# Case where every weight and bias is initialised to 0
## initialize
def test_init_params(num_input=2, num_hidden=2):
    """Return all-zero parameters ``(W1, B1, W2, B2)`` for the two-layer network.

    Shapes are ``(num_hidden, num_input)``, ``(num_hidden, 1)``,
    ``(num_output, num_hidden)`` and ``(num_output, 1)``, where ``num_output``
    is the module-level output size.
    """
    layer_shapes = (
        (num_hidden, num_input),   # first-layer weights
        (num_hidden, 1),           # first-layer bias
        (num_output, num_hidden),  # second-layer weights
        (num_output, 1),           # second-layer bias
    )
    W1, B1, W2, B2 = (np.zeros(shape) for shape in layer_shapes)
    return W1, B1, W2, B2

## run: train from the all-zero initialisation (learning rate 0.1, 4000 iterations)
W1, B1, W2, B2 = test_init_params()
W1, B1, W2, B2, loss_trace, loss, O = train(W1, B1, W2, B2, 0.1, 4000)

# loss_trace 그래프
import matplotlib.pyplot as plt

# Loss curve of the zero-initialised run, followed by its final output and loss
plt.plot(loss_trace)
plt.xlabel('iterations')
plt.ylabel('loss')
plt.show()
print("최종 output =\n", O)
print("최종 loss =\n", loss)

In [None]:
# Train five times from the all-zero initialisation and print each final loss
for i in range(5):
    W1, B1, W2, B2, loss_trace, loss, O = train(*test_init_params(), 0.1, 4000)
    print(i + 1, "번째 최종 loss =\n", loss)