In [59]:
import numpy as np

In [60]:
class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # 가중치 초기화
        # self.w1_2_3_4 = np.random.random((self.input_size, self.hidden_size))
        self.w1_2_3_4 = [[1, 10], [1, 10]]
        # self.w5_6 = np.random.random((self.hidden_size, self.output_size))
        self.w5_6 = [[-40], [40]]

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def forward(self, X):
        # Propogate inputs through the network
        self.z1_2 = np.dot(X, self.w1_2_3_4)    # 1 x 2
        self.h = self.sigmoid(self.z1_2)        # 1 x 2
        self.z3 = np.dot(self.h, self.w5_6)     # 1 x 1
        self.o = self.sigmoid(self.z3)          # 1 x 1
        return self.o
    
    def mse_loss(self, y_true, y_pred):
        # MSE 손실계산
        return np.mean((y_true - y_pred) ** 2)
    
    def backward(self, X, y, y_pred, learning_rate):
        # 체인룰 계산
        dc_do1 = -2 * (y - y_pred)
        do1_dz3 = y_pred * (1 - y_pred)
        dz3_dw5_6 = self.h
        dc_dw5_6 = dc_do1 * do1_dz3 * dz3_dw5_6     # 1 x 2
        self.w5_6 = self.w5_6 + learning_rate * -dc_dw5_6.T # 2 x 1
        dc_dw1_2_3_4 = dc_do1 * do1_dz3 * np.dot(self.w5_6 * (self.h * (1 - self.h)).T, X)

        self.w1_2_3_4 = self.w1_2_3_4 + learning_rate * -dc_dw1_2_3_4.T

    def train(self, X_train, y_train, epochs, learning_rate):
        # for epoch in range(epochs)
        for epoch in range(epochs):
            for i in range(len(X_train)):
                # Forward pass
                y_pred = self.forward([X_train[i]])
                # Compute and print loss
                loss = self.mse_loss([y_train[i]], y_pred)
                # Backward pass
                self.backward([X_train[i]], [y_train[i]], y_pred, learning_rate)
            if np.mod(epoch, 100) == 0:
                print('epoch = ', epoch, 'loss = ', loss)


In [61]:
# 데이터 생성
X_train = np.random.randint(0, 2, (100, 2))
y_train = (X_train[:, 0] != X_train[:, 1]).astype(int)

In [62]:
# 다중 퍼셉트론 선언
mlp = MLP(input_size = 2, hidden_size = 2, output_size = 1)

In [63]:
mlp.train(X_train, y_train, epochs = 1000, learning_rate = 0.1)

epoch =  0 loss =  2.6626258635894363e-06
epoch =  100 loss =  0.0010239162967341034
epoch =  200 loss =  0.0009248629026825089
epoch =  300 loss =  0.0008968392087203384
epoch =  400 loss =  0.0008849877291034041
epoch =  500 loss =  0.0008782700702842171
epoch =  600 loss =  0.0008732125013674105
epoch =  700 loss =  0.0008685148083462654
epoch =  800 loss =  0.000863668777676337
epoch =  900 loss =  0.0008584933106181162


In [64]:
# 테스트 값으로 모델값 예측
test_input = np.array([[0, 0]])
predicted_output = mlp.forward(test_input)
print("Predicted Output: ", test_input, predicted_output)
test_input = np.array([[1, 0]])
predicted_output = mlp.forward(test_input)
print("Predicted Output: ", test_input, predicted_output)
test_input = np.array([[0, 1]])
predicted_output = mlp.forward(test_input)
print("Predicted Output: ", test_input, predicted_output)
test_input = np.array([[1, 1]])
predicted_output = mlp.forward(test_input)
print("Predicted Output: ", test_input, predicted_output)

Predicted Output:  [[0 0]] [[0.01200884]]
Predicted Output:  [[1 0]] [[0.97280066]]
Predicted Output:  [[0 1]] [[0.97117483]]
Predicted Output:  [[1 1]] [[0.04028473]]
