# 미니배치 적용 MLP

# 은닉층 1개(128 유닛), ReLU, 배치 크기 = 1

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


class MLP:
    """Multilayer perceptron with one ReLU hidden layer and a softmax output.

    The network is trained with mini-batch stochastic gradient descent on
    the softmax cross-entropy loss. Inputs are 2-D arrays of shape
    (n_samples, n_input); targets are one-hot arrays of shape
    (n_samples, n_output).
    """

    def __init__(self, n_input, n_hidden1, n_output, size):
        """Store the layer sizes and the mini-batch size, then initialise parameters.

        Args:
            n_input: Number of input features.
            n_hidden1: Number of units in the hidden layer.
            n_output: Number of output classes.
            size: Mini-batch size used by ``backpropagation``.
        """
        self.n_input = n_input
        self.n_hidden1 = n_hidden1
        self.n_output = n_output
        self.size = size

        # He-style initialisation: N(0, 1) / sqrt(fan_in / 2); biases start at zero.
        self.w1 = np.random.randn(self.n_input, self.n_hidden1) / np.sqrt(self.n_input / 2)
        self.b1 = np.zeros((1, self.n_hidden1))
        self.w2 = np.random.randn(self.n_hidden1, self.n_output) / np.sqrt(self.n_hidden1 / 2)
        self.b2 = np.zeros((1, self.n_output))

    def ReLU(self, z):
        """Return the element-wise rectified linear unit ``max(0, z)``."""
        return np.maximum(0, z)

    def softmax(self, z):
        """Return the row-wise softmax of the 2-D array ``z``.

        The maximum is subtracted per row (not globally) so that no row can
        underflow to an all-zero vector and produce 0/0 = NaN.
        """
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def feed(self, x, w, b):
        """Return the affine transform ``x @ w + b``."""
        return np.dot(x, w) + b

    def cross_entropy(self, y, o):
        """Return the cross-entropy between one-hot ``y`` and probabilities ``o``.

        The value is averaged over every element of ``y * log(o)`` (samples
        and classes), matching the scale of the original implementation.
        Probabilities are clipped away from zero so ``0 * log(0)`` cannot
        turn the loss into NaN.
        """
        safe_o = np.clip(o, np.finfo(np.float64).tiny, 1.0)
        return -np.mean(y * np.log(safe_o))

    def feedforward(self, X, y=None):
        """Run a forward pass.

        Args:
            X: Input batch of shape (n_samples, n_input).
            y: Unused; accepted only for backward compatibility.

        Returns:
            Tuple ``(o_1, input_x)``: softmax probabilities and the hidden
            layer activations.
        """
        input_x = self.ReLU(self.feed(X, self.w1, self.b1))
        o_1 = self.softmax(self.feed(input_x, self.w2, self.b2))
        return o_1, input_x

    def batch(self, X, o, loss, w):
        """Compute one layer's gradients and the error passed to the layer below.

        Args:
            X: Unused; accepted only for backward compatibility.
            o: Activations that fed this layer (output of the previous ReLU).
            loss: Error signal arriving at this layer's pre-activation.
            w: This layer's weight matrix.

        Returns:
            Tuple ``(update_w, update_b, pass_loss)`` where the updates are
            averaged over ``self.size`` and ``pass_loss`` is the error
            propagated through the previous layer's ReLU.
        """
        update_w = np.dot(o.T, loss) / self.size
        update_b = np.sum(loss, axis=0, keepdims=True) / self.size
        # ReLU derivative: gradient flows only where the activation was positive.
        pass_loss = np.dot(loss, w.T) * (o > 0)
        return update_w, update_b, pass_loss

    def backpropagation(self, X, y, epochs, lr):
        """Train the network with mini-batch SGD.

        Samples are reshuffled every epoch; a trailing partial batch is
        dropped. Every 10 epochs the loss of the last processed mini-batch
        is printed.

        Args:
            X: Training inputs of shape (n_samples, n_input).
            y: One-hot training targets of shape (n_samples, n_output).
            epochs: Number of passes over the data.
            lr: Learning rate.

        Raises:
            ValueError: If ``X`` holds fewer samples than one mini-batch.
        """
        # Use the data actually passed in rather than a module-level global.
        batches = X.shape[0] // self.size
        if batches == 0:
            raise ValueError(
                f'need at least {self.size} samples for one mini-batch, got {X.shape[0]}'
            )

        for epoch in range(epochs):
            # Reshuffle so every epoch sees different mini-batches.
            shuffle = np.random.permutation(len(X))
            X = X[shuffle]
            y = y[shuffle]

            for batch in range(batches):
                start = batch * self.size
                end = start + self.size
                X_batch = X[start:end]
                y_batch = y[start:end]

                o_1, input_x = self.feedforward(X_batch, y_batch)
                loss = self.cross_entropy(y_batch, o_1)
                # Gradient of softmax + cross-entropy w.r.t. the output logits.
                sgd_loss = o_1 - y_batch

                update_w2, update_b2, pass_loss = self.batch(X_batch, input_x, sgd_loss, self.w2)
                update_w1 = np.dot(X_batch.T, pass_loss) / self.size
                update_b1 = np.sum(pass_loss, axis=0, keepdims=True) / self.size

                # Gradient-descent step.
                self.w2 -= lr * update_w2
                self.b2 -= lr * update_b2
                self.w1 -= lr * update_w1
                self.b1 -= lr * update_b1

            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss {loss:.4f}')

    def predict(self, X, y=None):
        """Return the predicted class index for each row of ``X``.

        ``y`` is unused and accepted only for backward compatibility.
        """
        o_1, _ = self.feedforward(X, y)
        return np.argmax(o_1, axis=1)

# Load the Iris data set.
iris = load_iris()
X = iris.data
y = iris.target

# Split first so that preprocessing statistics come from the training set only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Scale features to [0, 1]; fitting on the training split avoids test-set leakage.
minmaxscalar = MinMaxScaler()
X_train = minmaxscalar.fit_transform(X_train)
X_test = minmaxscalar.transform(X_test)

# Convert labels to one-hot encoding.
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)

# Build the model; layer sizes follow the data instead of hard-coded constants.
mlp = MLP(n_input=X_train.shape[1], n_hidden1=128, n_output=y_train.shape[1], size=1)

# Train.
mlp.backpropagation(X_train, y_train, epochs=100, lr=0.1)

# Evaluate on the held-out split.
y_pred = mlp.predict(X_test, y_test)

# Accuracy: fraction of test samples whose predicted class matches the label.
y_test_true = np.argmax(y_test, axis=1)
accuracy = np.mean(y_pred == y_test_true)
print(f'Test Accuracy: {accuracy:.4f}')

Epoch 0, Loss 0.1184
Epoch 10, Loss 0.0063
Epoch 20, Loss 0.0006
Epoch 30, Loss 0.0016
Epoch 40, Loss 0.0056
Epoch 50, Loss 0.0000
Epoch 60, Loss 0.0046
Epoch 70, Loss 0.2224
Epoch 80, Loss 0.0000
Epoch 90, Loss 0.0000
Test Accuracy: 0.9833


# 은닉층 2개(256, 128 유닛), ReLU, 배치 크기 = 1

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


class MLP:
    """Multilayer perceptron with two ReLU hidden layers and a softmax output.

    The network is trained with mini-batch stochastic gradient descent on
    the softmax cross-entropy loss. Inputs are 2-D arrays of shape
    (n_samples, n_input); targets are one-hot arrays of shape
    (n_samples, n_output).
    """

    def __init__(self, n_input, n_hidden1, n_hidden2, n_output, size):
        """Store the layer sizes and the mini-batch size, then initialise parameters.

        Args:
            n_input: Number of input features.
            n_hidden1: Number of units in the first hidden layer.
            n_hidden2: Number of units in the second hidden layer.
            n_output: Number of output classes.
            size: Mini-batch size used during training.
        """
        self.n_input = n_input
        self.n_hidden1 = n_hidden1
        self.n_hidden2 = n_hidden2
        self.n_output = n_output
        self.size = size

        # He-style initialisation: N(0, 1) / sqrt(fan_in / 2); biases start at zero.
        self.w1 = np.random.randn(self.n_input, self.n_hidden1) / np.sqrt(self.n_input / 2)
        self.b1 = np.zeros((1, self.n_hidden1))
        self.w2 = np.random.randn(self.n_hidden1, self.n_hidden2) / np.sqrt(self.n_hidden1 / 2)
        self.b2 = np.zeros((1, self.n_hidden2))
        self.w3 = np.random.randn(self.n_hidden2, self.n_output) / np.sqrt(self.n_hidden2 / 2)
        self.b3 = np.zeros((1, self.n_output))

    def ReLU(self, z):
        """Return the element-wise rectified linear unit ``max(0, z)``."""
        return np.maximum(0, z)

    def softmax(self, z):
        """Return the row-wise softmax of the 2-D array ``z``.

        The maximum is subtracted per row (not globally) so that no row can
        underflow to an all-zero vector and produce 0/0 = NaN.
        """
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def feed(self, x, w, b):
        """Return the affine transform ``x @ w + b``."""
        return np.dot(x, w) + b

    def cross_entropy(self, y, o):
        """Return the cross-entropy between one-hot ``y`` and probabilities ``o``.

        The value is averaged over every element of ``y * log(o)`` (samples
        and classes), matching the scale of the original implementation.
        Probabilities are clipped away from zero so ``0 * log(0)`` cannot
        turn the loss into NaN.
        """
        safe_o = np.clip(o, np.finfo(np.float64).tiny, 1.0)
        return -np.mean(y * np.log(safe_o))

    def feedforward(self, X, y=None):
        """Run a forward pass.

        Args:
            X: Input batch of shape (n_samples, n_input).
            y: Unused; accepted only for backward compatibility.

        Returns:
            Tuple ``(o_1, input_x, z_1)``: softmax probabilities, first
            hidden layer activations, second hidden layer activations.
        """
        input_x = self.ReLU(self.feed(X, self.w1, self.b1))
        z_1 = self.ReLU(self.feed(input_x, self.w2, self.b2))
        o_1 = self.softmax(self.feed(z_1, self.w3, self.b3))
        return o_1, input_x, z_1

    def batch(self, X, o, loss, w):
        """Compute one layer's gradients and the error passed to the layer below.

        Args:
            X: Unused; accepted only for backward compatibility.
            o: Activations that fed this layer (output of the previous ReLU).
            loss: Error signal arriving at this layer's pre-activation.
            w: This layer's weight matrix.

        Returns:
            Tuple ``(update_w, update_b, pass_loss)`` where the updates are
            averaged over ``self.size`` and ``pass_loss`` is the error
            propagated through the previous layer's ReLU.
        """
        update_w = np.dot(o.T, loss) / self.size
        update_b = np.sum(loss, axis=0, keepdims=True) / self.size
        # ReLU derivative: gradient flows only where the activation was positive.
        pass_loss = np.dot(loss, w.T) * (o > 0)
        return update_w, update_b, pass_loss

    def backpropagation(self, X, y, epochs, lr):
        """Train the network with mini-batch SGD.

        Samples are reshuffled every epoch; a trailing partial batch is
        dropped. Every 100 epochs the loss of the last processed mini-batch
        is printed.

        Args:
            X: Training inputs of shape (n_samples, n_input).
            y: One-hot training targets of shape (n_samples, n_output).
            epochs: Number of passes over the data.
            lr: Learning rate.

        Raises:
            ValueError: If ``X`` holds fewer samples than one mini-batch.
        """
        batches = X.shape[0] // self.size
        if batches == 0:
            raise ValueError(
                f'need at least {self.size} samples for one mini-batch, got {X.shape[0]}'
            )

        for epoch in range(epochs):
            # Reshuffle so every epoch sees different mini-batches.
            shuffle = np.random.permutation(len(X))
            X = X[shuffle]
            y = y[shuffle]

            for batch in range(batches):
                start_idx = batch * self.size
                end_idx = start_idx + self.size
                X_batch = X[start_idx:end_idx]
                y_batch = y[start_idx:end_idx]

                o_1, input_x, z_1 = self.feedforward(X_batch, y_batch)
                loss = self.cross_entropy(y_batch, o_1)
                # Gradient of softmax + cross-entropy w.r.t. the output logits.
                sgd_loss = o_1 - y_batch

                update_w3, update_b3, pass_loss2 = self.batch(X_batch, z_1, sgd_loss, self.w3)
                update_w2, update_b2, pass_loss = self.batch(X_batch, input_x, pass_loss2, self.w2)
                update_w1 = np.dot(X_batch.T, pass_loss) / self.size
                update_b1 = np.sum(pass_loss, axis=0, keepdims=True) / self.size

                # Gradient-descent step.
                self.w3 -= lr * update_w3
                self.b3 -= lr * update_b3
                self.w2 -= lr * update_w2
                self.b2 -= lr * update_b2
                self.w1 -= lr * update_w1
                self.b1 -= lr * update_b1

            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss {loss:.4f}')

    # Backward-compatible alias for the original, misspelled method name.
    backpropagtion = backpropagation

    def predict(self, X, y=None):
        """Return the predicted class index for each row of ``X``.

        ``y`` is unused and accepted only for backward compatibility.
        """
        o_1, _, _ = self.feedforward(X, y)
        return np.argmax(o_1, axis=1)

# Load the Iris data set.
iris = load_iris()
X = iris.data
y = iris.target

# Split first so that preprocessing statistics come from the training set only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Scale features to [0, 1]; fitting on the training split avoids test-set leakage.
minmaxscalar = MinMaxScaler()
X_train = minmaxscalar.fit_transform(X_train)
X_test = minmaxscalar.transform(X_test)

# Convert labels to one-hot encoding.
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)

# Build the model; layer sizes follow the data instead of hard-coded constants.
mlp = MLP(n_input=X_train.shape[1], n_hidden1=256, n_hidden2=128, n_output=y_train.shape[1], size=1)

# Train (the method name keeps the class's original spelling).
mlp.backpropagtion(X_train, y_train, epochs=1000, lr=0.01)

# Evaluate on the held-out split.
y_pred = mlp.predict(X_test, y_test)

# Accuracy: fraction of test samples whose predicted class matches the label.
y_test_true = np.argmax(y_test, axis=1)
accuracy = np.mean(y_pred == y_test_true)
print(f'Test Accuracy: {accuracy:.4f}')

Epoch 0, Loss 0.0324
Epoch 100, Loss 0.0000
Epoch 200, Loss 0.0002
Epoch 300, Loss 0.0113
Epoch 400, Loss 0.0475
Epoch 500, Loss 0.0000
Epoch 600, Loss 0.0000
Epoch 700, Loss 0.0001
Epoch 800, Loss 0.0000
Epoch 900, Loss 0.0000
Test Accuracy: 0.9833


# 은닉층 2개(256, 128 유닛), ReLU, 배치 크기 = 5

In [None]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


class MLP:
    """Multilayer perceptron with two ReLU hidden layers and a softmax output.

    The network is trained with mini-batch stochastic gradient descent on
    the softmax cross-entropy loss, with early stopping once the loss of
    the last mini-batch of an epoch becomes negligible. Inputs are 2-D
    arrays of shape (n_samples, n_input); targets are one-hot arrays of
    shape (n_samples, n_output).
    """

    def __init__(self, n_input, n_hidden1, n_hidden2, n_output, size):
        """Store the layer sizes and the mini-batch size, then initialise parameters.

        Args:
            n_input: Number of input features.
            n_hidden1: Number of units in the first hidden layer.
            n_hidden2: Number of units in the second hidden layer.
            n_output: Number of output classes.
            size: Mini-batch size used by ``backpropagation``.
        """
        self.n_input = n_input
        self.n_hidden1 = n_hidden1
        self.n_hidden2 = n_hidden2
        self.n_output = n_output
        self.size = size

        # He-style initialisation: N(0, 1) / sqrt(fan_in / 2); biases start at zero.
        self.w1 = np.random.randn(self.n_input, self.n_hidden1) / np.sqrt(self.n_input / 2)
        self.b1 = np.zeros((1, self.n_hidden1))
        self.w2 = np.random.randn(self.n_hidden1, self.n_hidden2) / np.sqrt(self.n_hidden1 / 2)
        self.b2 = np.zeros((1, self.n_hidden2))
        self.w3 = np.random.randn(self.n_hidden2, self.n_output) / np.sqrt(self.n_hidden2 / 2)
        self.b3 = np.zeros((1, self.n_output))

    def ReLU(self, z):
        """Return the element-wise rectified linear unit ``max(0, z)``."""
        return np.maximum(0, z)

    def softmax(self, z):
        """Return the row-wise softmax of the 2-D array ``z``.

        The maximum is subtracted per row (not globally) so that no row can
        underflow to an all-zero vector and produce 0/0 = NaN.
        """
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def feed(self, x, w, b):
        """Return the affine transform ``x @ w + b``."""
        return np.dot(x, w) + b

    def cross_entropy(self, y, o):
        """Return the cross-entropy between one-hot ``y`` and probabilities ``o``.

        The value is averaged over every element of ``y * log(o)`` (samples
        and classes), matching the scale of the original implementation.
        Probabilities are clipped away from zero so ``0 * log(0)`` cannot
        turn the loss into NaN.
        """
        safe_o = np.clip(o, np.finfo(np.float64).tiny, 1.0)
        return -np.mean(y * np.log(safe_o))

    def feedforward(self, X, y=None):
        """Run a forward pass.

        Args:
            X: Input batch of shape (n_samples, n_input).
            y: Unused; accepted only for backward compatibility.

        Returns:
            Tuple ``(o_1, input_x, z_1)``: softmax probabilities, first
            hidden layer activations, second hidden layer activations.
        """
        input_x = self.ReLU(self.feed(X, self.w1, self.b1))
        z_1 = self.ReLU(self.feed(input_x, self.w2, self.b2))
        o_1 = self.softmax(self.feed(z_1, self.w3, self.b3))
        return o_1, input_x, z_1

    def batch(self, X, o, loss, w):
        """Compute one layer's gradients and the error passed to the layer below.

        Args:
            X: Unused; accepted only for backward compatibility.
            o: Activations that fed this layer (output of the previous ReLU).
            loss: Error signal arriving at this layer's pre-activation.
            w: This layer's weight matrix.

        Returns:
            Tuple ``(update_w, update_b, pass_loss)`` where the updates are
            averaged over ``self.size`` and ``pass_loss`` is the error
            propagated through the previous layer's ReLU.
        """
        update_w = np.dot(o.T, loss) / self.size
        update_b = np.sum(loss, axis=0, keepdims=True) / self.size
        # ReLU derivative: gradient flows only where the activation was positive.
        pass_loss = np.dot(loss, w.T) * (o > 0)
        return update_w, update_b, pass_loss

    def backpropagation(self, X, y, epochs, lr):
        """Train the network with mini-batch SGD and simple early stopping.

        Samples are reshuffled every epoch; a trailing partial batch is
        dropped. Every 100 epochs the loss of the last processed mini-batch
        is printed, and training stops as soon as that loss drops below 1e-6.

        Args:
            X: Training inputs of shape (n_samples, n_input).
            y: One-hot training targets of shape (n_samples, n_output).
            epochs: Maximum number of passes over the data.
            lr: Learning rate.

        Raises:
            ValueError: If ``X`` holds fewer samples than one mini-batch.
        """
        # Use the data actually passed in rather than a module-level global.
        batches = X.shape[0] // self.size
        if batches == 0:
            raise ValueError(
                f'need at least {self.size} samples for one mini-batch, got {X.shape[0]}'
            )

        for epoch in range(epochs):
            # Reshuffle so every epoch sees different mini-batches.
            shuffle = np.random.permutation(len(X))
            X = X[shuffle]
            y = y[shuffle]

            for batch in range(batches):
                start = batch * self.size
                end = start + self.size
                X_batch = X[start:end]
                y_batch = y[start:end]

                o_1, input_x, z_1 = self.feedforward(X_batch, y_batch)
                loss = self.cross_entropy(y_batch, o_1)
                # Gradient of softmax + cross-entropy w.r.t. the output logits.
                sgd_loss = o_1 - y_batch

                update_w3, update_b3, pass_loss2 = self.batch(X_batch, z_1, sgd_loss, self.w3)
                update_w2, update_b2, pass_loss = self.batch(X_batch, input_x, pass_loss2, self.w2)
                update_w1 = np.dot(X_batch.T, pass_loss) / self.size
                update_b1 = np.sum(pass_loss, axis=0, keepdims=True) / self.size

                # Gradient-descent step.
                self.w3 -= lr * update_w3
                self.b3 -= lr * update_b3
                self.w2 -= lr * update_w2
                self.b2 -= lr * update_b2
                self.w1 -= lr * update_w1
                self.b1 -= lr * update_b1

            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss {loss:.4f}')
            # Early stopping on the last mini-batch loss of this epoch.
            if loss < 0.000001:
                break

    def predict(self, X, y=None):
        """Return the predicted class index for each row of ``X``.

        ``y`` is unused and accepted only for backward compatibility.
        """
        o_1, _, _ = self.feedforward(X, y)
        return np.argmax(o_1, axis=1)

# Load the Iris data set.
iris = load_iris()
X = iris.data
y = iris.target

# Split first so that preprocessing statistics come from the training set only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Scale features to [0, 1]; fitting on the training split avoids test-set leakage.
minmaxscalar = MinMaxScaler()
X_train = minmaxscalar.fit_transform(X_train)
X_test = minmaxscalar.transform(X_test)

# Convert labels to one-hot encoding.
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)

# Build the model; layer sizes follow the data instead of hard-coded constants.
mlp = MLP(n_input=X_train.shape[1], n_hidden1=256, n_hidden2=128, n_output=y_train.shape[1], size=5)

# Train.
mlp.backpropagation(X_train, y_train, epochs=1000, lr=0.01)

# Evaluate on the held-out split.
y_pred = mlp.predict(X_test, y_test)

# Accuracy: fraction of test samples whose predicted class matches the label.
y_test_true = np.argmax(y_test, axis=1)
accuracy = np.mean(y_pred == y_test_true)
print(f'Test Accuracy: {accuracy:.4f}')

Epoch 0, Loss 0.2455
Epoch 100, Loss 0.0574
Epoch 200, Loss 0.0847
Epoch 300, Loss 0.0392
Epoch 400, Loss 0.0420
Epoch 500, Loss 0.0011
Epoch 600, Loss 0.0002
Epoch 700, Loss 0.0007
Epoch 800, Loss 0.0289
Epoch 900, Loss 0.0000
Test Accuracy: 0.9833
