In [44]:
import numpy as np
from collections import namedtuple

In [45]:
AFUNCTION = namedtuple('actFn', ['forward', 'withGrad'])

sigmoid = lambda x: 1/(1+np.exp(-x))
def sigmoid_withgrad(x):
    y = sigmoid(x)
    drv = y*(1-y)
    return y, np.diag(drv.flatten())

relu = lambda x: np.maximum(0, x)
def relu_withgrad(x):
    y = relu(x)
    drv = np.where(y <= 0, 0, 1)
    return y, np.diag(drv.flatten())

identity = lambda x: x
def identity_withgrad(x):
    drv = np.zeros_like(x)+1
    return x, np.diag(drv.flatten())

af_sigmoid = AFUNCTION(sigmoid, sigmoid_withgrad)
af_relu = AFUNCTION(relu, relu_withgrad)
af_identity = AFUNCTION(identity, identity_withgrad)


In [46]:
from math import sqrt

class MLP:
    def __init__(self, input_dim) -> None:
        self.in_dim = input_dim
        self.last_dim = input_dim
        self.layers = []

    class MLPLayer:
        def __init__(self, mat, bias, aFunc):
            self.mat = mat
            self.bias = bias
            self.aFunc = aFunc

    def _createLayer(self, dim_in, dim_out, aFunc, init_method='Xavier'):
        if 'Xavier' == init_method:
            vw = 2/(dim_in+dim_out)
        elif 'He' == init_method:
            vw = 4/(dim_in+dim_out)
        else:
            raise KeyError('가중치 초기화')

        vw = sqrt(vw) # var -> std
        w = np.random.normal(0, vw, (dim_out, dim_in,))

        if aFunc == 'sigmoid':
            afn = af_sigmoid
        elif aFunc == 'relu':
            afn = af_relu
        elif aFunc == 'identity':
            afn = af_identity
        else:
            raise KeyError('활성화 함수')

        return self.MLPLayer(w, np.zeros((dim_out, 1)), afn)
    
    def add(self, dim, act):
        l = self._createLayer(self.last_dim, dim, act)
        self.layers.append(l)
        self.last_dim = dim
    
    def forward(self, X):
        for layer in self.layers:
            _X = np.matmul(layer.mat, X)+layer.bias
            X = layer.aFunc.forward(_X)
        return X

In [47]:
class Optimizer:
    def __init__(self, model:MLP, loss):
        self.model = model
        self.loss = loss
        self.ctr = 0.01

    def optimize(self, x, y):
        grds = []
        for layer in self.model.layers:
            _X = np.matmul(layer.mat, x)+layer.bias
            X, drv = layer.aFunc.withGrad(_X)
            grds.append((x, drv))
            x = X
        grad = self.loss.grad(x, y)
        model = self.model
        for i in reversed(range(len(grds))):
            grad = np.matmul(grad, grds[i][1])
            g_mat = grad.T * grds[i][0].T
            g_bias = grad.T
            grad = np.matmul(grad, model.layers[i].mat)

            model.layers[i].mat -= g_mat*self.ctr
            model.layers[i].bias -= g_bias*self.ctr


In [48]:
class SoftmaxCE:
    @staticmethod
    def softmax(x):
        ex = np.exp(x)
        sx = ex/np.sum(ex)
        return sx

    def __call__(self, x, y):
        x = self.softmax(x)
        ce = -np.sum(y*np.log(x))
        return ce

    def grad(self, x, y):
        return (self.softmax(x)-y).reshape((1, -1))

In [49]:
import pandas as pd

data = pd.read_csv('iris.csv')

X = data[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
Y = pd.get_dummies(data[['Species']], columns=['Species'])

In [50]:
train_idx = np.random.choice(X.shape[0], int(X.shape[0]*0.8))
X_train = X.iloc[train_idx].to_numpy()
Y_train = Y.iloc[train_idx].to_numpy()

X_test = X.drop(train_idx).to_numpy()
Y_test = Y.drop(train_idx).to_numpy()


In [51]:
ml = MLP(4)
ml.add(5, 'sigmoid')
ml.add(3, 'sigmoid')

opt = Optimizer(ml, SoftmaxCE())

In [57]:
ctrs = [0.1, 0.05]
for c in ctrs:
    opt.ctr = c
    for epc in range(5000):
        for x,y in zip(X_train, Y_train):
            x = x.reshape((-1,1))
            y = y.reshape((-1,1))
            opt.optimize(x, y)

In [59]:
pred = ml.forward(X_test.T)
np.mean(pred.argmax(axis=0)==Y_test.argmax(axis=1)).round(2)

0.92