# In [1]:
from abc import ABC, abstractmethod
import numpy as np
import time
import re
from collections import OrderedDict


# Activation functions
class ActivationBase(ABC):
    """Abstract base class for activation functions.

    Subclasses implement ``forward`` (the activation itself) and ``grad``
    (its derivative). Instances are callable: calling one runs ``forward``
    after promoting a 1-D input to a single-row 2-D array.
    """

    def __init__(self, **kwargs):
        # Keyword arguments are accepted but not used by the base class.
        super().__init__()

    def __call__(self, z):
        """Apply the activation to ``z``.

        :param z: array exposing ``ndim`` and ``reshape`` (e.g. a NumPy array).
        :return: the result of ``self.forward`` on the (possibly reshaped) input.
        """
        # NumPy arrays expose ``ndim``; there is no ``dim`` attribute, so the
        # previous ``z.dim`` check raised AttributeError on every call.
        if z.ndim == 1:
            # Treat a flat vector as a batch containing one sample.
            z = z.reshape(1, -1)
        return self.forward(z)

    @abstractmethod
    def forward(self, z):
        """Forward pass: apply the activation function to ``z``."""
        raise NotImplementedError

    @abstractmethod
    def grad(self, x, **kwargs):
        """Backward pass: return the gradient of the activation at ``x``."""
        raise NotImplementedError


class ReLU(ActivationBase):
    """Rectified linear unit activation."""

    def __init__(self):
        super().__init__()

    def __str__(self):
        return 'ReLU'

    def forward(self, z):
        """Clip ``z`` to the range [0, +inf): negative entries become 0."""
        return np.clip(z, a_min=0, a_max=np.inf)

    def grad(self, x, **kwargs):
        """Return 1 where ``x`` is strictly positive and 0 elsewhere, as ints."""
        is_positive = x > 0
        return is_positive.astype(int)


class Sigmoid(ActivationBase):
    """Sigmoid activation function."""

    def __init__(self):
        super().__init__()

    def __str__(self):
        return 'Sigmoid'

    def forward(self, z):
        """Return ``1 / (1 + exp(-z))`` element-wise."""
        return 1 / (1 + np.exp(-z))

    def grad(self, x, **kwargs):
        """Return the sigmoid derivative ``s * (1 - s)`` with ``s = forward(x)``."""
        # Evaluate the forward pass once and reuse it instead of computing the
        # exponential twice; the returned value is unchanged.
        s = self.forward(x)
        return s * (1 - s)

class Tanh(ActivationBase):
    """Hyperbolic tangent activation function."""

    def __init__(self):
        super().__init__()

    def __str__(self):
        return 'Tanh'

    def forward(self, z):
        """Return ``tanh(z)`` element-wise."""
        activated = np.tanh(z)
        return activated

    def grad(self, x, **kwargs):
        """Return the derivative ``1 - tanh(x)^2``."""
        tanh_x = np.tanh(x)
        return 1 - tanh_x ** 2


class Affine(ActivationBase):
    """Affine activation: outputs ``slope * z + intercept``.

    With ``slope=1`` and ``intercept=0`` the input is passed through unchanged.
    """

    def __init__(self, slope=1, intercept=0):
        super().__init__()
        self.slope, self.intercept = slope, intercept

    def __str__(self):
        return f'Affine(slope={self.slope}, intercept={self.intercept})'

    def forward(self, z):
        """Scale ``z`` by the slope, then shift by the intercept."""
        scaled = self.slope * z
        return scaled + self.intercept

    def grad(self, x, **kwargs):
        """Return an array shaped like ``x`` filled with the slope."""
        ones = np.ones_like(x)
        return self.slope * ones


class ActivationInitializer:
    """Build an activation object from its textual name.

    Recognised names (case-insensitive): ``'sigmoid'``, ``'relu'``, ``'tanh'``
    and ``'affine(slope=<number>, intercept=<number>)'``.
    """

    def __init__(self, acti_name='sigmoid'):
        """
        :param acti_name: name of the activation to construct.
        """
        self.acti_name = acti_name

    def __call__(self, *args, **kwargs):
        """Instantiate the activation named by ``self.acti_name``.

        Positional and keyword arguments are accepted but ignored.

        :return: a new activation instance.
        :raises ValueError: if the name is not recognised, or an affine
            specification cannot be parsed.
        """
        name = self.acti_name.lower()
        if name == 'sigmoid':
            return Sigmoid()
        if name == 'relu':
            return ReLU()
        if name == 'tanh':
            return Tanh()
        if 'affine' in name:
            # The pattern accepts the lower-cased text produced by
            # Affine.__str__, e.g. 'affine(slope=1, intercept=0)'. The '='
            # after 'slope' was missing before, so the captured slope was
            # '=1' and float() rejected it.
            pattern = (
                r'affine\(\s*slope\s*=\s*(.*?)\s*,'
                r'\s*intercept\s*=\s*(.*?)\s*\)'
            )
            matched = re.match(pattern, name)
            if matched is None:
                raise ValueError(
                    f'Cannot parse affine activation: {self.acti_name!r}'
                )
            slope, intercept = matched.groups()
            return Affine(float(slope), float(intercept))
        # Previously an unknown name fell through every branch and surfaced
        # as an UnboundLocalError on the return statement.
        raise ValueError(f'Unknown activation function: {self.acti_name!r}')

# In [2]:
# Output units
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``, element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def softmax(x):
    """Return the softmax of ``x`` taken along its last axis."""
    # Subtracting the per-row maximum keeps the exponentials from overflowing.
    row_max = np.max(x, axis=-1, keepdims=True)
    exps = np.exp(x - row_max)
    totals = exps.sum(axis=-1, keepdims=True)
    return exps / totals

# In [None]:
# Weight initialization methods
class std_normal:
    """Standard-normal initializer: samples from N(0, 1) scaled by ``gain``."""

    def __init__(self, gain=0.01):
        self.gain = gain

    def __call__(self, weight_shape):
        """Return an array of shape ``weight_shape`` of scaled normal samples."""
        samples = np.random.randn(*weight_shape)
        return self.gain * samples


class he_uniform:
    """He initializer: draws W from Uniform(-b, b) with ``b = sqrt(6 / n_in)``."""

    def __init__(self):
        pass

    def __call__(self, weight_shape):
        """Return a uniformly initialised array of shape ``weight_shape``.

        ``weight_shape`` must unpack into exactly two values ``(n_in, n_out)``.
        """
        # Only the first dimension sets the bound; unpacking still enforces
        # a two-element shape.
        n_in, _ = weight_shape
        bound = np.sqrt(6 / n_in)
        return np.random.uniform(low=-bound, high=bound, size=weight_shape)


class WeightInitializer:
    """Select and apply a weight-initialization scheme described by a string.

    ``mode`` names the scheme and may carry keyword arguments, for example
    ``'he_uniform'`` or ``'std_normal(gain=0.01)'``.
    """

    def __init__(self, mode='he_uniform'):
        """
        :param mode: scheme description (matched case-insensitively).
        :raises ValueError: if ``mode`` names no known scheme.
        """
        self.mode = mode
        mode_str = self.mode.lower()
        # The value group stops at ',' or ')'. It used to stop only at ',',
        # so the closing parenthesis of 'std_normal(gain=0.01)' was captured
        # and eval() failed with a SyntaxError. The key class also used the
        # typo range 'A-z'; keys are now plain lower-case identifiers.
        pattern = r'([a-z_][a-z0-9_]*)\s*=\s*([^,)]*)'
        # NOTE(security): eval() executes arbitrary expressions, so ``mode``
        # must only ever come from trusted configuration.
        kwargs = {
            key: eval(value.strip())
            for key, value in re.findall(pattern, mode_str)
        }

        if 'std_normal' in mode_str:
            self.init_fn = std_normal(**kwargs)
        elif 'he_uniform' in mode_str:
            self.init_fn = he_uniform(**kwargs)
        else:
            # Fail at construction instead of leaving ``init_fn`` unset and
            # raising AttributeError on the first call.
            raise ValueError(f'Unknown weight initialization mode: {mode!r}')

    def __call__(self, weight_shape):
        """Return a freshly initialised weight array of shape ``weight_shape``."""
        return self.init_fn(weight_shape)

# In [None]:
# Optimizers - SGD
class OptimizerBase(ABC):
    """Abstract base class for optimizers; calling an instance runs ``update``."""

    def __init__(self):
        super().__init__()

    def __call__(self, params, params_grad, params_name):
        """Delegate to ``update`` and return its result.

        :param params: parameters to update, e.g. a weight matrix w.
        :param params_grad: gradient of the parameters being updated.
        :param params_name: name of the parameters being updated.
        :return: whatever ``update`` returns.
        """
        updated = self.update(params, params_grad, params_name)
        return updated

    @abstractmethod
    def update(self, params, params_grad, params_name):
        """Compute the update step; must be implemented by subclasses."""
        raise NotImplementedError


class SGD(OptimizerBase):
    """Stochastic gradient descent optimizer with a fixed learning rate."""

    def __init__(self, lr=0.01):
        super().__init__()
        self.lr = lr

    @property
    def hyperparams(self):
        """Return the optimizer name and learning rate as a dict."""
        return {'op': 'SGD', 'lr': self.lr}

    def __str__(self):
        return f'SGD(lr={self.hyperparams["lr"]})'

    def update(self, params, params_grad, params_name):
        """Return ``params`` moved one step against the gradient.

        ``params_name`` is accepted for interface compatibility and not used.
        """
        return params - self.lr * params_grad


class OptimizerInitializer(ABC):
    """Build an optimizer from a textual description.

    ``opti_name`` names the optimizer and may carry keyword arguments, for
    example ``'sgd'`` or ``'sgd(lr=0.1)'``.
    """

    def __init__(self, opti_name="sgd"):
        """
        :param opti_name: optimizer description (matched case-insensitively).
        """
        self.opti_name = opti_name

    def __call__(self):
        """Instantiate the optimizer described by ``self.opti_name``.

        :return: a new optimizer instance.
        :raises ValueError: if the description names no known optimizer.
        """
        opti_str = self.opti_name.lower()
        # Keys may contain underscores and digits; the previous letters-only
        # class truncated a key such as 'weight_decay' to 'decay'.
        pattern = r"([a-z_][a-z0-9_]*)\s*=\s*([^,)]*)"
        # NOTE(security): eval() executes arbitrary expressions, so
        # ``opti_name`` must only ever come from trusted configuration.
        kwargs = {
            key: eval(value.strip())
            for key, value in re.findall(pattern, opti_str)
        }

        if "sgd" in opti_str:
            return SGD(**kwargs)

        # Previously an unknown name surfaced as an UnboundLocalError on the
        # return statement.
        raise ValueError(f"Unknown optimizer: {self.opti_name!r}")