# In [None]:

"""

Reference:
  doc/lang/programming/algo/双隐层神经网络梯度下降公式推导.md

"""


import numpy as np
import itertools


def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The exponential is evaluated with ``np.exp``, so a NumPy array input is
    transformed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Derivative of the activation function
def derivative_sigmoid(x):
    """Return the sigmoid derivative ``s * (1 - s)``, where ``s = sigmoid(x)``."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

if __name__ == "__main__":
    # XOR problem solved with a two-layer sigmoid network: layer 1 has n
    # sigmoid units, layer 2 is a single sigmoid unit whose activation is the
    # prediction. All four samples are used in every gradient-descent step.
    m = 4  # number of samples
    n = 2  # dimension of each sample

    alpha = 0.5      # learning rate
    maxIter = 50000  # maximum number of iterations

    # (n x m) inputs, one sample per column.
    # The builtin ``float`` is used as dtype because the ``np.float`` alias
    # was removed from NumPy (1.24) and raises AttributeError there.
    x_0 = np.array([
        [0, 0, 1, 1],
        [0, 1, 0, 1],
    ], dtype=float)

    # (1 x m) targets: XOR of the two entries of each input column.
    Y = np.array([
        [0, 1, 1, 0],
    ], dtype=float)

    # Layer-1 weights: (n x n) @ (n x m) -> (n x m)
    W_1 = np.random.uniform(size=(n, n))
    # NOTE(review): both biases carry one column per training sample
    # (shapes (n, m) and (1, m)), i.e. every sample has its own bias value.
    # A conventional layer would share one bias per unit across samples.
    # The shapes are kept as in the original code -- confirm against the
    # referenced derivation before changing them.
    b_1 = np.random.uniform(size=(n, m))

    # Layer-2 weights: (1 x n) @ (n x m) -> (1 x m)
    W_2 = np.random.uniform(size=(1, n))
    b_2 = np.random.uniform(size=(1, m))

    step = alpha / m  # learning rate combined with the 1/m sample averaging

    for iteration in range(maxIter):
        # ---- Forward pass ----
        a_1 = np.dot(W_1, x_0) + b_1  # (n x m) layer-1 pre-activations
        x_1 = sigmoid(a_1)            # (n x m) layer-1 activations

        a_2 = np.dot(W_2, x_1) + b_2  # (1 x m) layer-2 pre-activations
        h_2 = sigmoid(a_2)            # (1 x m) predictions

        E = h_2 - Y                   # (1 x m) prediction error per sample

        errs = np.abs(E).sum()        # total absolute error over all samples
        if errs < 0.05:
            print(f'stop at {iteration}')
            # Print the trained parameters.
            print("Weight1: \n", W_1)
            print("b1: \n", b_1)
            print("Weight2: \n", W_2)
            print("b2: \n", b_2)
            break

        print('curr errs:', errs)

        # ---- Backward pass ----
        # Every gradient is computed from the forward-pass values before any
        # parameter is modified, so the updates below do not influence each
        # other within one iteration.
        #
        # delta_2[0][s] = E[0][s] * sigmoid'(a_2[0][s])
        delta_2 = E * derivative_sigmoid(a_2)                         # (1 x m)
        # delta_1[i][s] = delta_2[0][s] * W_2[0][i] * sigmoid'(a_1[i][s])
        delta_1 = np.dot(W_2.T, delta_2) * derivative_sigmoid(a_1)    # (n x m)

        # Layer-1 weights: grad[i][j] = sum_s delta_1[i][s] * x_0[j][s]
        W_1 -= step * np.dot(delta_1, x_0.T)
        # Layer-1 biases (per sample): grad[i][k] = delta_1[i][k]
        b_1 -= step * delta_1

        # Layer-2 weights: grad[0][j] = sum_s delta_2[0][s] * x_1[j][s]
        W_2 -= step * np.dot(delta_2, x_1.T)
        # Layer-2 biases (per sample): grad[0][k] = delta_2[0][k]
        b_2 -= step * delta_2