In [1]:
import matplotlib.pyplot as plt
import numpy as np

#### X W B 的运算例子

In [2]:
# Draw the demo operands from NumPy's global RNG, in the order X, W, B:
# X is the input vector (2 features), W the 2x3 weight matrix, B the bias (3 outputs).
X, W, B = (np.random.rand(*dims) for dims in [(2,), (2, 3), (3,)])

In [3]:
# Shapes of the three operands, in the order (X, W, B).
tuple(operand.shape for operand in (X, W, B))

((2,), (2, 3), (3,))

In [4]:
# Forward pass: multiply the input by the weights, then add the bias.
# This computation is called an affine transformation.
Y = X.dot(W) + B

In [5]:
# Show the affine output (one value per column of W). The numbers change on
# every run because X, W and B are drawn from an unseeded RNG.
Y

array([1.06062506, 1.31155285, 0.70613599])

#### 批版本的Affine

In [6]:
# Stand-in for an already-computed X·W over a batch of two samples (one row each).
X_dot_W = np.array([[0, 0, 0], [10, 10, 10]])
# One bias value per output column; this rebinds the B used in the earlier example.
B = np.array([1, 2, 3])

In [7]:
# Broadcasting: the length-3 bias B is added to every row of the 2x3 X_dot_W.
X_dot_W + B

array([[ 1,  2,  3],
       [11, 12, 13]])

##### 上述运算（X·W + B）中偏置 B 的反向传播

In [8]:
# Example upstream gradient for a batch of two samples (one row per sample).
dY = np.array([[1, 2, 3], [11, 12, 13]])
dY

array([[ 1,  2,  3],
       [11, 12, 13]])

In [9]:
# The same bias is added to every row of the batch, so its gradient is the
# column-wise (axis=0) sum of the upstream gradient.
dB = dY.sum(axis=0)
dB

array([12, 14, 16])

#### 反向传播时，需要将各个数据的反向传播值按列（沿 axis=0）求和，汇总为偏置各元素的梯度

In [10]:
# A second upstream-gradient example: two samples, three outputs each.
dY = np.array([[1, 2, 3], [4, 5, 6]])
dY

array([[1, 2, 3],
       [4, 5, 6]])

In [11]:
# Collapse the batch axis (axis=0): each bias element receives the sum of its column.
dB = dY.sum(axis=0)

In [12]:
# Per-column sums of dY, i.e. one gradient value per bias element.
dB

array([5, 7, 9])

#### Affine层的实现

In [13]:
class Affine:
    """Fully connected (affine) layer: ``out = x . W + b``.

    ``forward`` caches its input so that ``backward`` can compute the parameter
    gradients, which are stored on the instance as ``dW`` and ``db``.

    Both a batch ``x`` of shape (N, n_in) and a single sample of shape (n_in,)
    are supported; a single sample is treated as a batch of one row when the
    parameter gradients are computed.
    """

    def __init__(self, W, b):
        """Store the parameters.

        Args:
            W: weight matrix of shape (n_in, n_out).
            b: bias vector of shape (n_out,).
        """
        self.W = W
        self.b = b
        self.x = None   # input cached by forward() for use in backward()
        self.dW = None  # gradient w.r.t. W, filled in by backward()
        self.db = None  # gradient w.r.t. b, filled in by backward()

    def forward(self, x):
        """Return ``x . W + b`` and remember ``x`` for the backward pass."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Back-propagate ``dout`` (gradient w.r.t. the output of ``forward``).

        Sets ``self.dW`` (same shape as ``W``) and ``self.db`` (same shape as
        ``b``) and returns the gradient w.r.t. the input, shaped like ``x``.

        Raises:
            RuntimeError: if called before ``forward``.
        """
        if self.x is None:
            raise RuntimeError("Affine.backward() called before forward()")

        # Promote 1-D (single-sample) arrays to one-row batches. Without this,
        # x.T is a no-op on a 1-D array so the dW product fails, and summing a
        # 1-D dout over axis 0 would collapse db to a scalar. Arrays that are
        # already 2-D (or higher) pass through unchanged.
        x_rows = np.atleast_2d(self.x)
        dout_rows = np.atleast_2d(dout)

        dx = np.dot(dout, self.W.T)            # keeps the caller's dout shape
        self.dW = np.dot(x_rows.T, dout_rows)  # (n_in, N) . (N, n_out)
        self.db = np.sum(dout_rows, axis=0)    # bias is shared: sum over the batch

        return dx

In [14]:
# Cross-entropy error
def cross_entropy_error(y, t):
    """Return the cross-entropy between predictions ``y`` and targets ``t``.

    Computes ``-sum(t * log(y + 1e-7))`` over every element of the arrays.
    The result is a plain sum: it is not averaged over samples.
    """
    eps = 1e-7  # keeps log() finite when y contains exact zeros (log(0) = -inf)
    log_pred = np.log(y + eps)
    return -np.sum(t * log_pred)

In [1]:
class SoftMaxWithLoss:
    """Softmax followed by cross-entropy loss, fused into a single layer.

    Relies on two module-level helpers: ``cross_entropy_error(y, t)`` and
    ``softmax(x)``.
    NOTE(review): ``softmax`` is not defined in the visible part of this
    notebook - confirm it is defined or imported before ``forward`` is called.
    """

    def __init__(self):
        self.loss = None  # loss from the most recent forward()
        self.y = None     # softmax output
        self.t = None     # supervision labels (one-hot vector)

    def _batch_size(self):
        """Number of samples in ``self.t``; a 1-D target is one sample."""
        # For a 1-D one-hot vector, shape[0] is the number of classes, not the
        # number of samples, so it must not be used as the batch size.
        return 1 if self.t.ndim == 1 else self.t.shape[0]

    def forward(self, x, t):
        """Return the mean cross-entropy loss of ``softmax(x)`` against ``t``.

        Args:
            x: scores, shape (N, n_classes) or (n_classes,).
            t: one-hot targets as a NumPy array with the same shape as ``x``.
        """
        self.t = t
        self.y = softmax(x)
        # cross_entropy_error returns the sum over all samples; divide by the
        # batch size so the loss is the per-sample mean, which is the quantity
        # whose gradient backward() returns.
        self.loss = cross_entropy_error(self.y, self.t) / self._batch_size()

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. ``x``, scaled by ``dout``.

        ``dout`` is the upstream gradient of the scalar loss (1 when this is
        the last layer).
        """
        dx = dout * (self.y - self.t) / self._batch_size()

        return dx
    