## 计算图的加法和乘法层的实现

#### 1. 简单的标量之间的计算图

In [1]:
# 计算图的乘法层
class MulLayer:
    
    def __init__(self):
        self.x = None
        self.y = None
    
    def forward(self, x, y):
        self.x = x
        self.y = y
        out = self.x * self.y
        
        return out
    
    def backward(self, dout):
        #乘法的反向是交换x y
        dx = dout * self.y
        dy = dout * self.x
        
        return dx, dy


In [2]:
# 测试乘法层 用书上苹果的案例 
# y = x1 * x2 * x3
x1 = 100  # 苹果单价
x2 = 2    # 苹果个数
x3 = 1.1  # 0.1的消费税


# 第一层 苹果单价*个数 
# 第二层 第一层的输出*税

layer1 = MulLayer()
layer2 = MulLayer()

# forward
o1 = layer1.forward(x1, x2)
o2 = layer2.forward(o1, x3)
print(f'{o2}')
# o2就是x1 * x2 * x3

# backward
d = 1.0
do1, d3 = layer2.backward(d)
d1, d2 = layer1.backward(do1)
print(d1, d2, d3)

# d1/d2/d3表示x1/x2/x3增大1的话，输出y增大多少


220.00000000000003
2.2 110.00000000000001 200.0


In [3]:
# 计算图的加法层
# 加法层比较简单，由于反向是把d直接传递出去，因此不需要存储x和y
class AddLayer():
    
    def __init__(self):
        pass
    
    def forward(self, x, y):
        return x + y
    
    def backward(self, dout):
        return dout, dout


In [4]:
# 测试加法层, 用书上苹果和橘子的案例
# (苹果个数*苹果单价 + 橘子个数*橘子单价)*消费税=最终价

x1 = 100  # 苹果单价
x2 = 2    # 苹果个数
x3 = 150  # 橘子单价
x4 = 3    # 橘子个数
x5 = 1.1  # 税

layer1 = MulLayer() # 计算苹果总价
layer2 = MulLayer() # 计算橘子总价
layer3 = AddLayer() # 计算苹果+橘子总价
layer4 = MulLayer() # 计算税后价格，即最终价格

# forward 计算总价
o1 = layer1.forward(x1, x2)
o2 = layer2.forward(x3, x4)
o3 = layer3.forward(o1, o2)
o4 = layer4.forward(o3, x5)
print(o4)

# backward 计算x1-x5每提升1，对输出的影响
d = 1
do4, d5 = layer4.backward(d)
do31, do32 = layer3.backward(do4)
d1, d2 = layer1.backward(do31)
d3, d4 = layer2.backward(do32)
print(d1, d2, d3, d4, d5)

# 这里的5个输出应该对照书里的计算图(书138页/pdf163页)上的反向数值一致


715.0000000000001
2.2 110.00000000000001 3.3000000000000003 165.0 650


#### 2. 批量输入的计算图，输入是numpy的矩阵

In [5]:
import numpy as np

In [6]:
# 激活函数的实现
# relu函数 这里的输入是numpy的数组, mask是bool的ndarray
class Relu: 
    
    def __init__(self):
        self.mask = None
   
    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out
    
    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        return dx

# sigmoid.  y = 1 / (1 - exp(-x))  导数是：y(1 - y)

class Sigmoid():
    
    def __init__(self):
        self.out = None
    
    def forward(self, x):
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out
    
    def backward(self, dout):
        dx = dout * self.out * (1 - self.out)
        return dx


In [7]:
# affine 仿射层
# 仿射变换指的是向量X经过矩阵的线性变化和平移之后的变化，Y = X.W + b
# 这里对应网络里的每一层，和OPENGL里，坐标点的平移缩放其实是一样的...
class Affine:
    
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None
        
    def forword(self, x):
        self.x = x
        dout = x.dot(self.W) + self.b
        return dout
    
    def backword(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        
        return dx
    
# 测试下affine层
X = np.random.rand(10, 3)
W = np.random.rand(3, 4)
# 这里B用rand(1,4)也行，不过backword里需要保持维度：
# np.sum(dout, axis=0, keepdims=True)
# 直接rand(4)当作一维数组处理，利用广播机制也很好, 求和就不用保持维度了
B = np.random.rand(4)

nn = Affine(W, B)
Y = nn.forword(X)
dy = np.random.rand(*Y.shape)
dx = nn.backword(dy)

print(dx.shape)
print(nn.dW.shape)
print(nn.db.shape)


(10, 3)
(3, 4)
(4,)


In [328]:
# softmax 输出层的实现 这里附带了交叉熵误差
class SoftmaxWithLoss:
    
    def __init__(self):
        self.y = None
        self.t = None
    
    # 输出函数 批量操作
    def softmax(self, x):
        if x.ndim == 1:
            x = x.reshape(1, -1)
        c = np.max(x, axis = 1, keepdims = True)
        exp_a = np.exp(x - c) # 减去c是为了防止溢出
        sum_exp_a = np.sum(exp_a, axis = 1, keepdims = True)
        y = exp_a / sum_exp_a
        return y
    
     
    # 损失函数 
    def cross_entropy_error(self, y, t):
        # 由于loge(0)是负无穷大-inf，计算机无法继续之后的运算
        # 所以给输入增加一个微小的数，并且不影响结果
        delta = 1e-7
        # 除以批量
        batch_size = y.shape[0]
        return -np.sum(t * np.log(y + delta)) / batch_size
    
    
    def forward(self, x, t):
        self.y = self.softmax(x)
        self.t = t
        loss = self.cross_entropy_error(self.y, t)
        return loss
    
    def backword(self, dout = 1): 
        dx = (self.y -self.t)
        return dx
    


# 测试下SoftMax
X = np.random.rand(10, 6)

T = np.zeros_like(X)
rand_indices = np.random.choice(T.shape[1], size=T.shape[0])
row_indices = np.arange(T.shape[0])
T[row_indices, rand_indices] = 1.0

nn = SoftmaxWithLoss()
loss = nn.forward(X, T)
print(loss)
nn.backword(loss)

1.7780617432346744


array([[ 0.27040524, -0.79625005,  0.14783498,  0.16457343,  0.10539249,
         0.10804391],
       [ 0.2007824 ,  0.20766571,  0.08333587,  0.09106223, -0.77973081,
         0.1968846 ],
       [ 0.17556456,  0.15732324,  0.23592382,  0.09904414,  0.17601619,
        -0.84387194],
       [ 0.17254261,  0.16242822,  0.18695241,  0.22311431, -0.86915796,
         0.12412039],
       [-0.86654801,  0.22231232,  0.19972956,  0.10024882,  0.1041884 ,
         0.2400689 ],
       [ 0.2113994 ,  0.11970695,  0.21815751,  0.09804639, -0.86930177,
         0.22199152],
       [ 0.1281818 ,  0.29403868,  0.13237207,  0.11283784, -0.83542405,
         0.16799365],
       [-0.75196752,  0.1336352 ,  0.11152758,  0.19930312,  0.19748874,
         0.1100129 ],
       [ 0.22834334, -0.85624629,  0.15955736,  0.14428698,  0.15154098,
         0.17251762],
       [ 0.19681145, -0.79788198,  0.18449839,  0.15977553,  0.12086807,
         0.13592855]])