# Chapter 5

## 5.4.1 執行乘法層

In [1]:
class MulLayer:
    """Multiplication node of a computational graph.

    forward() returns the product of its two inputs and remembers them;
    backward() uses the remembered inputs to turn the upstream derivative
    into one derivative per input.
    """

    def __init__(self):
        # Inputs cached by forward(); both stay None until forward() is called.
        self.x, self.y = None, None

    def forward(self, x, y):
        """Store both operands on the instance and return ``x * y``."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream derivative ``dout``.

        Each input's derivative is ``dout`` times the *other* input that was
        cached during forward(): ``dx = dout * y`` and ``dy = dout * x``.
        """
        return dout * self.y, dout * self.x

In [3]:
# Usage example: price of 2 apples at 100 each with a 1.1 tax multiplier,
# computed with two chained MulLayer nodes.
apple = 100
apple_num = 2
tax = 1.1

# layer: one MulLayer per multiplication in the graph
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# forward: (apple * apple_num) * tax; each layer caches its own inputs
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)
print('Total price is ', price)

# backward: call the layers in the reverse of the forward order, starting
# from dprice = 1 and feeding each layer the derivative produced by the next
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple, dapple_num, dtax)

Total price is  220.00000000000003
2.2 110.00000000000001 200


## 5.4.2 執行加法層

In [6]:
class AddLayer:
    """Addition node of a computational graph.

    The node keeps no state: forward() returns the sum of its inputs and
    backward() hands the upstream derivative to both inputs.
    """

    def __init__(self):
        """Nothing to initialise; the layer caches no inputs."""

    def forward(self, x, y):
        """Return ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``, each computed as ``dout * 1``.

        The two products are evaluated separately so the caller receives two
        independent results rather than one object returned twice.
        """
        return dout * 1, dout * 1

In [8]:
# Use the addition and multiplication layers together to compute the cost of
# buying apples and oranges: (apple * apple_num + orange * orange_num) * tax.

apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

# layer: one node per operation in the graph
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# forward: per-fruit subtotals -> sum -> apply tax
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

# backward: traverse the layers in the reverse of the forward order, starting
# from dprice = 1; each MulLayer relies on the inputs it cached in forward
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(price)
print(dapple_num, dapple, dorange, dorange_num, dtax)



715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650


## 5.5 執行活化函數層

## 5.5.1 ReLU層

In [9]:
# Definition of the ReLU layer
class Relu:
    """ReLU activation layer.

    forward() zeroes every element of the input that is <= 0 and remembers
    where those elements were; backward() zeroes the upstream derivative at
    the same positions.

    Inputs must support element-wise ``<=``, ``.copy()`` and boolean-mask
    assignment (e.g. NumPy arrays).
    """

    def __init__(self):
        # Boolean mask of positions where the forward input was <= 0;
        # None until forward() has been called.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all elements <= 0 replaced by 0.

        ``x`` itself is left untouched; the mask is stored for backward().
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return ``dout`` with the masked positions set to 0.

        The result is a new array: ``dout`` is copied first so the caller's
        upstream gradient is not modified in place.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

In [11]:
# Example of the mask used by the ReLU layer
import numpy as np

x = np.array([[1.0, -0.5],[-2.0, 3.0]])
print(x)
# Boolean array that is True wherever the element of x is <= 0
mask = (x <= 0)
print(mask)
# The Relu layer uses this mask to set the positions marked True to 0
# (this cell only builds and prints the mask; it does not apply it).

[[ 1.  -0.5]
 [-2.   3. ]]
[[False  True]
 [ True False]]


## 5.5.2 Sigmoid 層

In [2]:
# 建立 Sigmoid 層
import numpy as np

class Sigmoid:
    """Sigmoid activation layer.

    forward() computes ``1 / (1 + exp(-x))`` and keeps the result, because
    backward() expresses the derivative purely in terms of that output.
    """

    def __init__(self):
        # Output of the most recent forward() call; None before the first call.
        self.out = None

    def forward(self, x):
        """Return the sigmoid of ``x`` and cache it on the instance."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - y) * y`` where ``y`` is the cached output."""
        y = self.out
        return dout * (1.0 - y) * y

## 5.6 執行 Affine / Softmax 層

In [4]:
import numpy as np

# Random demo values; no seed is set in this cell, so the printed Y changes
# from run to run.
X = np.random.rand(2) # input
W = np.random.rand(2,3) # weights
B = np.random.rand(3) # bias

print(X.shape)
print(W.shape)
print(B.shape)

# Affine transform: (2,) dot (2, 3) gives (3,), then the (3,) bias is added
Y = np.dot(X,W) + B
print('Y = ', Y)

(2,)
(2, 3)
(3,)
Y =  [0.84658125 0.595124   1.17820531]


## 5.6.2 批次版 Affine 層

In [5]:
import numpy as np

# Stand-in for the X . W result of a batch of two samples (one row each)
X_dot_W = np.array([[0,0,0],[10,10,10]])
B = np.array([1,2,3])

print(X_dot_W)
# Adding the (3,) bias to the (2, 3) array adds B to every row
print(X_dot_W + B)

[[ 0  0  0]
 [10 10 10]]
[[ 1  2  3]
 [11 12 13]]


In [8]:
import numpy as np

# Upstream derivative for a batch of two samples (one row each)
dY = np.array([[1,2,3],[4,5,6]])
print(dY)

# Bias derivative: sum dY over axis 0, i.e. column-wise across the rows
dB = np.sum(dY, axis=0)
print(dB)

[[1 2 3]
 [4 5 6]]
[5 7 9]


In [9]:
# 定義 Affline 層

class Affline:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dw = None
        self.db = None
        
    def forward(self,x):
        self.x = x
        out = np.dot(x, self.W) + self.b
        
        return out
    
    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis = 0)
        
        return dx