In [1]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sys
import tensorflow as tf
import time
from PIL import Image

from tensorflow import keras
from tensorflow.keras.preprocessing import image
print(tf.__version__)

2.1.0


## 反向传播——通俗易懂
https://www.cnblogs.com/charlotte77/p/5629865.html

### 最简单的例子
小日本买水果

In [2]:
class MulLayer:
    def __init__(self):
        self.x = None
        self.y = None
        
    def forward(self, x, y):
        self.x = x
        self.y = y
        out = x * y
        return out
    
    def backward(self, dout):
        dx = dout * self.y #翻转x和y
        dy = dout * self.x        
        return dx, dy

In [3]:
apple = 100
apple_num = 2
tax = 1.1

#layer
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

#forward
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print(price)  #220

220.00000000000003


In [4]:
class AddLayer:
    def __init__(self):
        pass
    
    def forward(self, x, y):
        out = x + y
        return out
    
    def backward(self, dout):
        dx = dout * 1
        dy = dout * 1
        return dx, dy

In [5]:
apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

#layer
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

#forward
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

print(price)  #220

715.0000000000001


In [6]:
#backward
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)

In [7]:
dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

In [8]:
print(dapple_num, dapple, dorange_num, dorange, dtax)

110.00000000000001 2.2 165.0 3.3000000000000003 650


## DNN

### 正向传播

In [9]:
class Affine:
    """定义仿射层（矩阵乘积）"""
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T) # T代表转置
        self.dW = np.dot(self.x.T, dout) # x要在前面乘
        self.db = np.sum(dout, axis=0) # 偏置反向传播需要汇总为偏置的元素

        return  dx, self.dW, self.db   #修改了输出

In [10]:
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout

        return dx  

In [11]:
class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = 1 / (1 + np.exp(-x))
        self.out = out

        return out

    def backward(self, dout):
        dx = dout * (1 - self.out) * self.out

        return dx    

In [12]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [13]:
def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a-c) #溢出对策
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    
    return y

In [14]:
def cross_entropy_error(y, t):
    delta = le-7    
    return np.sum(t * np.log(y + delta))

In [15]:
def mean_squared_error(y, t):
    return 0.5 * np.sum((t - y)**2)

In [16]:
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None #损失
        self.y = None #Softmax的输出
        self.x = None #监督数据(one-hot vector)

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y ,self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size

        return dx    

In [122]:
class MSEWithLoss:
    def __init__(self):
        self.loss = None #损失
        self.y = None #输出
        self.x = None #监督数据

    def forward(self, y, t):
        self.t = t
        self.y = y
        self.loss = mean_squared_error(self.y ,self.t)
        
        return self.loss

    def backward(self, dout=1):
        dx = -dout*(self.t - self.y)
        
        return dx

In [123]:
i = np.array([0.05, 0.10])
w1 = np.array([[0.15,0.25],[0.20,0.30]],dtype='float32') #注意次序，很坑
w2 = np.array([[0.40,0.50],[0.45,0.55]],dtype='float32') #注意次序，很坑
b1=0.35
b2=0.60
t = [0.01, 0.99]

In [124]:
fc1 = Affine(w1,b1)
sig1 = Sigmoid()
fc2 = Affine(w2,b2)
sig2 = Sigmoid()
loss = MSEWithLoss()

In [125]:
# forward
h1 = fc1.forward(i)
s1 = sig1.forward(h1)
h2 = fc2.forward(s1)
out = sig2.forward(h2)
los = loss.forward(out, t)
los

0.2983711080102912

In [126]:
out

array([0.75136507, 0.77292847])

In [127]:
#另外的例子
def init_network():
    network = {}
    network['W1'] = np.array([[0.15,0.25],[0.20,0.30]])
    network['b1'] = 0.35
    network['W2'] = np.array([[0.40,0.50],[0.45,0.55]])
    network['b2'] = 0.60
    return network

def forward(network, x):
    W1, W2 = network['W1'], network['W2']
    b1, b2 = network['b1'], network['b2']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)

    return z2

network = init_network()
x = np.array([0.05, 0.10])
forward(network, x)

array([0.75136507, 0.77292847])

In [181]:
network['W1'] = np.array([[0.15,0.25],[0.20,0.30]])
network['b1'] = 0.35
sigmoid(np.dot(x, network['W1']) + network['b1'])

array([0.59326999, 0.59688438])

### 反向传播

In [146]:
out = loss.backward(1)
out

array([ 0.74136507, -0.21707153])

In [147]:
s21 = sig2.backward(out[0])
s21

array([0.13849856, 0.13011702])

In [148]:
s22 = sig2.backward(out[1])
s22

array([-0.04055235, -0.03809824])

In [149]:
x21, dw21, db21 = fc2.backward(s21[0])
0.4 - dw21[0]/2  #w5

0.35891647967297596

In [150]:
0.45 - dw21[1]/2  #w6

0.40866618602120863

In [151]:
x22, dw22, db22 = fc2.backward(s22[1])
0.50 - dw22[1]/2  #w7

0.5113701210003415

In [152]:
0.55 - dw22[1]/2  #w8

0.5613701210003416

### 以上是推导第二层参数
### 以下是推导第一层参数

In [239]:
out = loss.backward(1)
s21 = sig2.backward(out[0])
s22 = sig2.backward(out[1])
x21, dw21, db21 = fc2.backward(s21[0])
x22, dw22, db22 = fc2.backward(s22[1])

In [240]:
x21

array([[0.05539943, 0.06232435],
       [0.06924928, 0.07617421]])

In [241]:
x22

array([[-0.01523929, -0.01714421],
       [-0.01904912, -0.02095403]])

In [242]:
x21[0][0]

0.055399425524065564

In [243]:
x22[1][0]

-0.019049118068774972

In [299]:
x2 = x21[0][0] + x22[1][0]
x2

0.036350307455290595

In [300]:
s2 = sig1.backward(x2)
s2

array([0.00877135, 0.00874637])

In [301]:
x1,w1,b1 = fc1.backward(s2[0])
0.15 - w1[0]/2 #w1

0.14978071612637978

In [302]:
0.2 - w1[1]/2 #w2

0.1995614322527596

In [303]:
x3 = x21[0][1] + x22[1][1]  #不同的是第二列
x3

0.04137032080501818

In [304]:
s3 = sig1.backward(x3)
s3

array([0.00998269, 0.00995425])

In [305]:
x3,w3,b3 = fc1.backward(s3[1])
w3

array([0.00049771, 0.00099543])

In [306]:
0.25 - w3[0]/2 #w3

0.2497511436435177

In [307]:
0.30 - w3[1]/2 #w4

0.2995022872870354

例子中是分开相乘计算的

In [173]:
sig1.backward(1)

array([0.24130071, 0.24061342])

In [175]:
x11 = sig1.backward(1)
x11

array([0.24130071, 0.24061342])

In [178]:
w1 = fc1.backward(1)[1]
w1

array([0.05, 0.1 ])

In [179]:
x2*x11*w1  # 分开计算

array([0.00043857, 0.00087464])