# 通俗理解神经网络BP传播算法

[通俗理解神经网络BP传播算法](https://zhuanlan.zhihu.com/p/24801814)

---

## BP

In [11]:
import numpy as np

def nonlin(x, deriv=False):
  if (deriv == True):
    return x * (1 - x) #如果deriv为true，求导数
  return 1 / (1 + np.exp(-x))

X = np.array([[0.35],[0.9]]) #输入层
y = np.array([[0.5]]) #输出值

np.random.seed(1)

W0 = np.array([[0.1,0.8],[0.4,0.6]])
W1 = np.array([[0.3,0.9]])
print('original ', W0, '\n', W1)

for j in range(100):
  l0 = X #相当于文章中x0
  l1 = nonlin(np.dot(W0,l0)) #相当于文章中y1
  l2 = nonlin(np.dot(W1,l1)) #相当于文章中y2
  l2_error = y - l2
  Error = 1 / 2.0 * (y - l2) ** 2
  
  print('Iter [', j, '] Error:', Error)
  l2_delta = l2_error * nonlin(l2, deriv=True) #this will backpack

  print('l2_delta=', l2_delta)
  l1_error = l2_delta*W1; #反向传播
  l1_delta = l1_error * nonlin(l1, deriv=True)

  W1 += l2_delta * l1.T; #修改权值
  W0 += l0.T.dot(l1_delta)
  print(W0, '\n', W1)

original  [[ 0.1  0.8]
 [ 0.4  0.6]] 
 [[ 0.3  0.9]]
Iter [ 0 ] Error: [[ 0.0181039]]
l2_delta= [[-0.04068113]]
[[ 0.09661944  0.78985831]
 [ 0.39661944  0.58985831]] 
 [[ 0.27232597  0.87299836]]
Iter [ 1 ] Error: [[ 0.01652628]]
l2_delta= [[-0.03944183]]
[[ 0.09363393  0.78028763]
 [ 0.39363393  0.58028763]] 
 [[ 0.2455836   0.84691021]]
Iter [ 2 ] Error: [[ 0.01506159]]
l2_delta= [[-0.03816188]]
[[ 0.09102066  0.7712756 ]
 [ 0.39102066  0.5712756 ]] 
 [[ 0.21978966  0.82175133]]
Iter [ 3 ] Error: [[ 0.0137064]]
l2_delta= [[-0.03685334]]
[[ 0.08875541  0.76280627]
 [ 0.38875541  0.56280627]] 
 [[ 0.19495316  0.79752995]]
Iter [ 4 ] Error: [[ 0.01245646]]
l2_delta= [[-0.03552736]]
[[ 0.08681317  0.75486083]
 [ 0.38681317  0.55486083]] 
 [[ 0.17107608  0.7742475 ]]
Iter [ 5 ] Error: [[ 0.01130686]]
l2_delta= [[-0.03419403]]
[[ 0.08516869  0.74741831]
 [ 0.38516869  0.54741831]] 
 [[ 0.14815418  0.75189938]]
Iter [ 6 ] Error: [[ 0.01025225]]
l2_delta= [[-0.03286237]]
[[ 0.08379687  0.74

---

[A Neural Network in 11 lines of Python (Part 1)](https://iamtrask.github.io/2015/07/12/basic-python-network/)


In [14]:
X = np.array([ [0,0,1],[0,1,1],[1,0,1],[1,1,1] ])
y = np.array([[0,1,1,0]]).T

syn0 = 2*np.random.random((3,4)) - 1
syn1 = 2*np.random.random((4,1)) - 1

for j in range(60000):
    l1 = 1/(1+np.exp(-(np.dot(X,syn0))))
    l2 = 1/(1+np.exp(-(np.dot(l1,syn1))))
    l2_delta = (y - l2)*(l2*(1-l2))
    l1_delta = l2_delta.dot(syn1.T) * (l1 * (1-l1))
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)

---

## Part 1: A Tiny Toy Network


| Inputs | Output
| :---: | :---:
| 0 0 1 | 0
| 1 1 1 | 1
| 1 0 1 | 1
| 0 1 1 | 0


In [22]:
import numpy as np

def nonlin(x, deriv=False):
  if (deriv == True):
    return x * (1 - x) #如果deriv为true，求导数
  return 1 / (1 + np.exp(-x))

#输入层
X = np.array([ [0, 0, 1],
               [0, 1, 1],
               [1, 0, 1],
               [1, 1, 1] ])

#输出值
y = np.array([[0, 0, 1, 1]]).T

np.random.seed(1)

# w0
W0 = 2 * np.random.random((3, 1)) - 1
print('original ', W0, '\n', W1)

for j in range(100):
  l0 = X #相当于文章中x0
  l1 = nonlin(np.dot(l0, W0)) #相当于文章中y1

  l1_error = y - l1
  # print('Iter [', j, '] l1_error:', l1_error)
 
  l1_delta = l1_error * nonlin(l1, deriv=True) #this will backpack
  # print('l1_delta=', l1_delta)

  W0 += l0.T.dot(l1_delta)

  # print(W0, '\n', W1)

print("Output After Training:")
print(l1)

original  [[-0.16595599]
 [ 0.44064899]
 [-0.99977125]] 
 [[-0.30032342  0.31508797]]
Output After Training:
[[ 0.11035029]
 [ 0.09193919]
 [ 0.92564882]
 [ 0.91041229]]


---

## Part 2: A Slightly Harder Problem

### 2.1 横向量

In [26]:
import numpy as np

def nonlin(x,deriv=False):
  if(deriv==True):
    return x*(1-x)
  return 1/(1+np.exp(-x))
    
X = np.array([ [0,0,1],
               [0,1,1],
               [1,0,1],
               [1,1,1] ])

y = np.array([[0],
              [1],
              [1],
              [0]])

np.random.seed(1)

# randomly initialize our weights with mean 0
syn0 = 2 * np.random.random((3, 4)) - 1
syn1 = 2 * np.random.random((4, 1)) - 1

for j in range(60000):
  # Feed forward through layers 0, 1, and 2
  l0 = X
  l1 = nonlin(np.dot(l0,syn0))
  l2 = nonlin(np.dot(l1,syn1))

  # how much did we miss the target value?
  l2_error = y - l2
    
  if (j % 10000) == 0:
    print("Error:" + str(np.mean(np.abs(l2_error))))

  # in what direction is the target value?
  # were we really sure? if so, don't change too much.
  l2_delta = l2_error * nonlin(l2, deriv=True)

  # how much did each l1 value contribute to the l2 error (according to the weights)?
  l1_error = l2_delta.dot(syn1.T)

  # in what direction is the target l1?
  # were we really sure? if so, don't change too much.
  l1_delta = l1_error * nonlin(l1,deriv=True)

  syn1 += l1.T.dot(l2_delta)
  syn0 += l0.T.dot(l1_delta)

print("Output After Training:")
print(l1)

Error:0.496410031903
Error:0.00858452565325
Error:0.00578945986251
Error:0.00462917677677
Error:0.00395876528027
Error:0.00351012256786
Output After Training:
[[  7.26191199e-01   1.16411907e-01   9.26183940e-01   9.97110310e-01]
 [  1.66762801e-01   3.92990161e-04   1.66519465e-02   8.96576847e-01]
 [  9.96229372e-01   8.95211165e-01   2.23120442e-02   8.38385421e-01]
 [  9.52239003e-01   2.48589483e-02   3.07990327e-05   1.15301801e-01]]


### 2.2 纵向量

In [35]:
import numpy as np

def nonlin(x,deriv=False):
  if(deriv==True):
    return x*(1-x)
  return 1/(1+np.exp(-x))

X = np.array([ [0,0,1],
               [0,1,1],
               [1,0,1],
               [1,1,1] ]).T

y = np.array([[0],
              [1],
              [1],
              [0]]).T

np.random.seed(1)

# randomly initialize our weights with mean 0
syn0 = 2 * np.random.random((4, 3)) - 1
syn1 = 2 * np.random.random((1, 4)) - 1

for j in range(60000):
  # Feed forward through layers 0, 1, and 2
  l0 = X
  l1 = nonlin(np.dot(syn0, l0))
  l2 = nonlin(np.dot(syn1, l1))

  # how much did we miss the target value?
  l2_error = y - l2
    
  if (j % 10000) == 0:
    print("Error:" + str(np.mean(np.abs(l2_error))))

  # in what direction is the target value?
  # were we really sure? if so, don't change too much.
  l2_delta = l2_error * nonlin(l2, deriv=True)

  # how much did each l1 value contribute to the l2 error (according to the weights)?
  l1_error = l2_delta.dot(syn1.T)

  # in what direction is the target l1?
  # were we really sure? if so, don't change too much.
  l1_delta = l1_error * nonlin(l1,deriv=True)

  # syn1 += l1.T.dot(l2_delta)
  # syn0 += l0.T.dot(l1_delta)
  syn1 += l2_delta.dot(l1.T)
  syn0 += l1_delta.dot(l0.T)

print("Output After Training:")
print(l1)

Error:0.500620808426
Error:0.499839521357
Error:0.499908550766
Error:0.499934552293
Error:0.499948496145
Error:0.499957282198
Output After Training:
[[  6.48054319e-05   9.34988367e-06   8.02275630e-06   1.15743682e-06]
 [  6.84963854e-05   7.15849782e-06   7.84953640e-06   8.20303686e-07]
 [  6.93700693e-05   7.54067793e-06   6.87512696e-06   7.47299676e-07]
 [  6.87538652e-05   7.17287805e-06   7.66880121e-06   8.00018532e-07]]
