# BP神经网络预测鸢尾花种类

## 1.数据预处理

In [1]:
#读取鸢尾花数据
import pandas as pd
iris = pd.read_csv(r"database/iris.csv")
iris

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [2]:
# 将输出的类别修改为实数
# Iris-setosa为1
# Iris-versicolor为2
# Iris-virginica为3
iris.loc[(iris['class']=='Iris-setosa'),'class'] = 1
iris.loc[(iris['class']=='Iris-versicolor'),'class'] = 2
iris.loc[(iris['class']=='Iris-virginica'),'class'] = 3
iris

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,3
146,6.3,2.5,5.0,1.9,3
147,6.5,3.0,5.2,2.0,3
148,6.2,3.4,5.4,2.3,3


In [3]:
# 打乱数据顺序
# frac为所需要的比例，1为全需要
iris = iris.sample(frac=1)
iris

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
123,6.3,2.7,4.9,1.8,3
57,4.9,2.4,3.3,1.0,2
138,6.0,3.0,4.8,1.8,3
97,6.2,2.9,4.3,1.3,2
124,6.7,3.3,5.7,2.1,3
...,...,...,...,...,...
35,5.0,3.2,1.2,0.2,1
26,5.0,3.4,1.6,0.4,1
102,7.1,3.0,5.9,2.1,3
133,6.3,2.8,5.1,1.5,3


In [4]:
# 取特征值
iris_data = iris.values
iris_feature = iris_data[0:,0:4]
iris_feature[0:10]

array([[6.3, 2.7, 4.9, 1.8],
       [4.9, 2.4, 3.3, 1.0],
       [6.0, 3.0, 4.8, 1.8],
       [6.2, 2.9, 4.3, 1.3],
       [6.7, 3.3, 5.7, 2.1],
       [5.8, 2.7, 5.1, 1.9],
       [5.6, 3.0, 4.5, 1.5],
       [5.0, 3.0, 1.6, 0.2],
       [5.8, 2.7, 5.1, 1.9],
       [6.9, 3.1, 5.4, 2.1]], dtype=object)

In [5]:
len(iris_feature)

150

# 2.构建神经网络

In [6]:
# 生成大小为I*J的随机数矩阵，用于构建权重
import numpy as np
def makeArray(I,J):
    m = []
    for i in range(I):
        fill = np.random.random()
        m.append([fill]*J)
    return m

In [7]:
# 结点的激活函数sigmoid
import math
def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))

In [8]:
# sigmoid的导函数
def dsigmoid(x):
    return x * (1-x)

In [9]:
class NN:
#     三层反向传播神经网络

    def  __init__(self,ni,nh,no):
#         定义神经网络结点个数，输入层和隐藏层增加偏置结点
        self.ni = ni + 1
        self.nh = nh + 1
        self.no = no
        
#         激活神经网络的所有结点（向量）
        self.ai = [1.0] * self.ni
        self.ah = [1.0] * self.nh
        self.ao = [1.0] * self.no
        
#         建立权重
        self.wi = makeArray(self.ni,self.nh)
        self.wo = makeArray(self.nh,self.no)
        
        
#     正向传播
    def update(self,inputs):
            
#         激活输入层
        for i in range(self.ni - 1):
            self.ai[i] = inputs[i]
        
#         激活隐藏层
        for j in range(self.nh):
            sum = 0.0
            for i in range(self.ni):
                sum = sum + self.ai[i] * self.wi[i][j]
            self.ah[j] = sigmoid(sum)
            
#         激活输出层
        for k in range(self.no):
            sum = 0.0
            for j in range(self.nh):
                sum = sum + self.ah[j] * self.wo[j][k]
            self.ao[k] = sigmoid(sum)
            
        return self.ao[:]
    
#     反向传播
    def backPropagate(self,targets,lr):
#         计算输出层的误差
        output_deltas = [0.0] * self.no
        for k in range(self.no):
            error = targets[k] - self.ao[k]
            output_deltas[k] = dsigmoid(self.ao[k]) * error
            
#         计算隐藏层的误差
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            error = 0.0
            for k in range(self.no):
                error = error + output_deltas[k] * self.wo[j][k]
            hidden_deltas[j] = dsigmoid(self.ah[j]) * error
            
#         更新输出层权重
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k] * self.ah[j]
                self.wo[j][k] = self.wo[j][k] + lr * change
                
#         更新输入层权重
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j] * self.ai[i]
                self.wi[i][j] = self.wi[i][j] + lr * change
                
#         计算误差
        error = 0.0
        error += 0.5 * (targets[k] - self.ao[k]) ** 2
        return error
    
    def test(self,patterns):
        count = 0
        for p in patterns:
            target = p[1].index(1) + 1
            result = self.update(p[0])
            index = result.index(max(result)) + 1
            print(p[0],':',target,'->',index)
#             cout += (target == index)
            if(target == index):
                count = count + 1
        accuracy = float(count / len(patterns))
        print ('accuracy: %-.9f' % accuracy)
        
    def weights(self):
        print('输入层权重：')
        for i in range(self.ni):
            print(self.wi[i])
        print()
        print('隐藏层权重：')
        for j in range(self.nh):
            print(self.wo[j])
            
    def train(self,patterns,iterations = 1000,lr = 0.1):
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.update(inputs)
                error = error + self.backPropagate(targets,lr)
            if i % 100 == 0:
                print('error: %-.9f' % error)

In [10]:
data = []
for i in range(len(iris_feature)):
    ele = []
    ele.append(list(iris_feature[i]))
    if iris_data[i][4] == 1:
        ele.append([1,0,0])
    elif iris_data[i][4] == 2:
        ele.append([0,1,0])
    else:
        ele.append([0,0,1])
    data.append(ele)

In [11]:
training = data[0:105]
test = data[106:]
test

[[[5.8, 2.7, 3.9, 1.2], [0, 1, 0]],
 [[6.0, 2.7, 5.1, 1.6], [0, 1, 0]],
 [[6.9, 3.1, 5.1, 2.3], [0, 0, 1]],
 [[4.7, 3.2, 1.6, 0.2], [1, 0, 0]],
 [[6.5, 3.0, 5.8, 2.2], [0, 0, 1]],
 [[6.4, 3.2, 4.5, 1.5], [0, 1, 0]],
 [[6.1, 3.0, 4.9, 1.8], [0, 0, 1]],
 [[5.7, 2.8, 4.5, 1.3], [0, 1, 0]],
 [[6.6, 3.0, 4.4, 1.4], [0, 1, 0]],
 [[5.4, 3.9, 1.7, 0.4], [1, 0, 0]],
 [[5.4, 3.4, 1.5, 0.4], [1, 0, 0]],
 [[5.0, 3.5, 1.6, 0.6], [1, 0, 0]],
 [[6.1, 2.9, 4.7, 1.4], [0, 1, 0]],
 [[5.1, 3.8, 1.9, 0.4], [1, 0, 0]],
 [[4.9, 3.1, 1.5, 0.1], [1, 0, 0]],
 [[5.0, 2.0, 3.5, 1.0], [0, 1, 0]],
 [[5.1, 3.8, 1.6, 0.2], [1, 0, 0]],
 [[5.0, 2.3, 3.3, 1.0], [0, 1, 0]],
 [[5.7, 4.4, 1.5, 0.4], [1, 0, 0]],
 [[5.2, 2.7, 3.9, 1.4], [0, 1, 0]],
 [[5.2, 4.1, 1.5, 0.1], [1, 0, 0]],
 [[5.5, 2.6, 4.4, 1.2], [0, 1, 0]],
 [[5.0, 3.5, 1.3, 0.3], [1, 0, 0]],
 [[5.0, 3.3, 1.4, 0.2], [1, 0, 0]],
 [[6.1, 2.6, 5.6, 1.4], [0, 0, 1]],
 [[4.8, 3.4, 1.6, 0.2], [1, 0, 0]],
 [[5.6, 2.7, 4.2, 1.3], [0, 1, 0]],
 [[5.7, 2.8, 4.1, 1.3], [0, 

In [12]:
nn = NN(4,4,3)
nn.train(training,iterations = 1000)
nn.test(test)

error: 16.630838808
error: 12.650962254
error: 3.267600769
error: 2.521075255
error: 2.230561157
error: 1.964197142
error: 1.761266466
error: 1.369104432
error: 1.437765494
error: 1.400586350
[5.8, 2.7, 3.9, 1.2] : 2 -> 2
[6.0, 2.7, 5.1, 1.6] : 2 -> 3
[6.9, 3.1, 5.1, 2.3] : 3 -> 3
[4.7, 3.2, 1.6, 0.2] : 1 -> 1
[6.5, 3.0, 5.8, 2.2] : 3 -> 3
[6.4, 3.2, 4.5, 1.5] : 2 -> 2
[6.1, 3.0, 4.9, 1.8] : 3 -> 3
[5.7, 2.8, 4.5, 1.3] : 2 -> 2
[6.6, 3.0, 4.4, 1.4] : 2 -> 2
[5.4, 3.9, 1.7, 0.4] : 1 -> 1
[5.4, 3.4, 1.5, 0.4] : 1 -> 1
[5.0, 3.5, 1.6, 0.6] : 1 -> 1
[6.1, 2.9, 4.7, 1.4] : 2 -> 2
[5.1, 3.8, 1.9, 0.4] : 1 -> 1
[4.9, 3.1, 1.5, 0.1] : 1 -> 1
[5.0, 2.0, 3.5, 1.0] : 2 -> 2
[5.1, 3.8, 1.6, 0.2] : 1 -> 1
[5.0, 2.3, 3.3, 1.0] : 2 -> 2
[5.7, 4.4, 1.5, 0.4] : 1 -> 1
[5.2, 2.7, 3.9, 1.4] : 2 -> 2
[5.2, 4.1, 1.5, 0.1] : 1 -> 1
[5.5, 2.6, 4.4, 1.2] : 2 -> 2
[5.0, 3.5, 1.3, 0.3] : 1 -> 1
[5.0, 3.3, 1.4, 0.2] : 1 -> 1
[6.1, 2.6, 5.6, 1.4] : 3 -> 3
[4.8, 3.4, 1.6, 0.2] : 1 -> 1
[5.6, 2.7, 4.2, 1.3] : 2 -> 

In [13]:
nn.weights()

输入层权重：
[0.5400333890552317, 0.5975298656268736, 0.5730044314322975, -1.2786125024833683, 0.501019609997483]
[0.7848797396654503, 0.8241139102605467, 0.8074448665649405, -1.617570630692628, 0.757995514469405]
[0.8751806785114692, 0.8876446813577039, 0.8822423223289259, 2.694554048149466, 0.867037994615244]
[0.5456576162610605, 0.5463470511398646, 0.5460185537319822, 2.5333187671897153, 0.5453145655296509]
[0.993114862570247, 1.0054002420132877, 1.000164022858411, -3.421909591932053, 0.9847658367185932]

隐藏层权重：
[0.3847489625919508, 0.00036096805320470597, -2.760121028497624]
[0.2802049186456196, -0.19105226616280974, -2.9125704534340753]
[0.33387645331746163, -0.10023029032123983, -2.837560335333305]
[-16.21464367398347, -0.6341868792480491, 13.835188794265816]
[0.4221349349075836, 0.09514369807837181, -2.69490608939632]
