# BP神经网络预测鸢尾花种类

## 1.数据预处理

In [1]:
#读取鸢尾花数据
import pandas as pd
iris = pd.read_csv(r"dataset/iris.csv")
iris

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [2]:
# 将输出的类别修改为实数
# Iris-setosa为1
# Iris-versicolor为2
# Iris-virginica为3
iris.loc[(iris['class']=='Iris-setosa'),'class'] = 1
iris.loc[(iris['class']=='Iris-versicolor'),'class'] = 2
iris.loc[(iris['class']=='Iris-virginica'),'class'] = 3
iris

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,3
146,6.3,2.5,5.0,1.9,3
147,6.5,3.0,5.2,2.0,3
148,6.2,3.4,5.4,2.3,3


In [3]:
# 打乱数据顺序
# frac为所需要的比例，1为全需要
iris = iris.sample(frac=1)
iris

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
4,5.0,3.6,1.4,0.2,1
17,5.1,3.5,1.4,0.3,1
142,5.8,2.7,5.1,1.9,3
109,7.2,3.6,6.1,2.5,3
90,5.5,2.6,4.4,1.2,2
...,...,...,...,...,...
94,5.6,2.7,4.2,1.3,2
14,5.8,4.0,1.2,0.2,1
123,6.3,2.7,4.9,1.8,3
135,7.7,3.0,6.1,2.3,3


In [4]:
# 取特征值
iris_data = iris.values
iris_feature = iris_data[0:,0:4]
iris_feature[0:10]

array([[5.0, 3.6, 1.4, 0.2],
       [5.1, 3.5, 1.4, 0.3],
       [5.8, 2.7, 5.1, 1.9],
       [7.2, 3.6, 6.1, 2.5],
       [5.5, 2.6, 4.4, 1.2],
       [6.3, 3.3, 4.7, 1.6],
       [5.6, 2.9, 3.6, 1.3],
       [5.1, 3.8, 1.9, 0.4],
       [6.5, 2.8, 4.6, 1.5],
       [5.6, 3.0, 4.5, 1.5]], dtype=object)

In [5]:
len(iris_feature)

150

# 2.构建神经网络

In [6]:
# 生成大小为I*J的随机数矩阵，用于构建权重
import numpy as np
def makeArray(I,J):
    m = []
    for i in range(I):
        fill = np.random.random()
        m.append([fill]*J)
    return m

In [7]:
# 结点的激活函数sigmoid
import math
def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))

In [8]:
# sigmoid的导函数
def dsigmoid(x):
    return x * (1-x)

In [9]:
class NN:
#     三层反向传播神经网络

    def  __init__(self,ni,nh,no):
#         定义神经网络结点个数，输入层和隐藏层增加偏置结点
        self.ni = ni + 1
        self.nh = nh + 1
        self.no = no
        
#         激活神经网络的所有结点（向量）
        self.ai = [1.0] * self.ni
        self.ah = [1.0] * self.nh
        self.ao = [1.0] * self.no
        
#         建立权重
        self.wi = makeArray(self.ni,self.nh)
        self.wo = makeArray(self.nh,self.no)
        
        
#     正向传播
    def update(self,inputs):
            
#         激活输入层
        for i in range(self.ni - 1):
            self.ai[i] = inputs[i]
        
#         激活隐藏层
        for j in range(self.nh):
            sum = 0.0
            for i in range(self.ni):
                sum = sum + self.ai[i] * self.wi[i][j]
            self.ah[j] = sigmoid(sum)
            
#         激活输出层
        for k in range(self.no):
            sum = 0.0
            for j in range(self.nh):
                sum = sum + self.ah[j] * self.wo[j][k]
            self.ao[k] = sigmoid(sum)
            
        return self.ao[:]
    
#     反向传播
    def backPropagate(self,targets,lr):
#         计算输出层的误差
        output_deltas = [0.0] * self.no
        for k in range(self.no):
            error = targets[k] - self.ao[k]
            output_deltas[k] = dsigmoid(self.ao[k]) * error
            
#         计算隐藏层的误差
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            error = 0.0
            for k in range(self.no):
                error = error + output_deltas[k] * self.wo[j][k]
            hidden_deltas[j] = dsigmoid(self.ah[j]) * error
            
#         更新输出层权重
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k] * self.ah[j]
                self.wo[j][k] = self.wo[j][k] + lr * change
                
#         更新输入层权重
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j] * self.ai[i]
                self.wi[i][j] = self.wi[i][j] + lr * change
                
#         计算误差
        error = 0.0
        error += 0.5 * (targets[k] - self.ao[k]) ** 2
        return error
    
    def test(self,patterns):
        count = 0
        for p in patterns:
            target = p[1].index(1) + 1
            result = self.update(p[0])
            index = result.index(max(result)) + 1
            print(p[0],':',target,'->',index)
#             cout += (target == index)
            if(target == index):
                count = count + 1
        accuracy = float(count / len(patterns))
        print ('accuracy: %-.9f' % accuracy)
        
    def weights(self):
        print('输入层权重：')
        for i in range(self.ni):
            print(self.wi[i])
        print()
        print('隐藏层权重：')
        for j in range(self.nh):
            print(self.wo[j])
            
    def train(self,patterns,iterations = 1000,lr = 0.1):
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.update(inputs)
                error = error + self.backPropagate(targets,lr)
            if i % 100 == 0:
                print('error: %-.9f' % error)

In [10]:
data = []
for i in range(len(iris_feature)):
    ele = []
    ele.append(list(iris_feature[i]))
    if iris_data[i][4] == 1:
        ele.append([1,0,0])
    elif iris_data[i][4] == 2:
        ele.append([0,1,0])
    else:
        ele.append([0,0,1])
    data.append(ele)

In [11]:
training = data[0:105]
test = data[106:]
test

[[[6.2, 2.2, 4.5, 1.5], [0, 1, 0]],
 [[6.5, 3.2, 5.1, 2.0], [0, 0, 1]],
 [[6.6, 2.9, 4.6, 1.3], [0, 1, 0]],
 [[6.7, 3.3, 5.7, 2.1], [0, 0, 1]],
 [[7.3, 2.9, 6.3, 1.8], [0, 0, 1]],
 [[6.4, 3.1, 5.5, 1.8], [0, 0, 1]],
 [[6.7, 3.0, 5.0, 1.7], [0, 1, 0]],
 [[4.9, 2.4, 3.3, 1.0], [0, 1, 0]],
 [[6.2, 2.9, 4.3, 1.3], [0, 1, 0]],
 [[5.5, 2.3, 4.0, 1.3], [0, 1, 0]],
 [[5.0, 3.4, 1.5, 0.2], [1, 0, 0]],
 [[4.6, 3.1, 1.5, 0.2], [1, 0, 0]],
 [[5.0, 2.0, 3.5, 1.0], [0, 1, 0]],
 [[7.0, 3.2, 4.7, 1.4], [0, 1, 0]],
 [[5.7, 2.8, 4.1, 1.3], [0, 1, 0]],
 [[5.2, 2.7, 3.9, 1.4], [0, 1, 0]],
 [[6.1, 2.8, 4.0, 1.3], [0, 1, 0]],
 [[6.9, 3.1, 5.4, 2.1], [0, 0, 1]],
 [[5.0, 3.2, 1.2, 0.2], [1, 0, 0]],
 [[5.5, 2.5, 4.0, 1.3], [0, 1, 0]],
 [[6.7, 3.1, 4.7, 1.5], [0, 1, 0]],
 [[6.7, 3.1, 4.4, 1.4], [0, 1, 0]],
 [[6.1, 3.0, 4.6, 1.4], [0, 1, 0]],
 [[6.9, 3.2, 5.7, 2.3], [0, 0, 1]],
 [[6.7, 3.3, 5.7, 2.5], [0, 0, 1]],
 [[6.8, 3.2, 5.9, 2.3], [0, 0, 1]],
 [[6.4, 2.7, 5.3, 1.9], [0, 0, 1]],
 [[6.4, 2.8, 5.6, 2.1], [0, 

In [12]:
nn = NN(4,4,3)
nn.train(training,iterations = 1000)
nn.test(test)

error: 16.788325199
error: 7.388752478
error: 3.280141483
error: 0.848986056
error: 0.709851736
error: 0.880451082
error: 1.157157976
error: 1.258978666
error: 1.528285289
error: 0.958044645
[6.2, 2.2, 4.5, 1.5] : 2 -> 3
[6.5, 3.2, 5.1, 2.0] : 3 -> 3
[6.6, 2.9, 4.6, 1.3] : 2 -> 2
[6.7, 3.3, 5.7, 2.1] : 3 -> 3
[7.3, 2.9, 6.3, 1.8] : 3 -> 3
[6.4, 3.1, 5.5, 1.8] : 3 -> 3
[6.7, 3.0, 5.0, 1.7] : 2 -> 2
[4.9, 2.4, 3.3, 1.0] : 2 -> 2
[6.2, 2.9, 4.3, 1.3] : 2 -> 2
[5.5, 2.3, 4.0, 1.3] : 2 -> 2
[5.0, 3.4, 1.5, 0.2] : 1 -> 1
[4.6, 3.1, 1.5, 0.2] : 1 -> 1
[5.0, 2.0, 3.5, 1.0] : 2 -> 2
[7.0, 3.2, 4.7, 1.4] : 2 -> 2
[5.7, 2.8, 4.1, 1.3] : 2 -> 2
[5.2, 2.7, 3.9, 1.4] : 2 -> 2
[6.1, 2.8, 4.0, 1.3] : 2 -> 2
[6.9, 3.1, 5.4, 2.1] : 3 -> 3
[5.0, 3.2, 1.2, 0.2] : 1 -> 1
[5.5, 2.5, 4.0, 1.3] : 2 -> 2
[6.7, 3.1, 4.7, 1.5] : 2 -> 2
[6.7, 3.1, 4.4, 1.4] : 2 -> 2
[6.1, 3.0, 4.6, 1.4] : 2 -> 2
[6.9, 3.2, 5.7, 2.3] : 3 -> 3
[6.7, 3.3, 5.7, 2.5] : 3 -> 3
[6.8, 3.2, 5.9, 2.3] : 3 -> 3
[6.4, 2.7, 5.3, 1.9] : 3 -> 3

In [13]:
nn.weights()

输入层权重：
[0.30563188105000993, 0.5002109776947306, 0.5072629022093491, -4.161579448522327, 0.5062240976711463]
[2.4333773497197986, 0.767445918232545, 0.7650363609045148, -9.466223606466615, 0.7656755062053946]
[-3.4197701344731426, 0.3537319954049937, 0.37773794052590776, 8.767540396971134, 0.3729717138337643]
[-1.5693058163698062, 0.17852223014988272, 0.19160094047725998, 9.785742012387557, 0.18893909395518024]
[0.8113573527683816, 0.5855306348573399, 0.5853289187557013, -5.174277010774653, 0.585449532277854]

隐藏层权重：
[7.557036148537164, -7.634063226854593, -1.3124312035893901]
[-1.0710665742224672, 1.25616303127309, -1.1224406682033232]
[-1.2947035902727753, 0.9698175015299089, -1.5010449394306558]
[-7.991729659252586, -6.576007298524501, 7.9080386634857796]
[-1.2198325731371396, 1.0598852014664262, -1.3933700492659173]
