### 一、用神经网络实现鸢尾花分类

In [2]:
# Import the Keras model library and create the model object
from keras.models import Sequential  # sequential model: a linear stack of layers
model = Sequential()  # create an empty model object named `model`

In [4]:
from keras.layers import Dense, Dropout  # layer classes
# Define the architecture as a single layer list. Rebinding `model` here (instead of
# calling model.add on the object created in an earlier cell) keeps this cell
# idempotent: running it again rebuilds the same network rather than stacking a
# second copy of the layers behind the softmax output.
model = Sequential([
    Dense(16, activation='relu', input_shape=(4,)),  # hidden layer 1, fed by the 4 input features
    Dense(16, activation='relu'),  # hidden layer 2
    Dropout(0.25),  # dropout with rate 0.25 applied to the output of hidden layer 2
    Dense(3, activation='softmax'),  # output layer: one softmax unit per class (3 classes)
])

In [5]:
# Compile the network
model.compile(
    loss='categorical_crossentropy',  # multi-class cross-entropy loss
    optimizer='adam',                 # Adam, a gradient-descent optimizer
    metrics=["accuracy"],             # monitored metrics; accuracy is the usual choice for classification
)

In [6]:
# Prepare the data
import os

import pandas as pd
from sklearn.model_selection import train_test_split

# Location of the raw Iris file. The original local path stays the default, so the
# notebook runs unchanged on the machine it was written on; set the IRIS_DATA_PATH
# environment variable to run it anywhere else without editing the code.
IRIS_DATA_PATH = os.environ.get(
    'IRIS_DATA_PATH',
    'D:\\2021-2022学年第一、二学期文件汇总\\第二学期文件\\深度学习与应用实践\\数据集\\Keras自带数据集\\iris.data.txt',
)
IRIS_COLUMNS = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class']

# The file is read without a header row, so the column names are supplied explicitly.
data = pd.read_csv(IRIS_DATA_PATH, header=None, names=IRIS_COLUMNS)
print(data.iloc[0:5,:])  # show the first 5 rows

   sepal length  sepal width  petal length  petal width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [7]:
# Features X: the first 4 columns (the data has no id column)
X = data.iloc[:,0:4].values.astype(float)

# Class name -> integer label
CLASS_TO_INDEX = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

# Labels y: the 5th column, translated through the mapping. `data` itself is left
# untouched, so the frame displayed earlier stays valid and no integers are written
# into the string-valued 'class' column.
class_index = data.iloc[:,4].map(CLASS_TO_INDEX)
if class_index.isna().any():
    # Fail loudly on a class name the mapping does not know, instead of producing bad labels.
    unknown = sorted(set(data.iloc[:,4][class_index.isna()].astype(str)))
    raise ValueError('unexpected class names: {}'.format(unknown))
y = class_index.values.astype(int)

In [8]:
# Split the data into a training set (80%) and a test set (20%); random_state=0 is passed as the split seed
split = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=0)
train_x, test_x, train_y, test_y = split

In [9]:
# Standardize the features column-wise (zero mean, unit standard deviation on the training set)
mean, std = train_x.mean(axis=0), train_x.std(axis=0)
# Test (and validation) data must be scaled with the training-set mean and std, never their own
test_x = (test_x - mean) / std
train_x = (train_x - mean) / std
print(train_x[0:5,:])

[[ 0.61303014  0.10850105  0.94751783  0.73603967]
 [-0.56776627 -0.12400121  0.38491447  0.34808318]
 [-0.80392556  1.03851009 -1.30289562 -1.3330616 ]
 [ 0.25879121 -0.12400121  0.60995581  0.73603967]
 [ 0.61303014 -0.58900572  1.00377816  1.25331499]]


In [10]:
# One-hot encode the labels: with the categorical_crossentropy loss used for this
# multi-class problem, each label must be a one-hot vector.
# to_categorical is imported from keras.utils directly (the same import the MNIST
# section of this notebook uses) rather than through the legacy np_utils module.
from keras.utils import to_categorical
train_y_ohe = to_categorical(train_y, 3)
test_y_ohe = to_categorical(test_y, 3)
print("前5条测试数据标签值:", test_y[0:5])
print("前5条测试数据标签的独热码:\n", test_y_ohe[0:5])

前5条测试数据标签值: [2 1 0 2 0]
前5条测试数据标签的独热码:
 [[0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]]


In [11]:
# Train the model
# NOTE(review): the test split is reused as validation data. If these per-epoch
# validation metrics are used to tune the model, the score from the later
# evaluate cell is no longer an independent estimate.
model.fit(
    train_x,
    train_y_ohe,
    epochs=20,      # number of passes over the training data
    batch_size=1,   # number of samples per training step
    verbose=2,      # print the training progress
    validation_data=(test_x, test_y_ohe),  # validation set
)

Instructions for updating:
Use tf.cast instead.
Train on 120 samples, validate on 30 samples
Epoch 1/20
 - 1s - loss: 1.1010 - accuracy: 0.3500 - val_loss: 1.0736 - val_accuracy: 0.3667
Epoch 2/20
 - 0s - loss: 1.0564 - accuracy: 0.5000 - val_loss: 1.0186 - val_accuracy: 0.7333
Epoch 3/20
 - 0s - loss: 0.9683 - accuracy: 0.7167 - val_loss: 0.8634 - val_accuracy: 0.8000
Epoch 4/20
 - 0s - loss: 0.7941 - accuracy: 0.7917 - val_loss: 0.6517 - val_accuracy: 0.7333
Epoch 5/20
 - 0s - loss: 0.5670 - accuracy: 0.8500 - val_loss: 0.5359 - val_accuracy: 0.7333
Epoch 6/20
 - 0s - loss: 0.4616 - accuracy: 0.8167 - val_loss: 0.4326 - val_accuracy: 0.8333
Epoch 7/20
 - 0s - loss: 0.3636 - accuracy: 0.8917 - val_loss: 0.4226 - val_accuracy: 0.8000
Epoch 8/20
 - 0s - loss: 0.3324 - accuracy: 0.9000 - val_loss: 0.2795 - val_accuracy: 0.9000
Epoch 9/20
 - 0s - loss: 0.2907 - accuracy: 0.9000 - val_loss: 0.2252 - val_accuracy: 0.9333
Epoch 10/20
 - 0s - loss: 0.2376 - accuracy: 0.9167 - val_loss: 0.1642

<keras.callbacks.callbacks.History at 0x1bb6e15b8c8>

In [12]:
# Evaluate the model on the test split
test_metrics = model.evaluate(test_x, test_y_ohe, verbose=2)
loss, accuracy = test_metrics  # the printed output shows a loss and an accuracy value
print('loss={}, accuracy={}'.format(loss,accuracy))

loss=0.021549079567193985, accuracy=1.0


In [13]:
# Inspect the predictions: the probability of each class for every test sample
classes = model.predict(
    test_x,
    batch_size=1,
    verbose=2,
)
n_test_samples = len(classes)
print('测试样本数:', n_test_samples)
print('分类概率:\n', classes)

测试样本数: 30
分类概率:
 [[1.30108732e-04 1.17401620e-02 9.88129735e-01]
 [1.96139468e-03 9.83630121e-01 1.44085074e-02]
 [9.99736845e-01 1.51401808e-04 1.11810376e-04]
 [1.55222908e-04 1.33581404e-02 9.86486673e-01]
 [9.99733865e-01 1.53527086e-04 1.12639056e-04]
 [1.10519053e-04 1.03057837e-02 9.89583671e-01]
 [9.99735057e-01 1.52698616e-04 1.12316528e-04]
 [2.31557665e-03 9.65813398e-01 3.18710841e-02]
 [2.34395056e-03 9.73973393e-01 2.36825775e-02]
 [2.96352291e-03 9.79488313e-01 1.75482407e-02]
 [7.63197837e-04 8.42684135e-02 9.14968431e-01]
 [2.20927433e-03 9.78612423e-01 1.91782843e-02]
 [2.31752917e-03 9.77661192e-01 2.00212859e-02]
 [2.39464128e-03 9.58709240e-01 3.88961583e-02]
 [2.52015283e-03 9.49719906e-01 4.77598906e-02]
 [9.99730051e-01 1.56250419e-04 1.13694223e-04]
 [2.92511052e-03 8.93834591e-01 1.03240289e-01]
 [2.14024261e-03 9.71220374e-01 2.66393293e-02]
 [9.99724805e-01 1.60037656e-04 1.15149174e-04]
 [9.99736011e-01 1.51896311e-04 1.12003552e-04]
 [3.02180415e-04 2.5056

### 二、用神经网络实现手写数字识别

#### 数据准备

In [14]:
from keras.datasets import mnist

In [15]:
# Load the data: an (images, labels) pair for the training split and one for the test split
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [16]:
# Shape of the image array of each split
for caption, images in (('训练集数据形状:', train_images), ('测试集数据形状:', test_images)):
    print(caption, images.shape)

# Number of labels and the first five labels of each split
for count_caption, head_caption, labels in (
    ('训练集标签长度:', '训练集标签(前5个):', train_labels),
    ('测试集标签长度:', '测试集标签(前5个):', test_labels),
):
    print(count_caption, len(labels), head_caption, labels[0:5])

训练集数据形状: (60000, 28, 28)
测试集数据形状: (10000, 28, 28)
训练集标签长度: 60000 训练集标签(前5个): [5 0 4 1 9]
测试集标签长度: 10000 测试集标签(前5个): [7 2 1 0 4]


In [17]:
# Data preprocessing
def flatten_and_scale(images):
    """Flatten every image to a vector of 28 * 28 values and divide the values by 255.

    The number of samples is read from the array itself instead of being
    hard-coded, so the same code works for the full splits and for any subset.

    Args:
        images: array whose first axis indexes the samples and whose remaining
            axes hold 28 * 28 values per sample.

    Returns:
        A float array of shape (number of samples, 28 * 28).
    """
    return images.reshape(len(images), 28 * 28).astype(float) / 255


# NOTE: run this cell once per data load. The names are rebound, so running it
# again would divide the already normalized values by 255 a second time.
train_images = flatten_and_scale(train_images)  # training images: reshape + normalize
test_images = flatten_and_scale(test_images)    # test images: reshape + normalize

In [18]:
# One-hot encoding
from keras.utils import to_categorical

NUM_DIGIT_CLASSES = 10  # must equal the number of units in the model's softmax output layer

# The class count is passed explicitly so that both label matrices are guaranteed
# to have the same width, even for a split that happens to lack the highest digit.
# NOTE: run this cell once per data load. The names are rebound, so running it
# again would encode the one-hot matrices a second time.
train_labels = to_categorical(train_labels, NUM_DIGIT_CLASSES)
test_labels = to_categorical(test_labels, NUM_DIGIT_CLASSES)

#### 构建神经网络并预测

In [19]:
# Import the Keras model library and create the model object
from keras.models import Sequential  # sequential model: a linear stack of layers
model = Sequential()  # create an empty model object named `model` (rebinds the name used by the iris section)

In [20]:
from keras.layers import Dense, Dropout  # layer classes
# Define the architecture as a single layer list. Rebinding `model` here (instead of
# calling model.add on the object created in an earlier cell) keeps this cell
# idempotent: running it again rebuilds the same network rather than stacking a
# second copy of the layers behind the softmax output.
model = Sequential([
    Dense(512, activation='relu', input_shape=(28 * 28,)),  # hidden layer, fed by the flattened image
    Dropout(0.25),  # dropout with rate 0.25 applied to the hidden layer's output
    Dense(10, activation='softmax'),  # output layer: one softmax unit per digit class
])

In [21]:
# Compile the network
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=["accuracy"],
)

In [22]:
# Train the model
model.fit(
    train_images,
    train_labels,
    epochs=10,
    batch_size=100,
    verbose=2,
)

Epoch 1/10
 - 4s - loss: 0.2792 - accuracy: 0.9190
Epoch 2/10
 - 4s - loss: 0.1202 - accuracy: 0.9645
Epoch 3/10
 - 4s - loss: 0.0848 - accuracy: 0.9743
Epoch 4/10
 - 4s - loss: 0.0656 - accuracy: 0.9795
Epoch 5/10
 - 4s - loss: 0.0524 - accuracy: 0.9840
Epoch 6/10
 - 4s - loss: 0.0423 - accuracy: 0.9865
Epoch 7/10
 - 4s - loss: 0.0359 - accuracy: 0.9889
Epoch 8/10
 - 4s - loss: 0.0322 - accuracy: 0.9898
Epoch 9/10
 - 4s - loss: 0.0284 - accuracy: 0.9908
Epoch 10/10
 - 4s - loss: 0.0242 - accuracy: 0.9922


<keras.callbacks.callbacks.History at 0x1bb003666c8>

In [23]:
# Evaluate the model on the test split
test_metrics = model.evaluate(test_images, test_labels, verbose=2)
loss, accuracy = test_metrics  # the printed output shows a loss and an accuracy value
print('loss={}, accuracy={}'.format(loss,accuracy))

loss=0.05924447892890312, accuracy=0.9818000197410583


In [24]:
# Inspect the predictions: the probability of each class for every test sample
classes = model.predict(
    test_images,
    batch_size=50,
    verbose=2,
)
n_test_samples = len(classes)
print('测试样本数:', n_test_samples)
print('分类概率:\n', classes)

测试样本数: 10000
分类概率:
 [[9.76836168e-10 9.45367340e-10 1.70532473e-07 ... 9.99984741e-01
  4.54643967e-09 1.81535199e-07]
 [5.28110444e-10 6.06754520e-06 9.99993920e-01 ... 4.90916420e-16
  8.35108604e-09 1.48418129e-14]
 [6.89076032e-08 9.99727428e-01 1.38642345e-05 ... 9.76964511e-05
  7.66004523e-05 8.99667896e-08]
 ...
 [5.04747516e-16 8.06594239e-14 1.96016892e-15 ... 2.20869381e-07
  1.18813029e-10 4.65554386e-08]
 [5.63234570e-10 4.29574128e-12 2.48469339e-14 ... 1.07705962e-10
  3.70525618e-06 3.15080392e-11]
 [4.50090676e-09 1.21598830e-13 3.13055359e-08 ... 4.08666868e-14
  3.21960094e-08 1.72497953e-11]]
