In [None]:
import preprocess
import numpy as np
import pandas as pd
import seaborn as sns

from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix

from keras import Sequential
from keras.layers import *
from keras.utils import plot_model
from attention import Attention


In [None]:
# ---- Training configuration -------------------------------------------------
# Mini-batch size handed to model.fit.
batch_size = 128
# Number of passes over the training data.
epochs = 20
# Width of the final softmax layer (one unit per class).
num_classes = 10
# Value forwarded to preprocess.prepro as `length`.
length = 2048
# Whether to apply batch normalization.
BatchNorm = True
# Number of samples per class (forwarded to preprocess.prepro as `number`).
number = 1000
# Whether to standardize the data (forwarded to preprocess.prepro as `normal`).
normal = True
# Split ratio forwarded to preprocess.prepro as `rate`; three fractions,
# presumably train / validation / test in that order - confirm in preprocess.
rate = [0.7, 0.2, 0.1]


In [None]:
path = 'data/0HP'

# Load the data and split it into train / validation / test parts.
# `enc` and `enc_step` are forwarded unchanged; their meaning is defined in
# preprocess.prepro (not visible in this notebook).
x_train, y_train, x_valid, y_valid, x_test, y_test = preprocess.prepro(
    d_path=path,
    length=length,
    number=number,
    normal=normal,
    rate=rate,
    enc=True,
    enc_step=28,
)

# The convolution layers need an explicit channel axis, so append one of size 1.
x_train = x_train[:, :, np.newaxis]
x_valid = x_valid[:, :, np.newaxis]
x_test = x_test[:, :, np.newaxis]

# Per-sample input shape (everything after the batch axis).
input_shape = x_train.shape[1:]

print('训练样本维度:', x_train.shape)
print(x_train.shape[0], '训练样本个数')
print('验证样本的维度', x_valid.shape)
print(x_valid.shape[0], '验证样本个数')
print('测试样本的维度', x_test.shape)
print(x_test.shape[0], '测试样本个数')

# Show the input shape plus a small preview instead of echoing the whole
# training array as the cell output.
input_shape, x_train[:2]


In [None]:
# Build the network: three Conv1D stages (with pooling after the second and
# third), a bidirectional GRU, dropout, an attention layer and a softmax head.
#
# The BatchNorm flag from the configuration cell decides whether a
# BatchNormalization layer follows each convolution; with BatchNorm = True the
# layer stack is the same as the previously hard-coded one.
mynn_layers = [
    Conv1D(filters=16, kernel_size=32, strides=2, padding='same', activation='relu', input_shape=input_shape),
]
if BatchNorm:
    mynn_layers.append(BatchNormalization())

mynn_layers.append(Conv1D(filters=32, kernel_size=3, strides=1, padding='same', activation='relu'))
if BatchNorm:
    mynn_layers.append(BatchNormalization())
mynn_layers.append(MaxPooling1D(pool_size=8))

mynn_layers.append(Conv1D(filters=64, kernel_size=3, strides=1, padding='same', activation='relu'))
if BatchNorm:
    mynn_layers.append(BatchNormalization())
mynn_layers.append(MaxPooling1D(pool_size=4))

mynn_layers.extend(
    [
        # return_sequences=True keeps the per-timestep outputs for the attention layer.
        Bidirectional(GRU(units=128, return_sequences=True)),
        Dropout(0.5),
        # Attention is imported from the `attention` module; the meaning of the
        # 256 argument is defined there - presumably it relates to the
        # 2 x 128 bidirectional GRU features (TODO confirm).
        Attention(256),
        Dense(units=num_classes, activation='softmax'),
    ]
)

model = Sequential(mynn_layers, name='mynn')

# Show the model structure.
model.summary()
# Optional: render the architecture to an image file.
# plot_model(model=model, to_file='mynn.png', show_shapes=True)


In [None]:
# Compile the model. Metrics resemble the loss function, except that metric
# results are only reported and are not used during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

# Train the model, reporting loss/metrics on the validation split as well.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_valid, y_valid),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
    verbose=1,
)


In [None]:
# Evaluate the trained model on the test split.
# The first entry of the result is printed as the loss, the second as the accuracy.
score = model.evaluate(x=x_test, y=y_test)
test_loss, test_accuracy = score[0], score[1]
print("测试集上的损失：", test_loss)
print("测试集上的精度:", test_accuracy)


In [None]:
# Training curves: one column (and therefore one line) per quantity stored in
# history.history, one row per epoch.
run_data = pd.DataFrame(history.history)

fig, ax = plt.subplots(figsize=(9, 6))
run_data.plot(ax=ax)

ax.grid(True, axis='y')
ax.legend(loc='upper right')
ax.set_xlabel('epochs')
ax.set_ylabel('rate')
# Derive the tick range from the number of recorded epochs instead of a
# hard-coded 21, so the axis stays correct when `epochs` is changed.
ax.set_xticks(np.arange(0, len(run_data) + 1, 2))
# NOTE: the y-window is fixed to [0, 1]; any value above 1 is drawn outside it.
ax.set_ylim(-0.01, 1.01)
fig.savefig('fig_run.png')
plt.show()


In [None]:
# Model prediction: one row of class scores per test sample.
y_pred = model.predict(x_test)

# Convert one-hot rows (labels) and score rows (predictions) to integer class
# indices in a single vectorised call each.
y_test_int = np.argmax(y_test, axis=1)
y_pred_int = np.argmax(y_pred, axis=1)

# Build the confusion matrix. The label set is pinned to 0..num_classes-1 so
# the matrix is always num_classes x num_classes, even when some class never
# appears in either the test labels or the predictions.
test_confu_matrix = confusion_matrix(y_test_int, y_pred_int, labels=np.arange(num_classes))


In [None]:
fig, ax = plt.subplots(figsize=(9, 6))

# Display names for the classes, in class-index order.
# NOTE(review): assumes the class indices produced by preprocess follow this
# order - confirm against preprocess.prepro.
labels = ['12k_DE_B007_0', '12k_DE_B014_0', '12k_DE_B021_0', '12k_DE_IR007_0', '12k_DE_IR014_0',
          '12k_DE_IR021_0', '12k_DE_OR007@6_0', '12k_DE_OR014@6_0', '12k_DE_OR021@6_0', 'normal_0']

# Fail early with a clear message if the matrix and the label list disagree.
if test_confu_matrix.shape != (len(labels), len(labels)):
    raise ValueError(
        'confusion matrix shape %s does not match %d class labels'
        % (test_confu_matrix.shape, len(labels))
    )

# fmt='d' prints the counts as plain integers; the default annotation format
# would render a count such as 100 in scientific notation.
sns.heatmap(test_confu_matrix, annot=True, fmt='d', cmap="Blues",
            xticklabels=labels, yticklabels=labels, ax=ax)
plt.setp(ax.get_xticklabels(), rotation=45)
plt.setp(ax.get_yticklabels(), rotation=45)
ax.set_title('Confusion Matrix')
# Columns of the matrix are predictions, rows are the true classes.
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
# bbox_inches='tight' keeps the long rotated tick labels inside the saved image.
fig.savefig('fig_confusion_matrix.png', bbox_inches='tight')
plt.show()
