## train.ipynb: Define and Train the model
- 测试各种读取数据的方式
- 读取数据升级
- 修改反向传播的训练方法为Adam
- 加入model.compile

In [1]:
# Import the necessary libraries
import numpy as np
import scipy
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import utils
import matplotlib.pyplot as plt
%matplotlib inline
#from sklearn.cross_validation import StratifiedKFold

In [2]:
# Global variables shared by the cells of this notebook.
# Number of principal components retained by PCA, i.e. the number of
# features kept per pixel.
numPCAcomponents = 30
# Side length of the square patch window.
windowSize = 5
# Proportion of samples held out as the test set. The value is embedded in the
# .npy file names under ./predata, and every file listed there uses 0.8, so the
# former 0.50 did not match any stored file.
testRatio = 0.80

## 用glob 库进行读取

In [3]:
import glob

In [4]:
# Collect every entry directly under the data directory with a glob wildcard
# and show the resulting list.
data_dir = "./predata"
everything_pattern = data_dir + '/*'
data_root = glob.glob(everything_pattern)
print(data_root)

['./predata\\readme.txt', './predata\\X_All_WS_5_PCA_30_testRatio_0.8.npy', './predata\\X_test_WS_5_PCA_30_testRatio_0.8.npy', './predata\\X_train_WS_5_PCA_30_testRatio_0.8.npy', './predata\\y_All_WS_5_PCA_30_testRatio_0.8.npy', './predata\\y_test_WS_5_PCA_30_testRatio_0.8.npy', './predata\\y_train_WS_5_PCA_30_testRatio_0.8.npy']


In [5]:
# Print each path matched under data_dir on its own line.
for path in glob.iglob(data_dir + '/*'):
    print(path)

./predata\readme.txt
./predata\X_All_WS_5_PCA_30_testRatio_0.8.npy
./predata\X_test_WS_5_PCA_30_testRatio_0.8.npy
./predata\X_train_WS_5_PCA_30_testRatio_0.8.npy
./predata\y_All_WS_5_PCA_30_testRatio_0.8.npy
./predata\y_test_WS_5_PCA_30_testRatio_0.8.npy
./predata\y_train_WS_5_PCA_30_testRatio_0.8.npy


## 用pathlib读取

In [6]:
import pathlib

In [7]:
# Wrap the data directory in a pathlib.Path and print every entry it contains.
data_dir = "./predata"
data_root = pathlib.Path(data_dir)
for entry in data_root.iterdir():
    print(entry)

predata\readme.txt
predata\X_All_WS_5_PCA_30_testRatio_0.8.npy
predata\X_test_WS_5_PCA_30_testRatio_0.8.npy
predata\X_train_WS_5_PCA_30_testRatio_0.8.npy
predata\y_All_WS_5_PCA_30_testRatio_0.8.npy
predata\y_test_WS_5_PCA_30_testRatio_0.8.npy
predata\y_train_WS_5_PCA_30_testRatio_0.8.npy


## 用OS读取

In [8]:
import os

In [9]:
# os.listdir returns a list with the names of the files and folders contained
# in the given directory (names only, without the directory prefix).
data_dir = "./predata"
data_root = os.listdir(data_dir)
data_root

['readme.txt',
 'X_All_WS_5_PCA_30_testRatio_0.8.npy',
 'X_test_WS_5_PCA_30_testRatio_0.8.npy',
 'X_train_WS_5_PCA_30_testRatio_0.8.npy',
 'y_All_WS_5_PCA_30_testRatio_0.8.npy',
 'y_test_WS_5_PCA_30_testRatio_0.8.npy',
 'y_train_WS_5_PCA_30_testRatio_0.8.npy']

In [10]:
# Usage example: build the full path of every entry in the data directory.
# The directory is written with a forward slash: the former backslash literal
# contained "\p", which is an invalid escape sequence (a warning today, an
# error in future Python versions), and a backslash path only works on Windows.
data_dir = "./predata"
for each_file in os.listdir(data_dir):
    print(os.path.join(data_dir, each_file))

.\predata\readme.txt
.\predata\X_All_WS_5_PCA_30_testRatio_0.8.npy
.\predata\X_test_WS_5_PCA_30_testRatio_0.8.npy
.\predata\X_train_WS_5_PCA_30_testRatio_0.8.npy
.\predata\y_All_WS_5_PCA_30_testRatio_0.8.npy
.\predata\y_test_WS_5_PCA_30_testRatio_0.8.npy
.\predata\y_train_WS_5_PCA_30_testRatio_0.8.npy


In [11]:
# Test-set proportion; str(testRatio) becomes part of the .npy file names
# that are loaded from ./predata.
testRatio = 0.8

In [12]:
# Load one specific, pre-generated data split by name.
# Every file is named <array>_WS_<windowSize>_PCA_<numPCAcomponents>_testRatio_<testRatio>.npy,
# so the shared suffix is built once and reused for all four arrays.
file_suffix = ("_WS_" + str(windowSize)
               + "_PCA_" + str(numPCAcomponents)
               + "_testRatio_" + str(testRatio)
               + ".npy")
X_train = np.load("./predata/X_train" + file_suffix)
y_train = np.load("./predata/y_train" + file_suffix)
X_test = np.load("./predata/X_test" + file_suffix)
y_test = np.load("./predata/y_test" + file_suffix)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((7919, 5, 5, 30), (7919,), (8200, 5, 5, 30), (8200,))

In [13]:
# Shapes of the four arrays, in the order train X, train y, test X, test y.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((7919, 5, 5, 30), (7919,), (8200, 5, 5, 30), (8200,))

In [14]:
# Element types of the four arrays, in the order train X, test X, train y, test y.
tuple(arr.dtype for arr in (X_train, X_test, y_train, y_test))

(dtype('float64'), dtype('float64'), dtype('float64'), dtype('float64'))

In [15]:
# One-hot encode the class labels and derive the model hyper-parameters.
# No reshape is performed here: the loaded arrays are used as they are,
# i.e. (samples, windowSize, windowSize, numPCAcomponents).

# Number of target classes; it must equal the width of the final softmax
# layer of the model (16).
num_classes = 16

# Convert class labels to one-hot encoding.
# num_classes is passed explicitly so that the train and test encodings always
# have the same width, even if one split happens to lack the highest label.
# The ndim guard makes the cell safe to re-run: encoding labels that are
# already one-hot would silently add an extra axis.
if y_train.ndim == 1:
    y_train = utils.to_categorical(y_train, num_classes=num_classes)
if y_test.ndim == 1:
    y_test = utils.to_categorical(y_test, num_classes=num_classes)

# Define the input shape (shape of a single sample)
input_shape = X_train[0].shape
print(input_shape)

# Number of filters of the first convolution layer
C1 = 3*numPCAcomponents
C1

(5, 5, 30)


90

In [16]:
# Shapes after one-hot encoding, in the order train X, train y, test X, test y.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((7919, 5, 5, 30), (7919, 16), (8200, 5, 5, 30), (8200, 16))

In [17]:
# Model structure: two 3x3 ReLU convolutions (C1 and 3*C1 filters) followed by
# dropout, then a flattened dense head ending in a 16-way softmax.
model = Sequential([
    Conv2D(C1, (3, 3), activation='relu', input_shape=input_shape),
    Conv2D(3*C1, (3, 3), activation='relu'),
    Dropout(0.25),
    Flatten(),
    Dense(6*numPCAcomponents, activation='relu'),
    Dropout(0.5),
    Dense(16, activation='softmax'),
])

In [18]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 3, 3, 90)          24390     
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 1, 1, 270)         218970    
_________________________________________________________________
dropout (Dropout)            (None, 1, 1, 270)         0         
_________________________________________________________________
flatten (Flatten)            (None, 270)               0         
_________________________________________________________________
dense (Dense)                (None, 180)               48780     
_________________________________________________________________
dropout_1 (Dropout)          (None, 180)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                2896      

In [19]:
# Optimizer, learning-rate schedule and model compilation.

# Adam optimizer with an initial learning rate of 1e-3.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)

# Reduce the learning rate when the monitored metric has stopped improving:
# multiply it by 0.9 after 25 epochs without a better val_accuracy, never
# going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.9,
    patience=25,
    min_lr=0.000001,
    verbose=1,
)

model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [20]:
# Train the model. The test split doubles as validation data, so the
# val_* metrics (and the learning-rate schedule) are computed on it.
fit_options = dict(
    batch_size=32,
    epochs=5,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=[reduce_lr],
    shuffle=True,
)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# save the model with h5py
# import h5py
# from tensorflow.keras.models import load_model
# model.save('./model/HSI_model.h5')

In [22]:
# Save a diagram of the model with plot_model (it needs pydot and graphviz to
# be installed), then list the metrics recorded during training.
from tensorflow.keras.utils import plot_model

diagram_path = './model/model.png'
plot_model(model, to_file=diagram_path, show_shapes=True)
print(history.history.keys())

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy', 'lr'])


In [None]:
# Show the model diagram, if it was actually written.
# plot_model only reports a message and writes nothing when pydot/graphviz are
# missing, so the file is checked first instead of letting plt.imread raise
# FileNotFoundError.
model_png = './model/model.png'
if os.path.exists(model_png):
    model_img = plt.imread(model_png)
    # plt.figure(dpi=180)
    plt.imshow(model_img)
    plt.show()
else:
    print(model_png + " not found - install pydot and graphviz and re-run plot_model")

In [None]:
# Display the per-epoch values recorded by model.fit (a dict keyed by metric name).
history.history

In [None]:
# Accuracy per epoch for the training data and the validation (test) data,
# saved as SVG and then displayed.
fig, ax = plt.subplots()
for metric in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[metric])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.grid(True)
ax.legend(['train', 'test'], loc='upper left')
fig.savefig("./result/model_accuracy_100.svg")
plt.show()

In [None]:
# Loss per epoch for the training data and the validation (test) data,
# saved as SVG and then displayed.
fig, ax = plt.subplots()
for metric in ('loss', 'val_loss'):
    ax.plot(history.history[metric])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.grid(True)
ax.legend(['train', 'test'], loc='upper right')
fig.savefig("./result/model_loss_100.svg")
plt.show()