In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix
from scipy.io import wavfile
import tensorflow as tf
import tensorflow.keras.backend as K
import itertools
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from quality_control import get_FC_model, get_rf_cnn_model, get_rf_LSTM_model, \
                            get_rf_transformer_model, plot_confusion_matrix

In [None]:
# Load data: x_all holds the RF inputs and y_all the matching QC labels,
# both read from .npy files under ./data.
x_all, y_all = (
    np.load('./data/x_all.npy'),  # RF
    np.load('./data/y_all.npy'),  # QC Label
)

In [None]:
# Stratified 80/20 train/test split (test_size=0.2).
# random_state is fixed so that Restart & Run All reproduces the same split;
# without it every kernel restart draws a different test set.
x_train, x_test, y_train, y_test = train_test_split(
    x_all, y_all, test_size=0.2, shuffle=True, stratify=y_all, random_state=42
)

# Inputs become (n_samples, n_points, 1) float32 and labels (n_samples, 1) float32.
# n_points is inferred with -1 (the notebook originally hard-coded 600), which gives
# the same result whenever the hard-coded reshape succeeded.
x_train = x_train.reshape([x_train.shape[0], -1, 1]).astype(np.float32)
y_train = y_train.reshape([y_train.shape[0], 1]).astype(np.float32)

x_test = x_test.reshape([x_test.shape[0], -1, 1]).astype(np.float32)
y_test = y_test.reshape([y_test.shape[0], 1]).astype(np.float32)

In [None]:
# balanced sampling
def _split_by_class(x, y):
    """Split samples into a positive and a negative group with one-hot targets.

    Only column 0 of ``y`` is read: a value of 1 marks a positive sample and a
    value of 0 a negative one. This holds both for the (n, 1) label column and
    for the (n, 2) one-hot array this cell later stores in ``y_test``, so the
    cell can be re-run without mislabelling samples.

    Returns:
        (x_pos, y_pos, x_neg, y_neg), where every row of ``y_pos`` is [1, 0]
        and every row of ``y_neg`` is [0, 1].

    Raises:
        ValueError: if either class has no samples (an empty dataset cannot be
            repeated and sampled from).
    """
    pos_mask = y[:, 0] == 1
    neg_mask = y[:, 0] == 0
    x_pos, x_neg = x[pos_mask], x[neg_mask]
    if x_pos.shape[0] == 0 or x_neg.shape[0] == 0:
        raise ValueError("balanced sampling needs at least one sample of each class")

    y_pos = np.zeros([x_pos.shape[0], 2])
    y_pos[:, 0] = 1
    y_neg = np.zeros([x_neg.shape[0], 2])
    y_neg[:, 1] = 1
    return x_pos, y_pos, x_neg, y_neg


# Training stream: each class is repeated indefinitely and the two streams are
# sampled with equal weight.
x_train_pos, y_train_pos, x_train_neg, y_train_neg = _split_by_class(x_train, y_train)
pos_dataset = tf.data.Dataset.from_tensor_slices((x_train_pos, y_train_pos)).repeat()
neg_dataset = tf.data.Dataset.from_tensor_slices((x_train_neg, y_train_neg)).repeat()
balanced_dataset = tf.data.experimental.sample_from_datasets([pos_dataset, neg_dataset],weights=[0.5, 0.5])

# Same balanced stream built from the test split.
x_test_pos, y_test_pos, x_test_neg, y_test_neg = _split_by_class(x_test, y_test)
pos_dataset_test = tf.data.Dataset.from_tensor_slices((x_test_pos, y_test_pos)).repeat()
neg_dataset_test = tf.data.Dataset.from_tensor_slices((x_test_neg, y_test_neg)).repeat()
balanced_dataset_test = tf.data.experimental.sample_from_datasets([pos_dataset_test, neg_dataset_test],weights=[0.5, 0.5])

# reshape y_test to one-hot: column 0 flags label 1, column 1 flags label 0.
# Column 0 alone is inspected so that re-running this cell (when y_test is
# already one-hot) yields the same array instead of marking every row negative.
test_labels = y_test[:, 0]
y_test_onehot = np.zeros([y_test.shape[0], 2])
y_test_onehot[test_labels == 1, 0] = 1
y_test_onehot[test_labels == 0, 1] = 1
y_test = y_test_onehot

# Fully-connected model

In [None]:
K.clear_session()
input_shape = [x_train.shape[1], 1]
model = get_FC_model(input_shape)

# Save a checkpoint only when val_loss improves. The deprecated `period=1`
# argument is omitted: checking once per epoch is already the default.
checkpoint = ModelCheckpoint(
    "FC_model_{epoch:d}_{val_loss:.4f}.h5",
    monitor='val_loss', save_best_only=True, mode='min',
)
earlyStopping = EarlyStopping(monitor='val_loss', mode='min', patience=20, min_delta=0.003)

# NOTE(review): validation batches come from balanced_dataset_test, i.e. the
# test split, so checkpoint selection and early stopping see the same data that
# the evaluation cell scores afterwards.
train_batches = balanced_dataset.repeat().shuffle(1000).batch(8)
val_batches = balanced_dataset_test.shuffle(100).batch(8)
history = model.fit(
    train_batches,
    steps_per_epoch=200,
    epochs=2000,
    verbose=True,
    callbacks=[checkpoint, earlyStopping],
    validation_data=val_batches,
    validation_steps=200,
)

In [None]:
# Training vs. validation loss per epoch for the fully-connected model.
# The curves are losses, so they are named accordingly (they used to be stored
# in variables called `accuracy` / `val_accuracy`).
plt.figure(figsize=(8, 8))
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_loss) + 1)

plt.plot(epochs, train_loss, label='Training loss')
plt.plot(epochs, val_loss, label='Validation loss')
plt.xlabel('Epoch', size=20)
plt.ylabel('Loss', size=20)
plt.ylim([0, 1.0])  # losses above 1.0 fall outside the visible range
plt.xlim([0, len(train_loss) + 1])
plt.yticks(size=20)
plt.xticks(size=20)
# Legend font size matches the LSTM / Transformer history plots.
plt.legend(prop={'size': 20})
plt.savefig("history_FC", dpi=300)
plt.show()
plt.close()

In [None]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map and save the figure.

    Note: this definition replaces the `plot_confusion_matrix` imported from
    `quality_control` earlier in the notebook.

    Args:
        cm: 2-D NumPy array; rows are plotted as true labels and columns as
            predicted labels.
        classes: Tick labels, one per class.
        normalize: If True, each row is divided by its sum before plotting.
            Rows that sum to zero (a class with no true samples) are shown as
            zeros instead of NaN.
        title: Plot title; it is also the path passed to `plt.savefig`.
        cmap: Matplotlib colormap for the heat map.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Guard against empty rows: dividing by zero would put NaN in the
        # matrix and make the text-colour threshold below NaN as well.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, fontsize=12)
    plt.yticks(tick_marks, classes, fontsize=12)
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 fontsize=14,
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label', fontsize=12)
    plt.xlabel('Predicted label', fontsize=12)
    plt.savefig(title, dpi=300, bbox_inches='tight')


In [None]:
K.clear_session()
input_shape = [x_train.shape[1], 1]
model = get_FC_model(input_shape)
# NOTE(review): hard-coded checkpoint name. The training cell writes its
# checkpoints to the working directory, so this file was presumably moved into
# ./model by hand after an earlier run.
model.load_weights('./model/FC_model_39_0.3987.h5')
# Evaluate the model on the dataset
results = model.evaluate(x_test, y_test, batch_size=8)
# The old call passed the format string and `results` as two separate print
# arguments, so the %-placeholders were printed literally.
# presumably results == [loss, accuracy] - depends on how get_FC_model compiles the model
print('test loss, test acc:', results)
predictions = model.predict(x_test)
true_class = tf.argmax(y_test, 1)
predicted_class = tf.argmax(predictions, 1)
# Named conf_mat so the sklearn `confusion_matrix` import is not shadowed.
conf_mat = tf.math.confusion_matrix(true_class, predicted_class, 2)
# An explicit title keeps the saved figure name in line with the other models
# (the default title would save the figure as "Confusion matrix").
plot_confusion_matrix(conf_mat.numpy(), classes=['Good','Bad'], normalize=True, title='FC Normalized confusion matrix')

# CNN model

In [None]:
K.clear_session()
input_shape = [x_train.shape[1], 1]
model = get_rf_cnn_model(input_shape)

# Save a checkpoint only when val_loss improves. The deprecated `period=1`
# argument is omitted: checking once per epoch is already the default.
checkpoint = ModelCheckpoint(
    "CNN_model_{epoch:d}_{val_loss:.4f}.h5",
    monitor='val_loss', save_best_only=True, mode='min',
)
earlyStopping = EarlyStopping(monitor='val_loss', mode='min', patience=50, min_delta=0.002)

# NOTE(review): validation batches come from balanced_dataset_test, i.e. the
# test split, so checkpoint selection and early stopping see the same data that
# the evaluation cell scores afterwards.
train_batches = balanced_dataset.repeat().shuffle(1000).batch(8)
val_batches = balanced_dataset_test.shuffle(100).batch(8)
history = model.fit(
    train_batches,
    steps_per_epoch=200,
    epochs=3000,
    verbose=True,
    callbacks=[checkpoint, earlyStopping],
    validation_data=val_batches,
    validation_steps=200,
)

In [None]:
# Training vs. validation loss per epoch for the CNN model.
# The curves are losses, so they are named accordingly (they used to be stored
# in variables called `accuracy` / `val_accuracy`).
plt.figure(figsize=(8, 8))
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_loss) + 1)

plt.plot(epochs, train_loss, label='Training loss')
plt.plot(epochs, val_loss, label='Validation loss')
plt.xlabel('Epoch', size=20)
plt.ylabel('Loss', size=20)
plt.ylim([0, 1.0])  # losses above 1.0 fall outside the visible range
plt.xlim([0, len(train_loss) + 1])
plt.yticks(size=20)
plt.xticks(size=20)
# Legend font size matches the LSTM / Transformer history plots.
plt.legend(prop={'size': 20})
plt.savefig("history_CNN", dpi=300)
plt.show()
plt.close()

In [None]:
K.clear_session()
input_shape = [x_train.shape[1], 1]
model = get_rf_cnn_model(input_shape)
# NOTE(review): hard-coded checkpoint name. The training cell writes its
# checkpoints to the working directory, so this file was presumably moved into
# ./model by hand after an earlier run.
model.load_weights('./model/CNN_model_63_0.2767.h5')
# Evaluate the model on the dataset
results = model.evaluate(x_test, y_test, batch_size=32)
# The old call passed the format string and `results` as two separate print
# arguments, so the %-placeholders were printed literally.
# presumably results == [loss, accuracy] - depends on how get_rf_cnn_model compiles the model
print('test loss, test acc:', results)
predictions = model.predict(x_test)
true_class = tf.argmax(y_test, 1)
predicted_class = tf.argmax(predictions, 1)
# Named conf_mat so the sklearn `confusion_matrix` import is not shadowed.
conf_mat = tf.math.confusion_matrix(true_class, predicted_class, 2)
plot_confusion_matrix(conf_mat.numpy(), classes=['Good','Bad'], normalize=True, title='CNN Normalized confusion matrix')

# CNN-BiLSTM model

In [None]:
K.clear_session()
input_shape = [x_train.shape[1], 1]
model = get_rf_LSTM_model(input_shape)

# Save a checkpoint only when val_loss improves. The deprecated `period=1`
# argument is omitted: checking once per epoch is already the default.
checkpoint = ModelCheckpoint(
    "LSTM_model_{epoch:d}_{val_loss:.4f}.h5",
    monitor='val_loss', save_best_only=True, mode='min',
)
earlyStopping = EarlyStopping(monitor='val_loss', patience=50, mode='min', min_delta=0.001)

# NOTE(review): validation batches come from balanced_dataset_test, i.e. the
# test split, so checkpoint selection and early stopping see the same data that
# the evaluation cell scores afterwards.
train_batches = balanced_dataset.repeat().shuffle(1000).batch(8)
val_batches = balanced_dataset_test.shuffle(100).batch(8)
history = model.fit(
    train_batches,
    steps_per_epoch=200,
    epochs=5000,
    verbose=True,
    callbacks=[checkpoint, earlyStopping],
    validation_data=val_batches,
    validation_steps=200,
)

In [None]:
# Loss history of the CNN-BiLSTM run: training and validation loss against
# the 1-based epoch number, saved as "history_LSTM".
plt.figure(figsize=(8, 8))
train_loss = history.history['loss']
val_loss = history.history['val_loss']
n_epochs = len(train_loss)
epoch_axis = range(1, n_epochs + 1)

for curve, curve_label in ((train_loss, 'Training loss'), (val_loss, 'Validation loss')):
    plt.plot(epoch_axis, curve, label=curve_label)

# Axis labels, limits and tick labels all use a font size of 20.
plt.xlabel('Epoch', size=20)
plt.ylabel('Loss', size=20)
plt.ylim([0, 1.0])
plt.xlim([0, n_epochs + 1])
plt.yticks(size=20)
plt.xticks(size=20)
plt.legend(prop={'size': 20})

plt.savefig("history_LSTM", dpi=300)
plt.show()
plt.close()

In [None]:
K.clear_session()
input_shape = [x_train.shape[1], 1]
model = get_rf_LSTM_model(input_shape)
# NOTE(review): hard-coded checkpoint name. The training cell writes its
# checkpoints to the working directory, so this file was presumably moved into
# ./model by hand after an earlier run.
model.load_weights('./model/LSTM_model_95_0.2452.h5')
# Evaluate the model on the dataset
results = model.evaluate(x_test, y_test, batch_size=32)
# The old call passed the format string and `results` as two separate print
# arguments, so the %-placeholders were printed literally.
# presumably results == [loss, accuracy] - depends on how get_rf_LSTM_model compiles the model
print('test loss, test acc:', results)
predictions = model.predict(x_test)
true_class = tf.argmax(y_test, 1)
predicted_class = tf.argmax(predictions, 1)
# Named conf_mat so the sklearn `confusion_matrix` import is not shadowed.
conf_mat = tf.math.confusion_matrix(true_class, predicted_class, 2)
plot_confusion_matrix(conf_mat.numpy(), classes=['Good','Bad'], normalize=True, title='LSTM Normalized confusion matrix')

# CNN-BiLSTM-Transformer model

In [None]:
K.clear_session()
input_shape = [x_train.shape[1], 1]
model = get_rf_transformer_model(input_shape)

# Save a checkpoint only when val_loss improves. The deprecated `period=1`
# argument is omitted: checking once per epoch is already the default.
checkpoint = ModelCheckpoint(
    "Transformer_model_{epoch:d}_{val_loss:.4f}.h5",
    monitor='val_loss', save_best_only=True, mode='min',
)
earlyStopping = EarlyStopping(monitor='val_loss', mode='min', patience=50, min_delta=0.003)

# NOTE(review): validation batches come from balanced_dataset_test, i.e. the
# test split, so checkpoint selection and early stopping see the same data that
# the evaluation cell scores afterwards.
train_batches = balanced_dataset.repeat().shuffle(1000).batch(8)
val_batches = balanced_dataset_test.shuffle(100).batch(8)
history = model.fit(
    train_batches,
    steps_per_epoch=200,
    epochs=5000,
    verbose=True,
    callbacks=[checkpoint, earlyStopping],
    validation_data=val_batches,
    validation_steps=200,
)

In [None]:
# Loss history of the CNN-BiLSTM-Transformer run: training and validation loss
# against the 1-based epoch number, saved as "history_Transformer".
plt.figure(figsize=(8, 8))
train_loss = history.history['loss']
val_loss = history.history['val_loss']
n_epochs = len(train_loss)
epoch_axis = range(1, n_epochs + 1)

for curve, curve_label in ((train_loss, 'Training loss'), (val_loss, 'Validation loss')):
    plt.plot(epoch_axis, curve, label=curve_label)

# Axis labels, limits and tick labels all use a font size of 20.
plt.xlabel('Epoch', size=20)
plt.ylabel('Loss', size=20)
plt.ylim([0, 1.0])
plt.xlim([0, n_epochs + 1])
plt.yticks(size=20)
plt.xticks(size=20)
plt.legend(prop={'size': 20})

plt.savefig("history_Transformer", dpi=300)
plt.show()
plt.close()

In [None]:
K.clear_session()
input_shape = [x_train.shape[1], 1]
model = get_rf_transformer_model(input_shape)
# NOTE(review): hard-coded checkpoint name. The training cell writes its
# checkpoints to the working directory, so this file was presumably moved into
# ./model by hand after an earlier run.
model.load_weights('./model/Transformer_model_35_0.2734.h5')
# Evaluate the model on the dataset
results = model.evaluate(x_test, y_test, batch_size=32)
# The old call passed the format string and `results` as two separate print
# arguments, so the %-placeholders were printed literally.
# presumably results == [loss, accuracy] - depends on how get_rf_transformer_model compiles the model
print('test loss, test acc:', results)
predictions = model.predict(x_test)
true_class = tf.argmax(y_test, 1)
predicted_class = tf.argmax(predictions, 1)
# Named conf_mat so the sklearn `confusion_matrix` import is not shadowed.
conf_mat = tf.math.confusion_matrix(true_class, predicted_class, 2)
plot_confusion_matrix(conf_mat.numpy(), classes=['Good','Bad'], normalize=True, title='Transformer Normalized confusion matrix')

# Test on RFs from smaller earthquakes

In [None]:
choose_tmp = np.load('./data/small_earthquake_RF.npy') # 5.0 < mag < 5.5

# Start from a clean Keras session, as every other model-building cell does.
K.clear_session()

# Put the traces in the same (n_samples, n_points, 1) float32 layout that the
# training data is given before it reaches the network.
# NOTE(review): assumes choose_tmp is (n_samples, n_points) or
# (n_samples, n_points, 1) - confirm against the file.
rf_small = choose_tmp.reshape([choose_tmp.shape[0], -1, 1]).astype(np.float32)

input_shape = [rf_small.shape[1], 1]
model = get_rf_LSTM_model(input_shape)
model.load_weights('./model/LSTM_model_95_0.2452.h5')
predictions = model.predict(rf_small)