#### CNN Model:

In [14]:
# libraries cnn model
import pandas as pd

import numpy as np
from numpy import mean
from numpy import std
from tensorflow import keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.callbacks import EarlyStopping

from sklearn.metrics import (confusion_matrix, accuracy_score, classification_report)

import plotly.figure_factory as ff

import os
import scipy.io as sio
from scipy.fft import fft

In [2]:
# load the dataset, returns train, test and validation X and y elements
def load_dataset():

	# load all data
	with open('data/processed/trainX.npy', 'rb') as f:
		trainX = np.load(f)
	with open('data/processed/trainy.npy', 'rb') as f:
		trainy = np.load(f)
	with open('data/processed/testX.npy', 'rb') as f:
		testX = np.load(f)
	with open('data/processed/testy.npy', 'rb') as f:
		testy = np.load(f)
	with open('data/processed/valX.npy', 'rb') as f:
		valX = np.load(f)
	with open('data/processed/valy.npy', 'rb') as f:
		valy = np.load(f)
	
	print("train: ",trainX.shape, trainy.shape, "\ntest: ", testX.shape, testy.shape, "\nval: ", valX.shape, valy.shape)
	
	return trainX, trainy, testX, testy, valX, valy

In [15]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, valX, valy):

    verbose, epochs, batch_size = 2, 50, 32

    # Define early stopping criteria
    early_stop = EarlyStopping(monitor='val_accuracy', patience=5, mode='max')
    
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]

    model = Sequential()
    model.add(Conv1D(filters=4, kernel_size=3, activation='relu',input_shape=(n_timesteps,n_features)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=8, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose, validation_data=(valX, valy), callbacks=[early_stop])
    
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    pred_train = model.predict(trainX, verbose=0)
    pred_test = model.predict(testX, verbose=0)
    
    # save model
    models_dir = 'models/'
    existing_models = [filename for filename in os.listdir(models_dir) if filename.startswith('cnn_model')]
    num_model = len(existing_models)+1
    filename = f'models/cnn_model_{num_model}.h5'
    model.save(filename)

    return accuracy, pred_train, pred_test

In [4]:
# summarize scores
def summarize_results(scores):
	print(scores)
	m, s = mean(scores), std(scores)
	print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

In [5]:
trainX, trainy, testX, testy, valX, valy = load_dataset()

train:  (49209, 512, 2) (49209, 3) 
test:  (10545, 512, 2) (10545, 3) 
val:  (10545, 512, 2) (10545, 3)


In [6]:
pd.DataFrame(np.argmax(trainy,axis=1)).value_counts()

1    16899
0    16885
2    15425
dtype: int64

In [7]:
pd.DataFrame(np.argmax(testy,axis=1)).value_counts()

1    3600
0    3552
2    3393
dtype: int64

In [8]:
pd.DataFrame(np.argmax(valy,axis=1)).value_counts()

0    3617
1    3594
2    3334
dtype: int64

In [16]:
# run an experiment
def run_experiment(repeats=10):
	# load data
	trainX, trainy, testX, testy, valX, valy = load_dataset()
	# repeat experiment
	scores = list()
	train_accs = list()
	test_accs = list()
	for r in range(repeats):
		score, pred_train, pred_test = evaluate_model(trainX, trainy, testX, testy, valX, valy)
		score = score * 100.0
		print('>#%d: %.3f' % (r+1, score))
		scores.append(score)
		train_acc = accuracy_score(np.argmax(trainy,axis=1), np.argmax(pred_train,axis=1))*100
		test_acc = accuracy_score(np.argmax(testy,axis=1), np.argmax(pred_test,axis=1))*100
		train_accs.append(train_acc)
		test_accs.append(test_acc)

	# summarize results
	summarize_results(scores)
	print('Train accuracy: ')
	summarize_results(train_accs)
	print('Test accuracy: ')
	summarize_results(test_accs)


In [17]:
# run the experiment
run_experiment(1)

train:  (49209, 512, 2) (49209, 3) 
test:  (10545, 512, 2) (10545, 3) 
val:  (10545, 512, 2) (10545, 3)
Epoch 1/50
1538/1538 - 9s - loss: 10.5767 - accuracy: 0.6143 - val_loss: 0.6424 - val_accuracy: 0.7183 - 9s/epoch - 6ms/step
Epoch 2/50
1538/1538 - 9s - loss: 0.6207 - accuracy: 0.7279 - val_loss: 0.5377 - val_accuracy: 0.7688 - 9s/epoch - 6ms/step
Epoch 3/50
1538/1538 - 8s - loss: 0.5598 - accuracy: 0.7626 - val_loss: 0.4919 - val_accuracy: 0.7917 - 8s/epoch - 6ms/step
Epoch 4/50
1538/1538 - 9s - loss: 0.5204 - accuracy: 0.7814 - val_loss: 0.4478 - val_accuracy: 0.8163 - 9s/epoch - 6ms/step
Epoch 5/50
1538/1538 - 9s - loss: 0.4871 - accuracy: 0.7955 - val_loss: 0.5912 - val_accuracy: 0.7507 - 9s/epoch - 6ms/step
Epoch 6/50
1538/1538 - 10s - loss: 0.4276 - accuracy: 0.8255 - val_loss: 0.5511 - val_accuracy: 0.7825 - 10s/epoch - 6ms/step
Epoch 7/50
1538/1538 - 9s - loss: 0.3785 - accuracy: 0.8476 - val_loss: 0.3519 - val_accuracy: 0.8578 - 9s/epoch - 6ms/step
Epoch 8/50
1538/1538 - 9s

In [43]:
# load data
trainX, trainy, testX, testy = load_dataset()

score, pred_train, pred_test = evaluate_model(trainX, trainy, testX, testy)
score = score * 100.0
train_acc = accuracy_score(np.argmax(trainy,axis=1), np.argmax(pred_train,axis=1))
test_acc = accuracy_score(np.argmax(testy,axis=1), np.argmax(pred_test,axis=1))
print('Train accuracy: ',train_acc)
print('Test accuracy: ',test_acc)

# Me ha tardado 8.5s, training accuracy muy bueno, pero test peor

(3022, 512, 1) (3022, 3) (1489, 512, 1) (1489, 3)
Train accuracy:  1.0
Test accuracy:  0.9785090664875755


In [44]:
print(classification_report(np.argmax(testy,axis=1), np.argmax(pred_test,axis=1), target_names=['Healthy', 'OR fault', 'IR fault'],digits=4))

              precision    recall  f1-score   support

     Healthy     0.9637    0.9749    0.9693       518
    OR fault     1.0000    1.0000    1.0000       484
    IR fault     0.9730    0.9610    0.9669       487

    accuracy                         0.9785      1489
   macro avg     0.9789    0.9786    0.9787      1489
weighted avg     0.9785    0.9785    0.9785      1489



In [45]:
# training
# Construimos una visualización para la matriz de confusión
z_train = confusion_matrix(np.argmax(trainy,axis=1), np.argmax(pred_train,axis=1))
# Reformateo la matriz para que me quede mejor el gráfico
z_train[[0,2],:] = z_train[[2,0],:]
x = ['Healthy', 'OR fault', 'IR fault']
y = ['IR fault', 'OR fault', 'Healthy']
z_text = [[str(y) for y in x] for x in z_train]
heatmap = ff.create_annotated_heatmap(z_train, x=x, y=y, annotation_text=z_text, colorscale='tealrose')
heatmap.update_layout(title_text='Training',height=300,width=600,
                      xaxis_title="Predicted Label",yaxis_title="True Label")
heatmap.show()

In [46]:
# testing
# Construimos una visualización para la matriz de confusión
z_test = confusion_matrix(np.argmax(testy,axis=1), np.argmax(pred_test,axis=1))
# Reformateo la matriz para que me quede mejor el gráfico
z_test[[0,2],:] = z_test[[2,0],:]
x = ['Healthy', 'OR fault', 'IR fault']
y = ['IR fault', 'OR fault', 'Healthy']
z_text = [[str(y) for y in x] for x in z_test]
heatmap = ff.create_annotated_heatmap(z_test, x=x, y=y, annotation_text=z_text, colorscale='tealrose')
heatmap.update_layout(title_text='Testing',height=300,width=600,
                      xaxis_title="Predicted Label",yaxis_title="True Label")
heatmap.show()

In [47]:
indicesIRHealthy = np.where((np.argmax(testy,axis=1) == 2) & (np.argmax(pred_test,axis=1) == 0))[0]
testX_IRHealthy = testX[indicesIRHealthy]
testX_IRHealthy = testX_IRHealthy.reshape(testX_IRHealthy.shape[0],testX_IRHealthy.shape[1])
testX_IRHealthy.shape

(19, 512)

In [48]:
# load all data
X_ki01, y_ki01 = load_dataset_group('PPDataset/', 'KI01', 'N15_M07_F10') # artificial IR damage
X_ki04, y_ki04 = load_dataset_group('PPDataset/', 'KI04', 'N15_M07_F10') # real IR damage

In [49]:
matches_artificial = []
for i in range(testX_IRHealthy.shape[0]):
    for j in range(X_ki01.shape[0]):
        if(np.array_equal(testX_IRHealthy[i], X_ki01[j])):
            matches_artificial.append(True)
            break
print("Artificial IR damage mistaken for Healthy: ",len(matches_artificial))

matches_real = []
for i in range(testX_IRHealthy.shape[0]):
    for j in range(X_ki04.shape[0]):
        if(np.array_equal(testX_IRHealthy[i], X_ki04[j])):
            matches_real.append(True)
            break
print("Real IR damage mistaken for Healthy: ",len(matches_real))

Artificial IR damage mistaken for Healthy:  15
Real IR damage mistaken for Healthy:  4
