## Bibliotecas

In [None]:
import collections
import math
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import tensorflow as tf

from collections import Counter
from scipy import interp

from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Conv2D, Dense, Dropout, MaxPooling2D, BatchNormalization, Activation, Flatten, GaussianNoise
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import Sequential, load_model, model_from_json
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Precision, Recall

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV, ParameterGrid, train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support , roc_auc_score, roc_curve, auc, precision_score, recall_score, f1_score, accuracy_score
from sklearn.utils import class_weight

from pickle import load
from timeit import default_timer as timer
from random import randint
from itertools import cycle

# Execute the shared helper notebook inside this kernel's namespace.
# NOTE(review): matrix_generator, save_model and display_metrics are used below but are not
# defined in this notebook; presumably they come from base_functions.ipynb — confirm.
%run ./base_functions.ipynb
# %run ./config.ipynb

# Turn off pandas' chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None

In [None]:
# Sanity check of the TensorFlow installation.
# A notebook cell only displays its last expression, so the CUDA build flag is printed
# explicitly; the list of visible GPU devices is shown as the cell output.
print("Built with CUDA:", tf.test.is_built_with_cuda())
tf.config.list_physical_devices('GPU')

### Fixando a seed

In [None]:
# Draw the seed for this run and print it, so the value can be recorded and reused.
# (Different seed values may be used at each stage.)
seed_value = randint(0, 99999)
print(seed_value)

# 1. Expose the seed to Python's hash randomisation through the environment
os.environ['PYTHONHASHSEED'] = f"{seed_value}"

# 2-4. Seed, in this order, the pseudo-random generators of the `random` module,
#      of NumPy and of TensorFlow with the same value
import random
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

## Parâmetros e configurações

In [None]:
# Folder paths.
# The defaults are the original machine-specific locations. They can be overridden without
# editing the notebook through the TEP_DATA_FOLDER / TEP_OUTPUTS_FOLDER environment variables.
# Both values are concatenated directly with file names below, so an override must end
# with a path separator.

data_folder = os.environ.get("TEP_DATA_FOLDER", "D:\\TEP - Matlab\\")
outputs_folder = os.environ.get("TEP_OUTPUTS_FOLDER",
                                "C:\\Users\\anaso\\Desktop\\workspace\\doutorado\\outputs\\")

In [None]:
# Identifier of this experiment: used below in the checkpoint file name and passed to save_model.
TEST_CODE = 'PS8new-TF1'

### Funções

In [None]:
def vectorized_stride_ana(array, max_time, sub_window_size, stride_size):
    """Cut `array` into windows of consecutive rows using vectorised indexing.

    Window k starts at row ``k * stride_size`` and spans ``sub_window_size`` rows.
    Only windows that fit entirely inside `array` are produced, and no window
    starts after row `max_time`.

    Parameters
    ----------
    array : np.ndarray
        Data whose first axis is the one being windowed.
    max_time : int
        Largest row index allowed as the start of a window.
    sub_window_size : int
        Number of rows in each window.
    stride_size : int
        Step, in rows, between the starts of consecutive windows.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_windows, sub_window_size) + array.shape[1:]``.
        It is empty when `array` has fewer rows than `sub_window_size`.
    """
    n_rows = len(array)

    # Last row at which a complete window can still start. This bound depends on the
    # window length, not on the number of columns of `array`; the two only coincide
    # for square windows.
    last_start = min(max_time, n_rows - sub_window_size)
    if last_start < 0:
        return array[np.empty((0, sub_window_size), dtype=int)]

    # Build the index matrix only for the windows that are actually returned.
    window_starts = np.arange(0, last_start + 1, stride_size)
    window_index = window_starts[:, None] + np.arange(sub_window_size)[None, :]

    # Fancy indexing gathers every window in a single operation.
    return array[window_index]

## Leitura dos dados

In [None]:
# Read the semicolon-separated measurements file
data = pd.read_csv(data_folder + "10-matlab-dados-3anos.csv", sep=';')
# Drop the two unused columns. `columns=` is used because the positional `axis`
# argument of DataFrame.drop is no longer accepted by pandas 2.x.
data_total = data.drop(columns=['Unnamed: 0', 'XMV(12)'])
np.shape(data_total)

In [None]:
# Read the semicolon-separated simulation-parameters file
status = pd.read_csv(data_folder + "10-params-3anos.csv", sep=';')
# `columns=` is used because the positional `axis` argument of DataFrame.drop
# is no longer accepted by pandas 2.x.
sim_status = status.drop(columns=['Unnamed: 0'])

## Pré-processamento dos dados

In [None]:
# Keep only a slice of the data to reduce the size of the dataset.
# Each data slice is paired with a slice of the simulation-status table.

# Training portion
data_treino = data_total.iloc[219285:270208]
sim_status_treino = sim_status.iloc[320:390]

# Test portion
data_teste = data_total.iloc[309787:395492]
sim_status_teste = sim_status.iloc[450:580]

In [None]:
# Shapes of the training and test slices, one per line
print(data_treino.shape, data_teste.shape, sep="\n")

In [None]:
nlinhas = 52
ncolunas = 52
sliding_window = 5

ti = timer()

x_windows, y_windows, y_windows_ohe = matrix_generator(data_treino, sim_status_treino, nlinhas, ncolunas, sliding_window)

tf = timer()

In [None]:
print("Tempo total: " + str(int((tf-ti)//60)) + " minutos e " + str(math.ceil((tf-ti)%60))+ " segundos")

In [None]:
# Random, stratified split into training and validation sets (15 % for validation)

X_train, X_valid, y_train_multi, y_valid_multi = train_test_split(
    x_windows,
    y_windows,
    test_size=0.15,
    random_state=seed_value,
    shuffle=True,
    stratify=y_windows,
)

# Report shapes and label counts of each subset
for split_title, split_x, split_y in (
    ("\nTREINO", X_train, y_train_multi),
    ("\nVALIDAÇÃO", X_valid, y_valid_multi),
):
    print(split_title)
    print("X: ", np.shape(split_x))
    print("Y: ", np.shape(split_y))
    print("Status:", Counter(split_y))

In [None]:
# Distinct labels present in the training split
STATUS = np.unique(y_train_multi)

# Re-encode the labels of both splits as one-hot vectors with one column per training label.
# The short names are aliases of the same arrays.
y_train = y_windows_train_ohe = to_categorical(y_train_multi, num_classes=len(STATUS))
y_valid = y_windows_valid_ohe = to_categorical(y_valid_multi, num_classes=len(STATUS))

In [None]:
# Scaling

# Stack all windows row-wise into 2-D tables with one column per window column.
# The column count is `ncolunas`; it was written as `nlinhas`, which only works
# while the windows are square.
# NOTE(review): assumes X_train / X_valid are (n_windows, nlinhas, ncolunas) — confirm
# against matrix_generator.
df_train = pd.DataFrame(X_train.reshape((nlinhas*X_train.shape[0], ncolunas)))
df_valid = pd.DataFrame(X_valid.reshape((nlinhas*X_valid.shape[0], ncolunas)))

# Fit the scaler on the training rows only, then apply it to both sets
scaler = StandardScaler()
scaler.fit(df_train)

df_train_norm = scaler.transform(df_train)
df_valid_norm = scaler.transform(df_valid)

In [None]:
# Rebuild the windows from the scaled 2-D tables: the stride equals the window length,
# so consecutive windows do not overlap. A trailing channel axis of size 1 is then added.
back_to_4d_train = vectorized_stride_ana(df_train_norm, df_train_norm.shape[0] - 1, nlinhas, nlinhas)
x_train = back_to_4d_train.reshape((back_to_4d_train.shape[0], nlinhas, ncolunas, 1))

back_to_4d_valid = vectorized_stride_ana(df_valid_norm, df_valid_norm.shape[0] - 1, nlinhas, nlinhas)
x_valid = back_to_4d_valid.reshape((back_to_4d_valid.shape[0], nlinhas, ncolunas, 1))

# Shapes before and after scaling, training first, then validation
print(X_train.shape, x_train.shape, sep="\n")
print()
print(X_valid.shape, x_valid.shape, sep="\n")

In [None]:
# Test set, without shuffling

# Scale the test slice with the scaler fitted on the training data
extra_test_norm = pd.DataFrame(scaler.transform(data_teste), columns=data_teste.columns)

# Generate the test windows.
# Note: this rebinds y_windows / y_windows_ohe, which previously held the training labels.
x_test, y_windows, y_windows_ohe = matrix_generator(
    extra_test_norm, sim_status_teste, nlinhas, ncolunas, sliding_window
)

# Blank line, test shape and label counts
print("", x_test.shape, Counter(y_windows), sep="\n")

# One-hot labels with the same number of columns as the training labels
y_test = to_categorical(y_windows, num_classes=len(STATUS))

In [None]:
# Shapes of the test inputs and one-hot labels, one per line
print(x_test.shape, y_test.shape, sep="\n")

## Modelagem do sistema de FDD

In [None]:
# Balanced class weights computed from the training labels (multi-class case)

# Integer class index of every training sample, recovered from the one-hot matrix
train_labels = y_train.argmax(axis=1)

class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)

# Keras expects a {class index: weight} mapping
class_weight_dict = {idx: class_weights[idx] for idx in range(len(STATUS))}

class_weight_dict

In [None]:
# Load the original (source) model: architecture from JSON, weights from HDF5

SOURCE_TEST_CODE = 'TS1'
training_type = "simple_model_"

# Both branches share the same file layout; Optuna runs additionally carry the
# trial number in the file stem.
if training_type == "optuna_model_":
    trial = 20
    model_stem = outputs_folder + training_type + SOURCE_TEST_CODE + "_" + str(trial)
else:
    model_stem = outputs_folder + training_type + SOURCE_TEST_CODE

# The context manager closes the file even if reading fails
with open(model_stem + '-CNN.json', 'r') as json_file:
    loaded_model_json = json_file.read()

model = model_from_json(loaded_model_json)
model.load_weights(model_stem + "-CNN.h5")

model.compile(loss='categorical_crossentropy', optimizer=RMSprop(learning_rate=0.001),
              metrics=['acc', Precision(), Recall()])

model.summary()

### Transfer learning - Treino simples

In [None]:
# Build a new sequential model out of the loaded one

new_model = Sequential()

# Copy every layer of the original model except the last one, freezing each copied layer
# (trainable=False) so that its weights are not updated during training.
# The layer objects are shared with `model`, not duplicated.
for layer in model.layers[:-1]:
    new_model.add(layer)
    layer.trainable = False

new_model.summary()

In [None]:
# Append the trainable classification head: one softmax unit per label in STATUS.
# This is the layer meant to be fine-tuned.

new_model.add(Dense(units=len(STATUS), activation='softmax'))

# Show the resulting CNN
new_model.summary()

In [None]:
# Treinamento da rede convolucional

# Definição dos callbacks usados
early_stopping = EarlyStopping(monitor='val_loss', mode='auto', patience=20, restore_best_weights=True)
model_check = ModelCheckpoint(filepath=outputs_folder+"model_cp_"+TEST_CODE+".h5", monitor="val_loss", mode="auto")

model.compile(loss='categorical_crossentropy', optimizer=RMSprop(learning_rate=0.0001), 
              metrics=['acc', Precision(), Recall()])

ti = timer()

history = model.fit(x_train, y_train, 
                    epochs=100, 
                    batch_size=500, 
                    validation_data=(x_valid, y_valid), 
                    verbose=1,
                    shuffle=False,
                    class_weight=class_weight_dict,
                    callbacks=[early_stopping, model_check]) 

save_model(model=model, iterator=TEST_CODE, train_type='simple', model_name='CNN')

tf = timer()

In [None]:
print("Tempo total: " + str(int((tf-ti)//60)) + " minutos e " + str(math.ceil((tf-ti)%60))+ " segundos")

In [None]:
# Print the current date and time (timestamp of when this point of the notebook was reached)
import datetime
print(datetime.datetime.now())

## Análise dos resultados

In [None]:
# Plots - training vs. validation

# `history` is either the object returned by model.fit (curves live in `history.history`)
# or a table loaded from the CSV saved for the best model (curves are indexed directly).
# The source is detected explicitly instead of relying on a bare `except:`, which would
# also hide unrelated errors.
if hasattr(history, 'history'):
    history_log = history.history
    # metrics_names order follows model.compile: loss, acc, precision, recall
    loss_key, acc_key, precision_key, recall_key = model.metrics_names[:4]
else:
    history_log = history
    loss_key, acc_key = 'loss', 'acc'
    # NOTE(review): `best_trial` is not defined in this notebook; it must already exist in
    # the namespace when the history comes from the CSV file.
    precision_key = 'precision_'+str(best_trial)
    recall_key = 'recall_'+str(best_trial)

# Training curves
train_acc = history_log[acc_key]
train_loss = history_log[loss_key]
train_precision = history_log[precision_key]
train_recall = history_log[recall_key]

# Validation curves
val_acc = history_log['val_'+str(acc_key)]
val_loss = history_log['val_'+str(loss_key)]
val_precision = history_log['val_'+str(precision_key)]
val_recall = history_log['val_'+str(recall_key)]

epochs = range(1, len(train_acc) + 1)
fig = plt.figure(figsize=(16,8))
print("Épocas: ", len(epochs))

# One panel per metric on a 2x2 grid: (title, legend suffix, training curve, validation curve)
metric_panels = [
    ('Accuracy', 'acc', train_acc, val_acc),
    ('Loss', 'loss', train_loss, val_loss),
    ('Precision', 'precision', train_precision, val_precision),
    ('Recall', 'recall', train_recall, val_recall),
]
for panel_position, (panel_title, legend_suffix, train_curve, val_curve) in enumerate(metric_panels, start=1):
    plt.subplot(2, 2, panel_position)
    plt.plot(epochs, train_curve, '-bo', label='Training ' + legend_suffix)
    plt.plot(epochs, val_curve, '-ko', label='Validation ' + legend_suffix)
    plt.title(panel_title)
    plt.legend()

plt.show()

In [None]:
# Training set: evaluate `model` through the display_metrics helper (not defined in this
# notebook) and show what it returns.

train_metrics = display_metrics(x_train, y_train, model, STATUS, 'Treino', multi_problem=True) 
train_metrics

In [None]:
# Validation set: evaluate `model` through the display_metrics helper (not defined in this
# notebook) and show what it returns.

valid_metrics = display_metrics(x_valid, y_valid, model, STATUS, 'Validação', multi_problem=True) 
valid_metrics

In [None]:
# Test set: evaluate `model` through the display_metrics helper (not defined in this
# notebook) and show what it returns.

test_metrics = display_metrics(x_test, y_test, model, STATUS, 'Teste', multi_problem=True) 
test_metrics

In [None]:
# ROC curves per class and micro-average; the figure shows the curve of class 0

model_pred = model.predict(x_test)

# True labels in one-hot encoding
y_test = y_test.copy()

n_classes = len(STATUS)

# Predicted labels in one-hot encoding. The width is fixed to n_classes so that a class
# that is never predicted still gets its (all-zero) column; pd.get_dummies would drop it
# and break the per-class indexing below.
# NOTE(review): the curves are built from hard 0/1 predictions, so each has a single
# operating point; pass `model_pred` (the probabilities) to roc_curve for a full curve.
y_pred = to_categorical(model_pred.argmax(axis=1), num_classes=n_classes)

# ROC curve and AUC for every class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Micro-average ROC curve and area: all classes pooled into one binary problem
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_pred.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

plt.figure(figsize=(5,5), dpi=300)
plt.plot(fpr[0], tpr[0], color='darkorange', lw=2, label='Curva ROC (area = %0.3f)' % roc_auc[0])
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Taxa de falsos positivos')
plt.ylabel('Taxa de verdadeiros positivos')
plt.legend(loc="lower right")
plt.show()

In [None]:
# ROC curves and AUC for all classes, plus micro- and macro-averages

roc_auc_scores = []

# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

# Then interpolate all ROC curves at these points.
# np.interp is used directly: scipy.interp was only a deprecated alias of it.
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC
mean_tpr /= n_classes

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves
plt.figure(figsize=(12,12), dpi=300)
plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.3f})'.format(roc_auc["micro"]),
         color='deeppink', linestyle=':', linewidth=4)

plt.plot(fpr["macro"], tpr["macro"], label='macro-average ROC curve (area = {0:0.3f})'.format(roc_auc["macro"]),
         color='navy', linestyle=':', linewidth=4)

# The three colours repeat when there are more than three classes
colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=2, label='ROC curve of class {0} (area = {1:0.3f})'.format(i, roc_auc[i]))
    roc_auc_scores.append(roc_auc[i])

plt.plot([0, 1], [0, 1], 'k--', lw=2)  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Taxa de falsos positivos')
plt.ylabel('Taxa de verdadeiros positivos')
plt.legend(loc="lower right")
plt.show()