<a href="https://colab.research.google.com/github/alexfarb/cae_arrhythmia_classification/blob/master/2019_03_01_af_cae_ecg_clf_nsv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive
drive.mount('/content/drive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive/


In [0]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
#import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from keras.layers import Input, Conv1D, MaxPooling1D, UpSampling1D
from keras.models import Model, load_model
from keras import backend as K
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix
from datetime import datetime
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support as score

Using TensorFlow backend.


In [0]:
class ProcessingData(object):
    # Metódo Construtor
    def __init__(self, dataset):
        self.dataset = dataset
        
    # Função pra agrupar os dados de acordo com as classes    
    def organize_data_labels(self, dataset_labels, label, dataset_length):
        if label == 4:
          rows = np.where(dataset_labels < 3)
        else:  
          rows = np.where(dataset_labels == label)
        rows_list = list(rows)
        rows = rows_list[0]
        labels = rows_list[1]
        samples = []
        for i in range(0,len(rows)):
            samples.append(self.dataset[rows[i],:])
        samples_array = np.asarray(samples)
        labels = np.reshape(labels,(len(labels),1))
        
        return samples_array, labels    
        
    # função para a normalização dos dados
    def normalize_data(self, data, min, max):
        return (data-min)/(max-min) # Cálculo para a normalização dos dados

    # Função para converter os dados de volta a escala original
    def original_scale_data(self, decoded, input, min, max):
        decoded_original_scale = decoded * (max - min) + min
        error = input - decoded_original_scale
        return decoded_original_scale, error

    # Função para salvar os resultados do treino em arquivo .csv
    def save_results_train(self, loss, cae_label):
        # Salva os resultados em um arquivo .csv
        loss_path = "/content/drive/My Drive/colab_apps/nsv/results/train/loss_%s.csv" %(cae_label)
        np.savetxt(loss_path, loss, delimiter=',', fmt='%s')
        return
    
    # Função para salvar os resultados do teste (versão simples) em arquivo .csv
    def save_results_test_simple(self, predicted, first_index, last_index):
        # Salva os resultados em um arquivo .csv
        predicted_path = "/content/drive/My Drive/colab_apps/nsv/results/test/simple/predicted_%s_%s.csv" % (first_index, last_index)
        np.savetxt(predicted_path, predicted, delimiter=',', fmt='%s')
        
    # Função para salvar os resultados do teste (versão completa) em arquivo .csv
    def save_results_test_full(self, decoded, predicted, error, first_index, last_index):
        # Salva os resultados em um arquivo .csv
        decoded_path = "/content/drive/My Drive/colab_apps/nsv/results/test/full/decoded_%s_%s.csv" %(first_index, last_index)
        predicted_path = "/content/drive/My Drive/colab_apps/nsv/results/test/full/predicted_%s_%s.csv" %(first_index, last_index)
        error_path = "/content/drive/My Drive/colab_apps/nsv/results/test/full/error_%s_%s.csv" %(first_index, last_index)
        np.savetxt(decoded_path, decoded, delimiter=',', fmt='%s')
        np.savetxt(predicted_path, predicted, delimiter=',', fmt='%s')
        np.savetxt(error_path, error, delimiter=',', fmt='%s')

In [0]:
# Classe para treinar o modelo
class AutoencoderTrain(object):
    # Metódo Construtor
    def __init__(self, input_train, output_train, data_length, data_dimension):
        self.input_train = input_train
        self.output_train = output_train
        self.data_length = data_length
        self.data_dimension = data_dimension
    
    # CAE para a reconstrução do sinal    
    def convolutional_autoencoder_1d(self, kernel_size, epochs, optimizer_option, loss_option, cae_path):
        x_train = np.expand_dims(self.input_train, axis=2) # Redimensionamento da entrada (treino) para o CAE
        y_train = np.expand_dims(self.output_train, axis=2) # Redimensionamento da saída (treino) para o CAE
        # Camadas de Convolução e Maxpooling para o Encoder    
        input_signal = Input(shape=(self.data_length,self.data_dimension))
        x = Conv1D(self.data_dimension, kernel_size, padding='same')(input_signal)
        x = MaxPooling1D(2, padding='same')(x)
        x = Conv1D(self.data_dimension, kernel_size, padding='same')(x)
        x = MaxPooling1D(2, padding='same')(x)
        x = Conv1D(self.data_dimension, kernel_size, padding='same')(x)
        encoded = MaxPooling1D(3, padding='same')(x) # Saída do Encoder
        
        # Camadas de Convolução e Upsampling para o Decoder
        x = Conv1D(self.data_dimension, kernel_size, padding='same')(encoded)
        x = UpSampling1D(3)(x)
        x = Conv1D(self.data_dimension, kernel_size, padding='same')(x)
        x = UpSampling1D(2)(x)
        x = Conv1D(self.data_dimension, kernel_size, padding='same')(x)
        x = UpSampling1D(2)(x)
        decoded = Conv1D(self.data_dimension, kernel_size, padding='same')(x) # Saída do Decoder
        
        # Compilação do Modelo
        autoencoder = Model(input_signal, decoded)
        autoencoder.compile(optimizer=optimizer_option, loss=loss_option)
        
        # Treinamento do Modelo
        history_callback = autoencoder.fit(x_train,y_train,
                         epochs=epochs,
                         verbose = 0)
        # Salva o histórico do erro para treino e teste
        loss_train = history_callback.history["loss"]

        # Salva o modelo treinado em arquivo .h5
        autoencoder.save(cae_path)
        
        # Saída do Treinamento
        decoded_train = autoencoder.predict(x_train)
        # Saída do Treinamento redimensionada
        decoded_train_reshaped = (decoded_train.reshape(len(self.input_train), self.data_length))
        # Erro de Treinamento
        return decoded_train_reshaped, loss_train

In [0]:
# Classe para rodar o treino
class RunTrain(object):
    def __init__(self, data_length, data_dimension, min, max, kernel_size, epochs, optimizer, loss):
        self.data_length = data_length
        self.data_dimension = data_dimension
        self.min = min
        self.max = max
        self.kernel_size = kernel_size
        self.epochs = epochs
        self.optimizer = optimizer
        self.loss = loss

    def run_cae(self, dataset, dataset_labels, cae_id):    
        cae_label = str(cae_id)
        cae_path = "/content/drive/My Drive/colab_apps/nsv/models/cae_%s.h5" %(cae_label)
        processing = ProcessingData(dataset)
        samples, labels = processing.organize_data_labels(dataset_labels, cae_id, len(dataset))
        # Normalização das entradas e saídas
        # x = processing.normalize_data(samples, self.min, self.max)
        x = samples
        y = x
        # CAE
        auto_encoder = AutoencoderTrain(x, y, self.data_length, self.data_dimension)
        decoded, loss = auto_encoder.convolutional_autoencoder_1d(self.kernel_size, 
                                                                  self.epochs, 
                                                                  self.optimizer, 
                                                                  self.loss, 
                                                                  cae_path)
        #decoded_original_scale, error = processing.original_scale_data(decoded, x, min, max)
        processing.save_results_train(loss, cae_label)
        return decoded, loss

In [0]:
# Classe para testar o modelo
class AutoencoderTest(object):
    def __init__(self):
        pass
      
    def autoencoder(self, data, min, max, model):
        data_proc = ProcessingData(data)
        data_original = data
#        data_norm = data_proc.normalize_data(data_original)
        data_norm = data
        data_reshaped = np.expand_dims(data_norm, axis=2)
        z = data_reshaped.T
        data_input = np.expand_dims(z, axis=2)
        predict_model_a = model.predict(data_input)
        decoded_reshaped = (predict_model_a.reshape(1, 180))
#        data_decoded = decoded_reshaped*(max - min) + min
        return decoded_reshaped


In [0]:
# Classe para rodar o teste
class RunTest(object):
    def __init__(self, dataset):
        self.dataset = dataset
    
    def run_competitive_cae(self, data_index, min, max, mode, model_a, model_b, model_c):
        cae = AutoencoderTest()
        data_decoded_a = cae.autoencoder(self.dataset[data_index,:], min, max, 
                                         model_a)
        data_dim = np.expand_dims(self.dataset[data_index,:], axis=2)
        data_reshaped = data_dim.T
        msq_a = mean_squared_error(data_reshaped, data_decoded_a)
        data_decoded_b = cae.autoencoder(self.dataset[data_index,:], min, max, 
                                         model_b)
        msq_b = mean_squared_error(data_reshaped, data_decoded_b)
        data_decoded_c = cae.autoencoder(self.dataset[data_index,:], min, max, 
                                         model_c)
        msq_c = mean_squared_error(data_reshaped, data_decoded_c)
        #data_decoded_d = cae.autoencoder(self.dataset[data_index,:], min, max, 
        #                                 model_d)
        #msq_d = mean_squared_error(data_reshaped, data_decoded_d)
        msq_vector = np.array([msq_a, msq_b, msq_c])
        if mode == 0: # Modo Simples
            msq_min_index = np.argmin(msq_vector)
            return msq_min_index
        elif mode == 1: # Modo Completo
            msq_min = msq_vector.min()
            msq_min_index = np.argmin(msq_vector)
            decoded_vector = np.array([data_decoded_a, data_decoded_b, 
                                       data_decoded_c])
            return msq_min_index, msq_vector, decoded_vector
        else:
            pass

In [0]:
class TrainCae(object):
  def __init__(self):
    pass

  def main(self, cae_id, epochs):
    # Carrega a Base de Dados
    dataset_train = np.loadtxt("/content/drive/My Drive/colab_apps/datasets/ecg/mitdb/mlii_dechazal/signals/DS1_signals.csv",delimiter=",")
    dataset_labels_train = np.loadtxt("/content/drive/My Drive/colab_apps/datasets/ecg/mitdb/mlii_dechazal/labels/DS1_labels.csv",delimiter=",")
    dataset_labels_train = np.reshape(dataset_labels_train,(len(dataset_labels_train),1))
    #cae_id = 'n'
    # Variáveis para a configuração da CAE
    data_length = len(dataset_train.T) # Tamanho do sinal em número de amostras
    kernel_size =  20 # Tamanho do Kernel (Janela) de Convolução
    epochs = epochs # Quantidade de Épocas para o Treinamento da Rede
    data_dimension = 1 # Dimensão dos Dados
    optimizer = 'adamax'
    loss = 'mean_squared_error'
    min = np.amin(dataset_train)
    max = np.amax(dataset_train)
    # Divisão da Base de Dados em Grupos de acordo com as classes
    #processing = ProcessingData(dataset_train)

    cae_id = cae_id # 0 = N, 1 = S, 2 = V, 3 = F e 4[?] = Q
    # Treina a Rede N
    if cae_id == 0:
        train = RunTrain(data_length, data_dimension, min, max, kernel_size, epochs, optimizer, loss)
        decoded_original_scale, loss = train.run_cae(dataset_train, dataset_labels_train, cae_id)
    # Treina a Rede S
    elif cae_id == 1:
        train = RunTrain(data_length, data_dimension, min, max, kernel_size, epochs, optimizer, loss)
        decoded_original_scale, loss = train.run_cae(dataset_train, dataset_labels_train, cae_id)
    # Treina a Rede V
    elif cae_id == 2:
        train = RunTrain(data_length, data_dimension, min, max, kernel_size, epochs, optimizer, loss)
        decoded_original_scale, loss = train.run_cae(dataset_train, dataset_labels_train, cae_id)
    # Treina a Rede F    
    #elif cae_id == 3:
    #    train = RunTrain(data_length, data_dimension, min, max, kernel_size, epochs, optimizer, loss)
    #    decoded_original_scale, loss = train.run_cae(dataset_train, dataset_labels_train, cae_id)
    # Treina a Rede Q
    # Apenas para finalizar a condicional caso entre com algum valor diferente    
    else:
        pass
    # To show CAEs DataFrame with EQM and Epochs    
    loss_n_data = pd.read_csv("/content/drive/My Drive/colab_apps/ens/results/train/loss_0.csv",header=None)
    loss_s_data = pd.read_csv("/content/drive/My Drive/colab_apps/nsv/results/train/loss_1.csv",header=None)
    loss_v_data = pd.read_csv("/content/drive/My Drive/colab_apps/nsv/results/train/loss_2.csv",header=None)
    #loss_f_data = pd.read_csv("/content/drive/My Drive/colab_apps/results/train/loss_3.csv",header=None)
    loss_n = loss_n_data.iloc[-1]
    loss_s = loss_s_data.iloc[-1]
    loss_v = loss_v_data.iloc[-1]
    #loss_f = loss_f_data.iloc[-1]
    loss_n_epochs = len(loss_n_data)
    loss_s_epochs = len(loss_s_data)
    loss_v_epochs = len(loss_v_data)
    #loss_f_epochs = len(loss_f_data)
    epochs_values = [loss_n_epochs, loss_s_epochs, loss_v_epochs]
    df_train = pd.DataFrame([loss_n, loss_s, loss_v], index=['Rede N', 'Rede S', 'Rede V'])
    df_train = df_train.assign(Épocas = epochs_values)
    df_train.columns = ['EQM', 'Épocas']
    print(df_train.head())
    df_train.to_csv('/content/drive/My Drive/colab_apps/nsv/results/train/train_parameters.csv')

In [0]:
class TestCae(object):
  def __init__(self):
    pass

  def main(self, first_index, last_index):
    startTime = datetime.now()
    # Carrega a Base de Dados DS2
    dataset_test = np.loadtxt("/content/drive/My Drive/colab_apps/datasets/ecg/mitdb/mlii_dechazal/signals/DS2_signals.csv",delimiter=",")
    dataset_labels_test = np.loadtxt("/content/drive/My Drive/colab_apps/datasets/ecg/mitdb/mlii_dechazal/labels/DS2_labels.csv", delimiter=",")
    dataset_labels_test = np.reshape(dataset_labels_test, (len(dataset_labels_test), 1))
    net_type = 4
    data_proc = ProcessingData(dataset_test)
    dataset_test, dataset_labels_test_num = data_proc.organize_data_labels(dataset_labels_test, net_type, len(dataset_test))
    labels_rows, labels_num = np.where(dataset_labels_test < 3)
    labels_test = []
    for index in range(0, len(labels_rows)):
      labels_test.append(dataset_labels_test[labels_rows[index]])
    # Carrega os modelos salvos
    model_a = load_model('/content/drive/My Drive/colab_apps/ens/models/cae_0.h5')
    model_b = load_model('/content/drive/My Drive/colab_apps/nsv/models/cae_1.h5')
    model_c = load_model('/content/drive/My Drive/colab_apps/nsv/models/cae_2.h5')
    #model_d = load_model('/content/drive/My Drive/colab_apps/models/cae_3.h5')
    mode = 1
    # Valores de Minimo e Máximo do Conjunto de Dados
    max = 1024
    min = -1024
    # Indices onde se inicia e encerra as amostras para o teste
    first_index = first_index
    last_index = last_index
    # Correção para começar no indice correto da Matriz
    initial_index = first_index-1
    # Array com quais indices o sistema será testado
    data_array = np.arange(initial_index, last_index)
    # Instanciamento para a execução das redes
    cae = RunTest(dataset_test)
    data_save = ProcessingData(dataset_test)
    # Execução das Redes
    if mode == 0: # Modo Simples
        # Matrizes para salvar os resultados
        predicted_mat = []
        for data_index in data_array:
            predicted = cae.run_competitive_cae(data_index, min, max, mode, model_a, model_b, model_c)
            predicted_mat.append(predicted)
            #print(data_index)
        data_save.save_results_test_simple(predicted_mat, first_index, last_index)
    elif mode == 1: # Modo Completo
        # Matrizes para salvar os resultados
        error_mat = []
        predicted_mat = []
        decoded_mat = []
        for data_index in data_array:
            predicted, error, decoded = cae.run_competitive_cae(data_index, min, max, mode, model_a, model_b, model_c)
            error_mat.append(error)
            predicted_mat.append(predicted)
            decoded_mat.append(decoded)
        # Converte decoded para um array
        decoded_array = np.asarray(decoded_mat)
        decoded_array = decoded_array.reshape((3, 180))
        print(predicted_mat)
        print(error_mat)
        print(decoded_array)
        # Salva os resultados
        data_save.save_results_test_full(decoded_array, predicted_mat, error_mat, first_index, last_index)
    else:
        pass
    #print("Tempo de Execução = ", datetime.now() - startTime)
    # Armazena em um vetor linha as classes originais e as preditas
    data_true = labels_test
    pred_path = "/content/drive/My Drive/colab_apps/nsv/results/test/simple/predicted_%s_%s.csv" %(first_index, last_index)
    pred_mat = np.loadtxt(pred_path, delimiter=",")
    pred_mat = np.reshape(pred_mat, (len(pred_mat), 1))
    pred_mat = pred_mat.T
    #data_true = data_true.tolist()
    pred_mat = pred_mat.tolist()
    #data_true = data_true[0]
    data_true = data_true[initial_index:last_index]
    pred_mat = pred_mat[0]
    # Calcula Acurácia, Sensibilidade e +P
    accuracy = accuracy_score(data_true, pred_mat)
    print('Acc = ', accuracy)
    recall = recall_score(data_true, pred_mat, average='macro') 
    print('Sen = ', recall)
    #recall_class = recall_score(data_true, pred_mat, average=None) 
    #print('Sen_C = ', recall_class)
    #precision_class = precision_score(data_true, pred_mat, average=None) 
    #print('+P = ', precision_class)
    precision, recall, fscore, support = score(data_true, pred_mat)
    #print('+P: {}'.format(precision))
    #print('Sen: {}'.format(recall))
    recall_values = [precision[0], precision[1], precision[2]]
    df_score = pd.DataFrame([recall[0], recall[1], recall[2]], index=['N', 'S', 'V'])
    df_score = df_score.assign(P = recall_values)
    df_score.columns = ['Sen', 'P']
    print(df_score.head())
    #df_test = pd.DataFrame([recall_class[0], recall_class[1], recall_class[2], recall_class[3]], 
    #                       index=['Classe N', 'Classe S', 'Classe V', 'Classe F'])
    #df_test.columns = ['Sen', 'PPV']
    #df_test = df_test.assign(PPV = precision_class)
    #print(df_test.head())
    #df_test.to_csv('/content/drive/My Drive/colab_apps/results/test/test_sen_p.csv')

    cf = confusion_matrix(data_true, pred_mat)
    #cf_cm = pd.DataFrame(cf, index = [i for i in "NSVF"],
    #                  columns = [i for i in "NSVF"])
    #plt.figure(figsize=(6, 6))
    #sns_confmat = sns.heatmap(cf_cm, cbar = False, annot=True, fmt='g')
    #fig_path = "/content/drive/My Drive/colab_apps/results/test/confmat_%s_%s.png" %(first_index, last_index)
    #sns_confmat.savefig(fig_path)
    classes = ['N','S','V']
    plt.imshow(cf, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title("Matriz de Confusão")
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = 'd'
    thresh = cf.max() / 2.
    for i, j in itertools.product(range(cf.shape[0]), range(cf.shape[1])):
        plt.text(j, i, format(cf[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cf[i, j] > thresh else "black")

    plt.ylabel('Classes Preditas')
    plt.xlabel('Classes Originais')
    plt.tight_layout()
    fig_path = "/content/drive/My Drive/colab_apps/nsv/results/test/confmat_%s.png" %(accuracy)
    plt.savefig(fig_path)

In [0]:
cae_train = TrainCae()
cae_train.main(1,7900) # cae_id, epochs. Para 1, o valor ideal do EQM é próximo de 120

             EQM  Épocas
Rede N  0.000124     300
Rede S  0.000121    7900
Rede V  0.000144     450


In [0]:
cae_test = TestCae()
cae_test.main(1906, 1906) #49303

[2]
[array([0.00082691, 0.00080681, 0.00036797])]
[[0.95897627 0.9602603  0.95127964 0.9509216  0.9558284  0.9560146
  0.95455074 0.95563525 0.95780504 0.9592822  0.9612157  0.9632263
  0.9644496  0.9658858  0.96866274 0.9697824  0.97196025 0.9739761
  0.9741979  0.97509766 0.97406256 0.9728983  0.9735108  0.9728156
  0.9707221  0.97147065 0.9750744  0.9774318  0.9829928  0.98670626
  0.9881355  0.9895144  0.98691446 0.98453414 0.9824056  0.97969306
  0.97572744 0.97539926 0.9790303  0.98245496 0.98877335 0.99299204
  0.9950324  0.99456304 0.9904878  0.9844169  0.9750689  0.9675649
  0.95948887 0.9556709  0.95481163 0.95661116 0.9585098  0.9602809
  0.9590155  0.95414966 0.94656134 0.93429756 0.9183362  0.90313953
  0.8878195  0.8752389  0.86208296 0.85412693 0.8459556  0.84167117
  0.8372585  0.8353543  0.8349016  0.8335411  0.8340876  0.83145386
  0.8288482  0.8224025  0.8103501  0.7983338  0.78556085 0.7748803
  0.7753012  0.7825137  0.8030745  0.83247364 0.8675541  0.9038668
  0.94

OSError: ignored