In [1]:
# Imports
import os, warnings
import librosa
import numpy as np
import tensorflow as tf
import soundfile as sf
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy import signal, fftpack
from collections import defaultdict
from testing_functions import test_hss
from process_functions import preprocessing_audio
from utils import find_and_open_audio, signal_segmentation, get_resp_segments
from heart_sound_segmentation.filter_and_sampling import downsampling_signal, \
    upsampling_signal
from source_separation.descriptor_functions import get_spectrogram
from IPython.display import Audio
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn import preprocessing, linear_model, svm
from sklearn.metrics import confusion_matrix, accuracy_score
from pybalu.feature_selection import sfs
from pybalu.feature_transformation import normalize

# Funciones previas

In [2]:
# Funciones de características
def get_filterbanks(N, samplerate, freq_lim, n_filters, norm_exp=1,
                    scale_type='mel', filter_type='triangular',
                    norm_filters=True, plot_filterbank=False):
    '''Función que permite obtener un banco de filtros linealmente
    espaciados o espaciados en frecuencia de mel para calcular
    coeficientes cepstrales.
    
    Parameters
    ----------
    N : ndarray
        Largo de la señal.
    samplerate : float
        Tasa de muestreo de la señal de entrada.
    freq_lim : float
        Frecuencia límite para calcular los coeficientes cepstrales.
    n_filters : int
        Cantidad de filtros a obtener.
    scale_type : {'mel', 'linear'}, optional
        Tipo de espaciado entre los bancos de filtros para el cálculo
        de los coeficientes cepstrales. Por defecto es 'mel' (MFCC). 
    filter_type : {'triangular', 'hanning', 'squared'}, optional
        Forma del filtro a utilizar para el cálculo de la energía en 
        cada banda. Por defecto es 'triangular'.
    inverse_func : {'dct', 'idft'}, optional
        Función a utilizar para obtener los coeficientes cepstrales.
        Por defecto es 'dct'.
    plot_filterbank : bool, optional
        Booleano que indica si se grafica el banco de filtros. Por 
        defecto es False.
    
    References
    ----------
    [1] http://practicalcryptography.com/miscellaneous/machine-learning/
        guide-mel-frequency-cepstral-coefficients-mfccs/
    [2] Xuedong Huang, Alex Acero, Hsiao-Wuen Hon - Spoken Language 
        Processing A Guide to Theory, Algorithm and System 
        Development-Prentice Hall PTR (2001)
    '''
    def _freq_to_bin(f):
        # Definición del bin correspondiente en la definición
        # del intervalo de cálculo. Se usa (N - 1) ya que los bins
        # se definen entre 0 y (N - 1) (largo N)
        return np.rint(f / samplerate * (N - 1)).astype(int)
    
    
    def _triangular_filter(bins_points):
        # Definición del banco de filtros
        filter_bank = np.zeros((n_filters, N))
        
        for i in range(1, n_filters + 1):
            # Tramo ascendente del filtro triangular
            filter_bank[i - 1][bins_points[i - 1]:bins_points[i] + 1] = \
                np.linspace(0, 1, abs(bins_points[i] - bins_points[i - 1] + 1))
            
            # Tramo descendente del filtro triangular
            filter_bank[i - 1][bins_points[i]:bins_points[i + 1] + 1] = \
                np.linspace(1, 0, abs(bins_points[i + 1] - bins_points[i] + 1))
            
        return filter_bank
    
    
    def _hanning_filter(bins_points):
        # Definición del banco de filtros
        filter_bank = np.zeros((n_filters, N))
        
        for i in range(1, n_filters + 1):
            # Tramo ascendente del filtro triangular
            filter_bank[i - 1][bins_points[i - 1]:bins_points[i + 1] + 1] = \
                np.hanning(abs(bins_points[i + 1] - bins_points[i - 1] + 1))
        
        return filter_bank
    
    
    def _squared_filter(bins_points):
        # Definición del banco de filtros
        filter_bank = np.zeros((n_filters, N))
        
        for i in range(1, n_filters + 1):
            # Tramo ascendente del filtro triangular
            filter_bank[i - 1][bins_points[i - 1]:bins_points[i + 1] + 1] = 1
        
        return filter_bank
    
    
    def _norm_filterbank(filter_bank):
        # Definición del banco de filtros de salida
        filter_bank_out = np.zeros((n_filters, N))
        
        # Normalizar los filtros a energía 1
        for i in range(n_filters):
            filter_bank_out[i] = filter_bank[i] / \
                                 sum(filter_bank[i] ** norm_exp)
            
        return filter_bank_out
    
    
    # Definición de los bines en base a las frecuencias de cada filtro
    if scale_type == 'linear':
        # Definición de las "n_filters" frecuencias equiespaciadas entre
        # 0 y freq_lim. Se le agregan 2 puntos (0 y el freq_lim) ya que se 
        # necesitan para definir los límites de los filtros.
        freqs = np.arange(0, (n_filters + 1) + 1) * freq_lim / (n_filters + 1)
    
    
    elif scale_type == 'mel':
        # Definición del límite en frecuencias de mel (para no pasarse del
        # freq_lim al devolverse)
        mel_freq_lim = 2595 * np.log10(1 + freq_lim / 700)
        
        # Definición de las "n_filters" frecuencias espaciadas en escala mel 
        # entre 0 y freq_lim. Se le agregan 2 puntos (0 y el freq_lim) ya 
        # que se necesitan para definir los límites de los filtros.
        mel_freqs = np.arange(0, (n_filters + 1) + 1) * mel_freq_lim / (n_filters + 1)
        
        # Transformando de intervalos equi espaciados usando la escala
        # de mel. Es necesario hacer la transformación inversa ya que
        # en este caso se dice que lo equi espaciado viene de mel
        freqs = 700 * (10 ** (mel_freqs / 2595) - 1)
    
    else:
        raise Exception('Opción de tipo de coeficiente cepstral no válido.')
    
    
    # Transformando a bins
    bins_to = _freq_to_bin(freqs)
    
    
    # Obtención del banco de filtros
    if filter_type == 'triangular':
        filter_bank = _triangular_filter(bins_to)
        
    if filter_type == 'hanning':
        filter_bank = _hanning_filter(bins_to)
    
    elif filter_type == 'squared':
        filter_bank = _squared_filter(bins_to)
    
    # Normalizar por la energía de la señal
    if norm_filters:
        filter_bank = _norm_filterbank(filter_bank)
    
    
    # Gráfico del banco de filtros
    if plot_filterbank:
        plt.figure()
        
        # Definición del vector de frecuencias
        f_plot = np.arange(N) * samplerate / N
        
        for i in range(n_filters):
            plt.plot(filter_bank[i])
            # plt.plot(f_plot, filter_bank[i])

        for i in bins_to:
            # plt.axvline(i * samplerate / N, c='silver', linestyle=':')
            plt.axvline(i, c='silver', linestyle=':')
            
        # plt.xlim([0, freq_lim])
        plt.xlim([0, bins_to[-1]])
        plt.show()
    
    
    return filter_bank


def get_cepstral_coefficients(signal_in, samplerate, spectrogram_params,
                              freq_lim, n_filters, n_coefs, scale_type='mel', 
                              filter_type='triangular', inverse_func='dct', 
                              norm_filters=True, plot_filterbank=False, 
                              power=2):
    '''Función que permite obtener los coeficientes cepstrales a partir de 
    un banco de filtros.
    
    Parameters
    ----------
    signal_in : ndarray
        Señal de entrada.
    samplerate : float
        Tasa de muestreo de la señal de entrada.
    freq_lim : float
        Frecuencia límite para calcular los coeficientes cepstrales.
    n_coefs : int
        Cantidad de coeficientes a obtener.
    scale_type : {'mel', 'linear'}, optional
        Tipo de espaciado entre los bancos de filtros para el cálculo
        de los coeficientes cepstrales. Por defecto es 'mel' (MFCC). 
    filter_type : {'triangular', 'hanning', 'squared'}, optional
        Forma del filtro a utilizar para el cálculo de la energía en 
        cada banda. Por defecto es 'triangular'.
    inverse_func : {'dct', 'idft'}, optional
        Función a utilizar para obtener los coeficientes cepstrales.
        Por defecto es 'dct'.
    plot_filterbank : bool, optional
        Booleano que indica si se grafica el banco de filtros. Por 
        defecto es False.
    
    References
    ----------
    [1] http://practicalcryptography.com/miscellaneous/machine-learning/
        guide-mel-frequency-cepstral-coefficients-mfccs/
    [2] Xuedong Huang, Alex Acero, Hsiao-Wuen Hon - Spoken Language 
        Processing A Guide to Theory, Algorithm and System 
        Development-Prentice Hall PTR (2001)
    '''    
    # Definición de la cantidad de puntos a considerar
    filter_bank = get_filterbanks(spectrogram_params['N'], samplerate, 
                                  freq_lim=freq_lim, n_filters=n_filters, 
                                  scale_type=scale_type, 
                                  filter_type=filter_type,
                                  norm_filters=norm_filters, 
                                  plot_filterbank=plot_filterbank)
    
    # Obtener el espectrograma de la señal
    t, f, S = get_spectrogram(signal_in, samplerate, N=spectrogram_params['N'], 
                              padding=spectrogram_params['padding'], 
                              repeat=spectrogram_params['repeat'], 
                              noverlap=spectrogram_params['noverlap'], 
                              window=spectrogram_params['window'], 
                              whole=True)
    
    # Definición del espectro de la señal
    energy_spectrum = np.abs(S) ** power
    
    # Se aplica el banco de filtros sobre el espectro de la señal
    energy_coefs = np.dot(filter_bank, energy_spectrum)
    
    # Aplicando el logaritmo
    energy_coefs = np.log(energy_coefs + 1e-10)
    
    # Calculando los coeficientes cepstrales
    if inverse_func == 'dct':
        cepstral_coefs = fftpack.dct(energy_coefs, norm='ortho', axis=0)
    elif inverse_func == 'idft':
        cepstral_coefs = np.fft.ifft(energy_coefs, axis=-1).real
    else:
        raise Exception('Opción de tipo de función inversa no válida.')
    
    
    return cepstral_coefs[:n_coefs]


def get_bands_coefficients(signal_in, samplerate, spectrogram_params,
                           freq_lim, n_coefs, scale_type='mel', 
                           filter_type='triangular', norm_filters=True, 
                           plot_filterbank=False, 
                           power=2):
    '''Función que permite obtener la energía por bandas de frecuencia
    a partir de un banco de filtros.
    
    Parameters
    ----------
    signal_in : ndarray
        Señal de entrada.
    samplerate : float
        Tasa de muestreo de la señal de entrada.
    freq_lim : float
        Frecuencia límite para calcular los coeficientes cepstrales.
    n_coefs : int
        Cantidad de coeficientes a obtener.
    scale_type : {'mel', 'linear'}, optional
        Tipo de espaciado entre los bancos de filtros para el cálculo
        de los coeficientes cepstrales. Por defecto es 'mel' (MFCC). 
    filter_type : {'triangular', 'hanning', 'squared'}, optional
        Forma del filtro a utilizar para el cálculo de la energía en 
        cada banda. Por defecto es 'triangular'.
    inverse_func : {'dct', 'idft'}, optional
        Función a utilizar para obtener los coeficientes cepstrales.
        Por defecto es 'dct'.
    plot_filterbank : bool, optional
        Booleano que indica si se grafica el banco de filtros. Por 
        defecto es False.
    
    References
    ----------
    [1] http://practicalcryptography.com/miscellaneous/machine-learning/
        guide-mel-frequency-cepstral-coefficients-mfccs/
    [2] Xuedong Huang, Alex Acero, Hsiao-Wuen Hon - Spoken Language 
        Processing A Guide to Theory, Algorithm and System 
        Development-Prentice Hall PTR (2001)
    '''    
    # Definición de la cantidad de puntos a considerar
    filter_bank = get_filterbanks(spectrogram_params['N'], samplerate, 
                                  freq_lim=freq_lim, 
                                  n_coefs=n_coefs, scale_type=scale_type, 
                                  filter_type=filter_type,
                                  norm_filters=norm_filters, 
                                  plot_filterbank=plot_filterbank)
    
    # Obtener el espectrograma de la señal
    t, f, S = get_spectrogram(signal_in, samplerate, N=spectrogram_params['N'], 
                              padding=spectrogram_params['padding'], 
                              repeat=spectrogram_params['repeat'], 
                              noverlap=spectrogram_params['noverlap'], 
                              window=spectrogram_params['window'], 
                              whole=True)
    
    # Definición del espectro de la señal
    energy_spectrum = np.abs(S) ** power
    
    # Se aplica el banco de filtros sobre el espectro de la señal
    energy_coefs = np.dot(filter_bank, energy_spectrum)
    
    return energy_coefs


def get_energy_bands(signal_in, samplerate, spectrogram_params, 
                     fmin=0, fmax=1000, fband=20, power=2):
    '''Función que permite definir un espectrograma en bandas de 
    energía.
    
    
    Parameters
    ----------
    signal_in : ndarray
        Señal de entrada.
    samplerate : float
        Tasa de muestreo de la señal de entrada.
    spectrogram_params : dict
        Parámetros del espectrograma.
    fmin : float, optional
        Frecuencia mínima a considerar en el intervalo de interés.
        Por defecto es 0.
    fmax : float, optional
        Frecuencia máxima a considerar en el intervalo de interés.
        Este valor no puede mayor a samplerate / 2. Por defecto 
        es 1000.
    fband : float, optional
        Ancho de cada banda de frecuencia entre fmin y fmax. Por 
        defecto es 20.
    power : float, optional
        Exponente con el que se calcula la energía.
    
    Returns
    -------
    energy_S : ndarray
        Bandas de energía a través del tiempo (formato 
        espectrograma) con dimensión (#bandas x #bins de tiempo 
        del espectrograma).     
    '''
    # Obtener el espectrograma
    t, f, S = get_spectrogram(signal_in, samplerate, 
                              N=spectrogram_params['N'], 
                              padding=spectrogram_params['padding'], 
                              repeat=spectrogram_params['repeat'], 
                              noverlap=spectrogram_params['noverlap'], 
                              window=spectrogram_params['window'], 
                              whole=False)
    
    # Definición de los intervalos
    f_intervals = np.arange(fmin, fmax, fband)

    # Definición de la lista que almacenará los datos
    energy_band = np.zeros(len(f_intervals) - 1)
    energy_S = np.zeros((len(energy_band), len(t)))

    for i in range(len(f_intervals) - 1):
        lower_lim = f_intervals[i]
        upper_lim = f_intervals[i + 1]

        # Definición de los índices de interés
        indexes = np.where((lower_lim <= f) & (f <= upper_lim))[0]

        # Definiendo el valor
        energy_S[i] = np.sum(abs(S[indexes,:]) ** power, axis=0)
    
    return energy_S


In [3]:
def _conditioning_signal(signal_in, samplerate, samplerate_to):
    # Acondicionando en caso de que no tenga samplerate de 1000 Hz.
    if samplerate < samplerate_to:
        print(f'Upsampling de la señal de fs = {samplerate} Hz '
              f'a fs = {samplerate_to} Hz.') 
        new_rate = samplerate_to           
        audio_to = upsampling_signal(signal_in, samplerate, new_samplerate=new_rate)

    elif samplerate > samplerate_to:
        print(f'Downsampling de la señal de fs = {samplerate} Hz '
              f'a fs = {samplerate_to} Hz.')
        new_rate, audio_to = downsampling_signal(signal_in, samplerate, 
                                                 freq_pass=samplerate_to//2-100, 
                                                 freq_stop=samplerate_to//2)

    else:
        print(f'Samplerate adecuado a fs = {samplerate} Hz.')
        audio_to = signal_in
        new_rate = samplerate_to

    # Mensaje para asegurar
    print(f'Señal acondicionada a {new_rate} Hz para la separación de fuentes.')

    # Asegurándose de que el largo de la señal sea par
    if len(audio_to) % 2 != 0:
        audio_to = np.concatenate((audio_to, [0]))

    return audio_to, new_rate

In [4]:
def pybalu_clean(features, tol=1e-8, show=False):
    n_features = features.shape[1]
    ip = np.ones(n_features, dtype=int)

    # cleaning correlated features
    warnings.filterwarnings('ignore')
    C = np.abs(np.corrcoef(features, rowvar=False))
    idxs = np.vstack(np.where(C > .99))
    
    # remove pairs of same feature ( feature i will have a correlation of 1 whit itself )
    idxs = idxs[:, idxs[0,:] != idxs[1,:]]
    
    # remove correlated features
    if idxs.size > 0:
        ip[np.max(idxs, 0)] = 0
    
    # remove constant features
    s = features.std(axis=0, ddof=1)
    ip[s < tol] = 0
    p = np.where(ip.astype(bool))[0]

    if show:
        print(f'Clean: number of features reduced from {n_features} to {p.size}.')

    return p

# Parámetros de los descriptores

In [5]:
# Parámetro base de datos
preprocess = True

# Parámetros de los espectrogramas generales
N = 1024
noverlap = int(0.9 * N)
spec_params = {'N': N, 'noverlap': noverlap, 'window': 'hann', 
               'padding': 0, 'repeat': 0}

# Parámetros MFCC
mfcc_params = {'n_mfcc': 50, 'n_filters': 50, 'spec_params': spec_params,
               'freq_lim': 2000, 'norm_filters': True, 'power': 2}
lfcc_params = {'n_mfcc': 50, 'n_filters': 50, 'spec_params': spec_params,
               'freq_lim': 2000, 'norm_filters': True, 'power': 2}
energy_params = {'spec_params': spec_params, 'fmin': 0, 'fmax': 1000, 
                 'fband': 20}

# Parámetros de extracción de características
collapse_mfcc = 'mean'
func_type = 'cepstral_coefficients'

In [6]:
# Dirección de la base de datos
db_original = 'C:/Users/Chris/Desktop/Scripts_Magister/Respiratory_Sound_Database/audio_and_txt_files'
db_folder = 'preprocessed_signals_OLD'

# Nombres de los archivos
filenames = [i[:-4] for i in os.listdir(db_folder) if i.endswith('.wav') and not 'Tc' in i]

# Extracción de características

In [7]:
%matplotlib notebook

# Definición de los arrays donde se acumulará las características
X_data_mean = list()
X_data_max = list()
X_segments = list()

# Definición de los arrays donde se acumularán las etiquetas
Y_wheeze = list()
Y_crackl = list()

# Diccionario que indica los segmentos que corresponden a cada paciente
patient_register = defaultdict(list)

# Contador de los segmentos
seg_i = 0

# Nombre del archivo .wav a utilizar
for num, name in enumerate(filenames):
    print(f'Iteración {num + 1}: {name}')
    print(f'--------------------------')
    
    # Definición del paciente de interés
    patient = name.split('_')[0]
    
    if preprocess:
        filename = f'{db_folder}/{name}'
    else:
        filename = f'{db_original}/{name}'

    # Cargando el archivo
    try:
        samplerate, resp_signal = wavfile.read(f'{filename}.wav')
    except:
        resp_signal, samplerate = sf.read(f'{filename}.wav')
    
    print(f'Samplerate = {samplerate}, largo = {resp_signal.shape}')
    
    # Normalizando
    resp_signal = resp_signal / max(abs(resp_signal))
    
    
    # Obteniendo la información de los segmentos de este archivo de audio
    resp_list_info = get_resp_segments(resp_signal, samplerate, 
                                       filepath=f'{db_original}/{name}.txt')
    
    
    # Para cada segmento, se obtiene la información de interés
    for resp_info in resp_list_info:
        # Registrando
        patient_register[patient].append(seg_i)
        seg_i += 1
        
        # Agregando a la lista el segmento
        X_segments.append(resp_info[0])
        
        ### Calculando las características a partir del segmento ###
        
        # Cálculo del MFCC
        mfcc_features = \
            get_cepstral_coefficients(resp_info[0], samplerate, 
                                      spectrogram_params=mfcc_params['spec_params'],
                                      freq_lim=mfcc_params['freq_lim'], 
                                      n_filters=mfcc_params['n_filters'], 
                                      n_coefs=mfcc_params['n_mfcc'], 
                                      scale_type='mel', 
                                      filter_type='triangular', inverse_func='dct', 
                                      norm_filters=mfcc_params['norm_filters'], 
                                      plot_filterbank=False, 
                                      power=mfcc_params['power'])
                
        # Cálculo del LFCC
        lfcc_features = \
            get_cepstral_coefficients(resp_info[0], samplerate, 
                                      spectrogram_params=lfcc_params['spec_params'],
                                      freq_lim=lfcc_params['freq_lim'], 
                                      n_filters=lfcc_params['n_filters'], 
                                      n_coefs=lfcc_params['n_mfcc'], 
                                      scale_type='linear', 
                                      filter_type='triangular', inverse_func='dct', 
                                      norm_filters=lfcc_params['norm_filters'], 
                                      plot_filterbank=False, 
                                      power=lfcc_params['power'])
        
        # Cálculo de la energía por bandas
        energy_S = \
            get_energy_bands(resp_info[0], samplerate,
                             spectrogram_params=energy_params['spec_params'],
                             fmin=energy_params['fmin'], 
                             fmax=energy_params['fmax'], 
                             fband=energy_params['fband'])
        
        # Colapsando la información
        to_append_mean = np.concatenate((mfcc_features.mean(axis=1),
                                         lfcc_features.mean(axis=1),
                                         energy_S.mean(axis=1)), axis=0)
        to_append_max = np.concatenate((mfcc_features.max(axis=1),
                                        lfcc_features.max(axis=1),
                                        energy_S.max(axis=1)), axis=0)
        
        # Agregando la información a cada arreglo
        X_data_mean.append(to_append_mean)
        X_data_max.append(to_append_max)
        
        Y_wheeze.append(resp_info[1])
        Y_crackl.append(resp_info[2])


# Transformando listas a arrays
X_data_mean = np.array(X_data_mean)
X_data_max = np.array(X_data_max)

Y_wheeze = np.array(Y_wheeze)
Y_crackl = np.array(Y_crackl)

Iteración 1: 101_1b1_Al_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 2: 101_1b1_Pr_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 3: 102_1b1_Ar_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 4: 103_2b2_Ar_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 5: 106_2b1_Pl_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 6: 106_2b1_Pr_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 7: 107_2b3_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 8: 107_2b3_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 9: 107_2b3_Ll_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 10: 107_2b3_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración

Iteración 83: 130_2b4_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 84: 130_2b4_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 85: 130_2p3_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 86: 130_2p5_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 87: 130_2p5_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 88: 130_2p5_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 89: 130_2p5_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 90: 130_2p5_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 91: 130_3b3_Ll_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 92: 130_3b4_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)


Iteración 164: 138_2p2_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 165: 138_2p2_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 166: 140_2b2_Ll_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 167: 140_2b3_Ll_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 168: 141_1b1_Pr_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 169: 141_1b2_Ar_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 170: 141_1b2_Lr_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 171: 141_1b2_Pr_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 172: 141_1b3_Al_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 173: 141_1b3_Ar_mc_LittC2SE
--------------------------
Samplerate = 4000, largo =

Iteración 245: 154_1b3_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 246: 154_2b4_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 247: 154_2b4_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 248: 154_2b4_Ll_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 249: 154_2b4_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 250: 154_2b4_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 251: 154_2b4_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 252: 154_3b3_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 253: 154_3b3_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 254: 154_3b3_Ll_mc_AKGC417L
--------------------------
Samplerate = 4000, largo =

Iteración 326: 162_1b2_Ll_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 327: 162_1b2_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 328: 162_1b2_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 329: 162_1b2_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 330: 162_2b2_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 331: 162_2b2_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 332: 162_2b2_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 333: 162_2b2_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 334: 162_2b3_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 335: 162_2b3_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo =

Iteración 407: 172_2b5_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 408: 172_2b5_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 409: 172_2b5_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 410: 173_1b1_Al_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 411: 174_1p2_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 412: 174_1p2_Ll_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 413: 174_1p2_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 414: 174_1p2_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 415: 174_1p2_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 416: 174_1p3_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo =

Iteración 488: 181_1b1_Ar_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 489: 181_1b2_Ar_mc_LittC2SE
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 490: 183_1b1_Pl_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 491: 184_1b1_Ar_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 492: 186_2b2_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 493: 186_2b2_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 494: 186_2b2_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 495: 186_2b2_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 496: 186_2b2_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 497: 186_2b3_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo =

Iteración 569: 200_2p4_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 570: 200_3p4_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 571: 200_3p4_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 572: 200_3p4_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 573: 200_3p4_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 574: 201_1b1_Al_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 575: 201_1b1_Ar_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 576: 201_1b2_Al_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 577: 201_1b2_Ar_sc_Meditron
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 578: 201_1b3_Al_sc_Meditron
--------------------------
Samplerate = 4000, largo =

Iteración 650: 211_1p2_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 651: 211_1p2_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 652: 211_1p3_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 653: 211_1p5_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 654: 213_1p2_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 655: 213_1p2_Ar_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 656: 213_1p2_Lr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 657: 213_1p2_Pl_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 658: 213_1p2_Pr_mc_AKGC417L
--------------------------
Samplerate = 4000, largo = (80182,)
Iteración 659: 213_1p3_Al_mc_AKGC417L
--------------------------
Samplerate = 4000, largo =

In [22]:
min_list = list()
max_list = list()
len_list = list()

for i in X_segments:
    min_list.append(min(i))
    max_list.append(max(i))
    len_list.append(len(i))

In [23]:
print(min(min_list))
print(max(max_list))
print(max(len_list))

-1.0413449010074045
1.0
31320


# Stratify

In [24]:
# Definición de los grupos de pacientes
patient_groups = {1: ['101', '102', '103', '106', '107', '108', '114', '115', '116', 
                      '119', '122', '123', '126', '127', '129', '162'],
                  2: ['130', '131', '132', '165', '197'],
                  3: ['133', '134', '135', '136', '137', '138', '140', '141', '142', 
                      '143', '169'],
                  4: ['144', '145', '146', '147', '148', '149', '150', '151', '152',
                      '153', '155', '159', '179'],
                  5: ['154', '156', '160', '161', '167', '225', '226'],
                  6: ['158', '163', '164', '168', '170', '171', '183', '216', '221',
                      '224'],
                  7: ['172', '173', '174', '176', '177', '180', '189', '191', '196'],
                  8: ['178', '181', '184', '186', '187', '188', '192', '194', '202', 
                      '204'],
                  9: ['193', '198', '199', '200', '201', '203'],
                  10: ['205', '206', '207', '208', '210', '211', '213', '214', '215',
                       '219', '220']}

# Definición de variables de control
segment_count = 0
patients_reviewed = list()

for i in patient_groups.keys():
    segments_by_group = 0
    
    for j in patient_groups[i]:
        # Sumando al contador de grupo
        segments_by_group += len(patient_register[j])
        
    # Agregando a la lista de pacientes totales
    patients_reviewed.extend(patient_groups[i])
    
    # Sumando al contador total 
    segment_count += segments_by_group
    
    # Print de sanidad
    print(f'Grupo {i}: {segments_by_group}')

print(f'Definidos {segment_count} de {Y_wheeze.shape[0]}')

Grupo 1: 485
Grupo 2: 492
Grupo 3: 491
Grupo 4: 487
Grupo 5: 494
Grupo 6: 493
Grupo 7: 490
Grupo 8: 487
Grupo 9: 491
Grupo 10: 492
Definidos 4902 de 4902
new


# Diseño del sistema de clasificación

In [89]:
def ML_classification_system(X_train, Y_train, X_test, Y_test, 
                             clean_params=None, sel_params=None, 
                             class_params=None):
    '''Diseño del sistema de clasificación basado en Machine 
    Learning.
    
    Parameters
    ----------
    X_train : ndarray
        Datos de entrenamiento.
    Y_train : ndarray
        Etiquetas de los datos de entrenamiento.
    X_test : ndarray
        Datos de testeo.
    Y_test : ndarray
        Etiquetas de los datos de testeo.
    clean_params: dict or None, optional
        Parámetros del proceso de limpieza de características. 
        Si es None se utilizan características por defecto: 
        'tol': 13-5, 'show': True. Por defecto es None.
    sel_params: dict or None, optional
        Parámetros del proceso de selección de características. 
        Si es None se utilizan características por defecto: 
        'n_features': 10, 'show': True. Por defecto es None.
    class_params: dict or None, optional
        Parámetros del proceso de clasificación. Si es None se 
        utilizan características por defecto: 
        'classifier': 'knn', 'k_neigh': 10. Por defecto es None. 
        En caso de usar 'svm', es posible modificar el 'kernel'.
        
    Returns
    -------
    classifier : class
        Clasificador entrenado.
    X_test : ndarray
        Matriz de testeo modificada (en caso de que X_test no 
        sea None).
    params_out : dict
        Parámetros obtenidos a partir del entrenamiento del
        sistema sobre los datos. Se entrega información de las
        características del clean ('s_clean'), normalización
        ('a_norm' y 'b_norm'), y de la selección de 
        características ('s_sfs').
    Y_pred : ndarray or None
        Predicción realizada por el sistema (en caso de que
        Y_test no sea None). Si no se entrega Y_test, la salida
        será None.
    '''
    # Parámetros por defecto
    if clean_params is None:
        clean_params = {'tol': 1e-5, 'show': True}
    
    if sel_params is None:
        sel_params = {'n_features': 10, 'show': True}
    
    if class_params is None:
        class_params = {'classifier': 'knn', 'k_neigh': 10}
        
    Y_pred = None
    
    
    #### Pipeline de la etapa de clasificación ####
    
    ## 1) Limpieza de las características
    s_clean = pybalu_clean(X_train, tol=clean_params['tol'], 
                           show=clean_params['show'])
    
    # Aplicando la limpieza
    X_train = X_train[:, s_clean]
    
    
    ## 2) Normalización de los datos
    X_train, a_norm, b_norm = normalize(X_train)
    
    
    ## 3) Selección de características
    s_sfs = sfs(X_train, Y_train, show=sel_params['show'],
                n_features=sel_params['n_features'])
    
    # Aplicando la selección
    X_train = X_train[:, s_sfs]
    
    
    ## 4) Proceso de clasificación   
    if class_params['classifier'] == 'knn':
        classifier = KNeighborsClassifier(n_neighbors=\
                                          class_params['k_neigh'])
        
    elif class_params['classifier'] == 'svm':
        classifier = svm.SVC(kernel=class_params['kernel'])
    
    else:
        raise Exception('Opción de clasificador no definida '
                        'correctamente.')
    
    # Ajustando el clasificador
    classifier.fit(X_train, Y_train)
    
    
    # Aplicando todo el proceso a los datos de testeo
    if X_test is not None:
        X_test = X_test[:, s_clean]         # 1) Clean
        X_test = a_norm * X_test + b_norm   # 2) Normalización
        X_test = X_test[:, s_sfs]           # 3) Selección
        
        # Aplicando el clasificador
        if Y_test is not None:
            Y_pred = classifier.predict(X_test)

    
    # Definición del diccionario de parámetros
    params_out = {'a_norm': a_norm, 'b_norm': b_norm, 's_clean': s_clean,
                  's_sfs': s_sfs}
        
    return classifier, X_test, params_out, Y_pred


def NN_MLP_classification_system(X_train, Y_train, X_test, Y_test, 
                                 clean_params=None, sel_params=None, 
                                 mlp_params=None):
    '''Diseño del sistema de clasificación basado en Redes Neuronales
    Multicapas.
    
    Parameters
    ----------
    X_train : ndarray
        Datos de entrenamiento.
    Y_train : ndarray
        Etiquetas de los datos de entrenamiento.
    X_test : ndarray
        Datos de testeo.
    Y_test : ndarray
        Etiquetas de los datos de testeo.
    clean_params: dict or None, optional
        Parámetros del proceso de limpieza de características. 
        Si es None se utilizan características por defecto: 
        {'tol': 13-5, 'show': True}. Por defecto es None.
    sel_params: dict or None, optional
        Parámetros del proceso de selección de características. 
        Si es None se utilizan características por defecto: 
        {'n_features': 10, 'show': True}. Por defecto es None.
    mlp_params : dict or None, optional
        Parámetros del preoceso de clasificación con MLP. Si es
        None se utilizan las características por defecto:
        {'optimizer': 'Adam', 'loss': 'binary_crossentropy',
         'batch_size': None, 'epochs': 100, 'verbose': 1, 
         'metrics': ['accuracy', tf.keras.metrics.Recall(), 
                      tf.keras.metrics.Precision()],
         'out_layer': 'sigmoid', 'preprocessing': True}
        
    Returns
    -------
    classifier : class
        Clasificador entrenado.
    X_test : ndarray
        Matriz de testeo modificada (en caso de que X_test no 
        sea None).
    params_out : dict
        Parámetros obtenidos a partir del entrenamiento del
        sistema sobre los datos. Se entrega información de las
        características del clean ('s_clean'), normalización
        ('a_norm' y 'b_norm'), y de la selección de 
        características ('s_sfs').
    Y_pred : ndarray or None
        Predicción realizada por el sistema (en caso de que
        Y_test no sea None). Si no se entrega Y_test, la salida
        será None.
    '''
    # Parámetros por defecto
    if clean_params is None:
        clean_params = {'tol': 1e-5, 'show': True}
    
    if sel_params is None:
        sel_params = {'n_features': 10, 'show': True}
        
    if mlp_params is None:
        mlp_params = {'optimizer': 'Adam', 'loss': 'binary_crossentropy',
                      'batch_size': None, 'epochs': 100, 'verbose': 1, 
                      'metrics': ['accuracy', tf.keras.metrics.Recall(), 
                                  tf.keras.metrics.Precision()],
                      'out_layer': 'sigmoid', 'preprocessing': True}
    
    Y_pred = None
    
    
    #### Pipeline de la etapa de clasificación ####
    
    # Rutina de preprocesamiento
    if mlp_params['preprocessing']:
        ## 1) Limpieza de las características
        s_clean = pybalu_clean(X_train, tol=clean_params['tol'], 
                               show=clean_params['show'])

        # Aplicando la limpieza
        X_train = X_train[:, s_clean]


        ## 2) Normalización de los datos
        X_train, a_norm, b_norm = normalize(X_train)


        ## 3) Selección de características
        s_sfs = sfs(X_train, Y_train, show=sel_params['show'],
                    n_features=sel_params['n_features'])

        # Aplicando la selección
        X_train = X_train[:, s_sfs]

    
    
    ## 4) Proceso de clasificación
    
    # Definición del modelo
    model = MLP_network(input_shape=(X_train.shape[1],),
                        out_layer=mlp_params['out_layer'])
    
    # Compilando modelos
    model.compile(optimizer=mlp_params['optimizer'], 
                  loss=mlp_params['loss'],
                  metrics=mlp_params['metrics'])
    
    
    # Definición de los vectores
    if mlp_params['out_layer'] == 'softmax':
        # One-Hot
        Y_train_to = \
            np.array([Y_train, np.ones(len(Y_train)) - Y_train]).T
    
    elif mlp_params['out_layer'] == 'sigmoid':
        # Normal
        Y_train_to = Y_train
    
    
    # Ajustando el Modelo
    history = model.fit(x=X_train, y=Y_train_to, 
                        batch_size=mlp_params['batch_size'],
                        epochs=mlp_params['epochs'],
                        verbose=mlp_params['verbose'])
    
    
    # Aplicando todo el proceso a los datos de testeo
    if X_test is not None:
        # Si se realizó el preprocesamiento, se actualiza
        if preprocessing:
            X_test = X_test[:, s_clean]         # 1) Clean
            X_test = X_test * a_norm + b_norm   # 2) Normalización
            X_test = X_test[:, s_sfs]           # 3) Selección
        
        # Aplicando el clasificador
        if Y_test is not None:
            Y_pred = model.predict(X_test)

    
    # Definición del diccionario de parámetros
    params_out = {'a_norm': a_norm, 'b_norm': b_norm, 's_clean': s_clean,
                  's_sfs': s_sfs, 'history': history}
        
    return model, X_test, params_out, Y_pred


def MLP_network(input_shape, out_layer='sigmoid'):
    '''Función que define una red de perceptrones multicapas para 
    clasificar.
    
    Parameters
    ----------
    input_shape : list or ndarray
        Dimensión de la información de entrada.
    out_layer : {'sigmoid', 'softmax'}, optional
        Función a usar en la capa de salida de la red. Por defecto
        es 'sigmoid'.
    
    Returns
    -------
    model: tensorflow.keras.Model
        Modelo del sistema.
    '''
    
    def _layer(input_layer, units, kernel_initializer, 
               bias_initializer, name):
        '''Función auxiliar que modela las capas Dense + batchnorm +
        Activation ReLU'''
        # Aplicando la concatenación de capas
        x_dense = tf.keras.layers.Dense(units=units, 
                                        bias_initializer=bias_initializer,
                                        kernel_initializer=kernel_initializer,
                                        name=f'Dense_{name}')(input_layer)
        x_dense = \
            tf.keras.layers.BatchNormalization(name=f'BatchNorm_{name}')(x_dense)
        x_dense = \
            tf.keras.layers.Activation('relu', name=f'Activation_{name}')(x_dense)

        return x_dense
    
    
    # Definición de la entrada
    x_in = tf.keras.Input(shape=input_shape, dtype='float32')
    
    
    # Definición de la red misma
    x_layer = _layer(x_in, units=500, kernel_initializer='he_normal', 
                     bias_initializer='he_normal', name='Layer_1')
    x_layer = _layer(x_layer, units=200, kernel_initializer='he_normal', 
                     bias_initializer='he_normal', name='Layer_2')
    x_layer = _layer(x_layer, units=100, kernel_initializer='he_normal', 
                     bias_initializer='he_normal', name='Layer_3')
    x_layer = _layer(x_layer, units=80, kernel_initializer='he_normal', 
                     bias_initializer='he_normal', name='Layer_4')
    x_layer = _layer(x_layer, units=30, kernel_initializer='he_normal', 
                     bias_initializer='he_normal', name='Layer_5')
    x_layer = _layer(x_layer, units=10, kernel_initializer='he_normal', 
                     bias_initializer='he_normal', name='Layer_6')
    x_layer = _layer(x_layer, units=5, kernel_initializer='he_normal', 
                     bias_initializer='he_normal', name='Layer_7')
    
    # Definición de la salida
    if out_layer == 'softmax':
        x_out = tf.keras.layers.Dense(2, activation='softmax', 
                                      kernel_initializer='he_normal', 
                                      bias_initializer='he_normal',
                                      name='softmax_out')(x_layer)
    elif out_layer == 'sigmoid':
        x_out = tf.keras.layers.Dense(1, activation='sigmoid', 
                                      kernel_initializer='he_normal', 
                                      bias_initializer='he_normal',
                                      name='sigmoid_out')(x_layer)
    else:
        raise Exception(f'Opción de parámetro "out_layer"={out_layer} '
                        f'no válido.')
    
    # Definir el modelo
    model = tf.keras.Model(inputs=x_in, outputs=x_out, name='Red_MLP')
    
    return model


def CNN_network(input_shape, padding_value, out_layer='sigmoid'):
    '''Función que define una red CNN para extraer características y 
    clasificar.
    
    Parameters
    ----------
    padding_value : float
        Valor utilizado para hacer padding en la señal.
    out_layer : {'sigmoid', 'softmax'}, optional
        Función a usar en la capa de salida de la red. Por defecto
        es 'sigmoid'.
    
    Returns
    -------
    model: tensorflow.keras.Model
        Modelo del sistema.
    '''
    def _conv_bn_act_layer(input_layer, filters, kernel_size, padding,
                           kernel_initializer, bias_initializer, name):
        '''Función auxiliar que modela las capas azules conv + batchnorm +
        Activation ReLU para realizar el ENCODING.'''
        # Aplicando la concatenación de capas
        x_conv = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, 
                                        kernel_initializer=kernel_initializer,
                                        bias_initializer=bias_initializer,
                                        padding=padding, 
                                        name=f'Conv_{name}')(input_layer)
        x_conv = \
            tf.keras.layers.BatchNormalization(name=f'BatchNorm_{name}')(x_conv)
        x_conv = \
            tf.keras.layers.Activation('relu', name=f'Activation_{name}')(x_conv)

        return x_conv
    
    
    def _cnn_layers(input_layer, n_layers_conv, layer_params):
        '''Función auxiliar que permite modelar "n_layers_conv" capas CNN seguida de 
        una capa de Maxpooling.  
        '''
        # Definición de la salida de este bloque
        x_enc = input_layer
        
        # Aplicando "n_layers_conv" capas convolucionales de codificación
        for i in range(n_layers_conv):
            x_enc = _conv_bn_act_layer(x_enc, filters=layer_params['filters'], 
                                       kernel_size=layer_params['kernel_size'], 
                                       padding=layer_params['padding'],
                                       kernel_initializer=layer_params['kernel_initializer'],
                                       bias_initializer=layer_params['bias_initializer'],
                                       name=f"{layer_params['name']}_{i}")

        # Finalmente la capa de MaxPooling
        x_enc = tf.keras.layers.MaxPooling1D(pool_size=2, strides=2, 
                                             padding='valid',
                                             name=f"MaxPool_Conv_{layer_params['name']}")(x_enc)
        return x_enc
    
    
    def _mlp_layers(input_layer, units, kernel_initializer, 
               bias_initializer, name):
        '''Función auxiliar que modela las capas Dense + batchnorm +
        Activation ReLU'''
        # Aplicando la concatenación de capas
        x_dense = tf.keras.layers.Dense(units=units, 
                                        bias_initializer=bias_initializer,
                                        kernel_initializer=kernel_initializer,
                                        name=f'Dense_{name}')(input_layer)
        x_dense = \
            tf.keras.layers.BatchNormalization(name=f'BatchNorm_{name}')(x_dense)
        x_dense = \
            tf.keras.layers.Activation('relu', name=f'Activation_{name}')(x_dense)

        return x_dense
    
    
    # Definición de la entrada
    x_in = tf.keras.Input(shape=input_shape, dtype='float32')
    
    # Definición de la capa de máscara
    x_masked = tf.keras.layers.Masking(mask_value=padding_value)(x_in)
                                             
    # Definición de la CNN
    layer_params_1 = {'filters': 50, 'kernel_size': 100, 'padding': 'same',
                      'kernel_initializer': 'he_normal',
                      'bias_initializer': 'he_normal', 'name': 'cnn_1'}
    x_layer = _cnn_layers(x_masked, n_layers_conv=2, layer_params=layer_params_1)
                                             
    layer_params_2 = {'filters': 30, 'kernel_size': 50, 'padding': 'same',
                      'kernel_initializer': 'he_normal',
                      'bias_initializer': 'he_normal', 'name': 'cnn_2'}
    x_layer = _cnn_layers(x_layer, n_layers_conv=2, layer_params=layer_params_2)
                                             
    layer_params_3 = {'filters': 10, 'kernel_size': 25, 'padding': 'same',
                      'kernel_initializer': 'he_normal',
                      'bias_initializer': 'he_normal', 'name': 'cnn_3'}
    x_layer = _cnn_layers(x_layer, n_layers_conv=3, layer_params=layer_params_3)
                                             
    layer_params_4 = {'filters': 7, 'kernel_size': 13, 'padding': 'same',
                      'kernel_initializer': 'he_normal',
                      'bias_initializer': 'he_normal', 'name': 'cnn_4'}
    x_layer = _cnn_layers(x_layer, n_layers_conv=3, layer_params=layer_params_4)
                      
    
    # Definición de la capa de aplanamiento para conectar la CNN con la FCL 
    x_layer = tf.keras.layers.Flatten()(x_layer)                                     
    
    
    # Definición de la red misma
    x_layer = _mlp_layers(x_layer, units=500, kernel_initializer='he_normal', 
                          bias_initializer='he_normal', name='Layer_1')
    x_layer = _mlp_layers(x_layer, units=200, kernel_initializer='he_normal', 
                          bias_initializer='he_normal', name='Layer_2')
    x_layer = _mlp_layers(x_layer, units=100, kernel_initializer='he_normal', 
                          bias_initializer='he_normal', name='Layer_3')
    x_layer = _mlp_layers(x_layer, units=80, kernel_initializer='he_normal', 
                          bias_initializer='he_normal', name='Layer_4')
    x_layer = _mlp_layers(x_layer, units=30, kernel_initializer='he_normal', 
                          bias_initializer='he_normal', name='Layer_5')
    x_layer = _mlp_layers(x_layer, units=10, kernel_initializer='he_normal', 
                          bias_initializer='he_normal', name='Layer_6')
    x_layer = _mlp_layers(x_layer, units=5, kernel_initializer='he_normal', 
                          bias_initializer='he_normal', name='Layer_7')
    
    # Definición de la salida
    if out_layer == 'softmax':
        x_out = tf.keras.layers.Dense(2, activation='softmax', 
                                      kernel_initializer='he_normal', 
                                      bias_initializer='he_normal',
                                      name='softmax_out')(x_layer)
    elif out_layer == 'sigmoid':
        x_out = tf.keras.layers.Dense(1, activation='sigmoid', 
                                      kernel_initializer='he_normal', 
                                      bias_initializer='he_normal',
                                      name='sigmoid_out')(x_layer)
    else:
        raise Exception(f'Opción de parámetro "out_layer"={out_layer} '
                        f'no válido.')
    
    # Definir el modelo
    model = tf.keras.Model(inputs=x_in, outputs=x_out, name='Red_CNN')
    
    return model


def train_test_definition(X_data, Y_data, index_test, patient_groups,
                          patient_register, kfold=10):
    '''Función que permite retornar los conjuntos de entrenamiento
    y testeo en base a la división de la base de datos realizada 
    previamente para hacer una validación cruzada.
    
    Parameters
    ----------
    X_data : ndarray
        Matriz de características.
    Y_data : ndarray
        Etiquetas de la matriz de características.
    index_test : int
        Índice del grupo de testeo en la validación cruzada.
    patient_gropus : dict
        Diccionario que contiene los pacientes que corresponden
        a cada grupo de la validación cruzada.
    patient_register : dict
        Diccionario que contiene las entradas de cada paciente
        en la matriz de características.
    kfold : int, optional
        k de la validación cruzada que se realiza. Por defecto 
        es 10.
    
    Returns
    -------
    X_train : ndarray
        Datos de entrenamiento.
    Y_train : ndarray
        Etiquetas de los datos de entrenamiento.
    X_test : ndarray
        Datos de testeo.
    Y_test : ndarray
        Etiquetas de los datos de testeo.
    '''
    # Definición de los pacientes de testeo 
    test_patients = patient_groups[index_test]
    
    # Y entrenamiento
    train_patients = list()
    for i in range(1, kfold + 1):
        if i != index_test:
            train_patients.extend(patient_groups[i])
            
    # Definición de las entradas de entrenamiento y testeo
    train_indexes = list()
    test_indexes = list()
    
    for i in train_patients:
        train_indexes.extend(patient_register[str(i)])
    
    for i in test_patients:
        test_indexes.extend(patient_register[str(i)])

    # Aplicando los indices sobre los datos
    X_train = X_data[train_indexes]
    Y_train = Y_data[train_indexes]
    X_test  = X_data[test_indexes]
    Y_test  = Y_data[test_indexes]
    
    return X_train, Y_train, X_test, Y_test
    
    
def crossval_results(X_data, Y_data, experiment_type='ML', clean_params=None, 
                     sel_params=None, class_params=None, mlp_params=None,
                     kfold=10):
    '''Función que permite calcular el desempeño del clasificador
    mediante una validación cruzada de los datos.
    
    Parameters
    ----------
    X_data : ndarray
        Matriz de características.
    Y_data : ndarray
        Etiquetas de la matriz de características.
    experiment_type : {'ML', 'NN-MLP' 'CNN'}, optional
        Tipo de sistema a estudiar. 'ML' corresponde a un diseño
        estilo Machine-Learning (Rec. de Patrones). 'NN-MLP'
        corresponde a un diseño que utiliza como salida un 
        clasificador de perceptrones multicapas. 'CNN' es un
        diseño que utiliza una CNN con arquitectura clásica
        (AlexNet o VGG-16) para clasificar cada segmento.
        Por defecto es 'ML'.
    clean_params: dict or None, optional
        Parámetros del proceso de limpieza de características. 
        Si es None se utilizan características por defecto: 
        'tol': 13-5, 'show': True. Por defecto es None.
    sel_params: dict or None, optional
        Parámetros del proceso de selección de características. 
        Si es None se utilizan características por defecto: 
        'n_features': 10, 'show': True. Por defecto es None.
    class_params: dict or None, optional
        Parámetros del proceso de clasificación. Si es None se 
        utilizan características por defecto: 
        'classifier': 'knn', 'k_neigh': 10. Por defecto es None. 
        En caso de usar 'svm', es posible modificar el 'kernel'.
    mlp_params : dict or None, optional
        Parámetros del preoceso de clasificación con MLP. Si es
        None se utilizan las características por defecto:
        {'optimizer': 'Adam', 'loss': 'binary_crossentropy',
         'batch_size': None, 'epochs': 100, 'verbose': 1, 
         'metrics': ['accuracy', tf.keras.metrics.Recall(), 
                      tf.keras.metrics.Precision()],
         'out_layer': 'sigmoid', 'preprocessing': True}
    kfold : int, optional
        k de las repeticiones de la validación cruzada k-fold.
        Por defecto es 10.
    
    Returns
    -------
    confmat_list: list
        Lista de las matrices de confusión para cada iteración.
    accuracy_list : list
        Lista de las accuracys para cada iteración.
    '''
    # Definición de la lista de matrices de confusión
    confmat_list = list()
    
    # Iteraciónes del k-fold cross validation
    for index in range(1, kfold + 1):
        # Definición de la base de datos
        X_train, Y_train, X_test, Y_test = \
            train_test_definition(X_data, Y_data, index_test=index, 
                                  patient_groups=patient_groups,
                                  patient_register=patient_register)

        # Aplicando el clasificador
        if experiment_type == 'ML':
            classifier, X_test, params_out, Y_pred = \
                    ML_classification_system(X_train, Y_train, X_test, Y_test, 
                                             clean_params=clean_params, 
                                             sel_params=sel_params, 
                                             class_params=class_params)
            
        elif experiment_type == 'NN-MLP':
            model, X_test, params_out, Y_pred = \
                NN_MLP_classification_system(X_train, Y_train, X_test, Y_test, 
                                             clean_params=clean_params, 
                                             sel_params=sel_params, 
                                             mlp_params=mlp_params)
            
            # Modificar el Y_pred
            Y_pred = np.where(Y_pred < 0.5, 0, 1)[:, 0]

        elif experiment_type == 'CNN':
            pass
    
        else:
            raise Exception('Opción no válida para "experiment_type".')
    
        # Obteniendo la matriz de confusión
        conf_mat = confusion_matrix(Y_pred, Y_test)
        
        # Agregando a la lista
        confmat_list.append(conf_mat)
    
    # Cálculo de los resultados finales
    accuracy_list = list()
    
    for cmat in confmat_list:
        accuracy_i = np.sum(np.diag(cmat)) / np.sum(cmat)
        accuracy_list.append(accuracy_i)
        
    print(f'Accuracy {kfold}-fold CV: {np.mean(accuracy_list)} +- '
          f'{np.std(accuracy_list)}')
    
    return confmat_list, accuracy_list

In [41]:
# Parámetros generales
clean_params = {'tol': 1e-5, 'show': True}
sel_params = {'n_features': 60, 'show': True}

# kNN

In [72]:
class_params = {'classifier': 'knn', 'k_neigh': 3}

# Diseño de los clasificadores y resultados
confmat_wheeze_mean, accuracy_wheeze_mean = \
        crossval_results(X_data_mean, Y_wheeze, 
                         experiment_type='ML', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         class_params=class_params)

confmat_crackl_mean, accuracy_crackl_mean = \
        crossval_results(X_data_mean, Y_crackl, 
                         experiment_type='ML', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         class_params=class_params)

confmat_wheeze_max, accuracy_wheeze_max = \
        crossval_results(X_data_max, Y_wheeze, 
                         experiment_type='ML', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         class_params=class_params)

confmat_crackl_max, accuracy_crackl_max = \
        crossval_results(X_data_max, Y_crackl, 
                         experiment_type='ML', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         class_params=class_params)

Clean: number of features reduced from 149 to 121.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:14<00:00, 4.10 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.88 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.97 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.97 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.93 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.90 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.88 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.97 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.75 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.96 features/s]


Accuracy 10-fold CV: 0.5510293314947965 +- 0.07420473223452298
Clean: number of features reduced from 149 to 121.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.98 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.66 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.72 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.69 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.70 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.71 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.70 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.76 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.73 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.79 features/s]


Accuracy 10-fold CV: 0.7616261834350369 +- 0.06144312405586361
Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.08 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.06 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.14 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.09 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.95 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.14 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.05 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.08 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.08 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.15 features/s]


Accuracy 10-fold CV: 0.5407881704969874 +- 0.06759986120558278
Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.94 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.92 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.98 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.94 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.92 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.95 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.94 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:24<00:00, 2.48 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.76 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:23<00:00, 2.54 features/s]


Accuracy 10-fold CV: 0.7961227283418719 +- 0.06840567558223988


# SVM

In [91]:
class_params = {'classifier': 'svm', 'kernel': 'poly'}

# Diseño de los clasificadores y resultados
confmat_wheeze_mean, accuracy_wheeze_mean = \
        crossval_results(X_data_mean, Y_wheeze, 
                         experiment_type='ML', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         class_params=class_params)

confmat_crackl_mean, accuracy_crackl_mean = \
        crossval_results(X_data_mean, Y_crackl, 
                         experiment_type='ML', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         class_params=class_params)

confmat_wheeze_max, accuracy_wheeze_max = \
        crossval_results(X_data_max, Y_wheeze, 
                         experiment_type='ML', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         class_params=class_params)

confmat_crackl_max, accuracy_crackl_max = \
        crossval_results(X_data_max, Y_crackl, 
                         experiment_type='ML', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         class_params=class_params)

Clean: number of features reduced from 149 to 121.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:14<00:00, 4.27 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.92 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.72 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.65 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.77 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.69 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.80 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.93 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.68 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.87 features/s]


Accuracy 10-fold CV: 0.5660187439489475 +- 0.1290230327118345
Clean: number of features reduced from 149 to 121.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.70 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.43 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.42 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.47 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.43 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:18<00:00, 3.17 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:18<00:00, 3.28 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:18<00:00, 3.32 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:18<00:00, 3.24 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.39 features/s]


Accuracy 10-fold CV: 0.7971965833752551 +- 0.07703200750850601
Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.73 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.74 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.86 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.93 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.65 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.72 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.89 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.92 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.82 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.86 features/s]


Accuracy 10-fold CV: 0.5643193513719288 +- 0.13877708534888913
Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.85 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.68 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.84 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:24<00:00, 2.43 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:23<00:00, 2.61 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:27<00:00, 2.19 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:24<00:00, 2.49 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:23<00:00, 2.57 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.62 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.74 features/s]


Accuracy 10-fold CV: 0.8138678363810097 +- 0.07293877920007519


# Redes Neuronales MLP

In [97]:
preprocessing = True
mlp_params = {'optimizer': 'Adam', 'loss': 'binary_crossentropy',
              'batch_size': None, 'epochs': 30, 'verbose': 0, 
              'metrics': ['accuracy', tf.keras.metrics.Recall(), 
                          tf.keras.metrics.Precision()],
              'out_layer': 'sigmoid', 'preprocessing': True}

# Diseño de los clasificadores y resultados
confmat_wheeze_mean, accuracy_wheeze_mean = \
        crossval_results(X_data_mean, Y_wheeze, 
                         experiment_type='NN-MLP', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         mlp_params=mlp_params)

confmat_crackl_mean, accuracy_crackl_mean = \
        crossval_results(X_data_mean, Y_crackl, 
                         experiment_type='NN-MLP', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         mlp_params=mlp_params)

confmat_wheeze_max, accuracy_wheeze_max = \
        crossval_results(X_data_max, Y_wheeze, 
                         experiment_type='NN-MLP', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         mlp_params=mlp_params)

confmat_crackl_max, accuracy_crackl_max = \
        crossval_results(X_data_max, Y_crackl, 
                         experiment_type='NN-MLP', 
                         clean_params=clean_params, 
                         sel_params=sel_params, 
                         mlp_params=mlp_params)

Clean: number of features reduced from 149 to 121.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 4.00 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.72 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.53 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.07 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.37 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:15<00:00, 3.77 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.69 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.71 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.65 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.51 features/s]


Accuracy 10-fold CV: 0.5547237580848834 +- 0.08159031973013899
Clean: number of features reduced from 149 to 121.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:16<00:00, 3.66 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.36 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:18<00:00, 3.30 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:19<00:00, 3.10 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.43 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.43 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:18<00:00, 3.27 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:18<00:00, 3.23 features/s]


Clean: number of features reduced from 149 to 126.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.41 features/s]


Clean: number of features reduced from 149 to 124.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:17<00:00, 3.47 features/s]


Accuracy 10-fold CV: 0.7507860071542727 +- 0.07070154593320024
Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.81 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.86 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.63 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.84 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:20<00:00, 2.86 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.82 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.79 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.76 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.76 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:21<00:00, 2.81 features/s]


Accuracy 10-fold CV: 0.5655197838280108 +- 0.07584921945477313
Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.65 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.67 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.68 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:23<00:00, 2.57 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:23<00:00, 2.61 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.64 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.65 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.67 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:22<00:00, 2.69 features/s]


Clean: number of features reduced from 149 to 149.


Selecting Features: 100%|████████████████████████████████████████████████████| 60.0/60.0 [00:23<00:00, 2.52 features/s]


Accuracy 10-fold CV: 0.7933760009183782 +- 0.055980745332417696


# Redes neuronales CNN

In [63]:
X_segments_mat = tf.keras.preprocessing.sequence.pad_sequences(X_segments, padding="post", value=10)
X_segments_mat = np.expand_dims(X_segments_mat, axis=-1)
print(X_segments_mat.shape)

(4902, 31320, 1)


In [141]:
cnn_params = {'optimizer': 'Adam', 'loss': 'binary_crossentropy',
              'batch_size': 5, 'epochs': 10, 'verbose': 0, 
              'metrics': ['accuracy', tf.keras.metrics.Recall(), 
                          tf.keras.metrics.Precision()],
              'out_layer': 'softmax'}

X_train, Y_train, X_test, Y_test = \
        train_test_definition(X_segments_mat, Y_crackl, index_test=10, 
                              patient_groups=patient_groups,
                              patient_register=patient_register, kfold=10)

In [142]:
model = CNN_network(input_shape=X_segments_mat.shape[1:], padding_value=10, 
                    out_layer=cnn_params['out_layer'])
model.compile(optimizer=cnn_params['optimizer'], loss=cnn_params['loss'],
              metrics=cnn_params['metrics'])

In [143]:
model.summary()

Model: "Red_CNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_19 (InputLayer)        [(None, 31320, 1)]        0         
_________________________________________________________________
masking_18 (Masking)         (None, 31320, 1)          0         
_________________________________________________________________
Conv_cnn_1_0 (Conv1D)        (None, 31320, 50)         5050      
_________________________________________________________________
BatchNorm_cnn_1_0 (BatchNorm (None, 31320, 50)         200       
_________________________________________________________________
Activation_cnn_1_0 (Activati (None, 31320, 50)         0         
_________________________________________________________________
Conv_cnn_1_1 (Conv1D)        (None, 31320, 50)         250050    
_________________________________________________________________
BatchNorm_cnn_1_1 (BatchNorm (None, 31320, 50)         200 

In [144]:
X_train.shape

(4410, 31320, 1)

In [145]:
# Pasando a One-Hot
Y_train1H = np.array([Y_train, np.ones(Y_train.shape[0]) - Y_train]).T
print(Y_train.shape)
print(Y_train1H.shape)

(4410,)
(4410, 2)


In [146]:
history = model.fit(x=X_train, y=Y_train1H, epochs=cnn_params['epochs'],
                    verbose=1, batch_size=cnn_params['batch_size'])

Epoch 1/10


 93/882 [==>...........................] - ETA: 0s - loss: 0.7347 - accuracy: 0.4000 - recall_15: 0.4000 - precision_15: 0.400 - ETA: 2:11 - loss: 0.7045 - accuracy: 0.5000 - recall_15: 0.5000 - precision_15: 0.500 - ETA: 2:53 - loss: 0.7543 - accuracy: 0.3333 - recall_15: 0.3333 - precision_15: 0.333 - ETA: 3:13 - loss: 0.8893 - accuracy: 0.3500 - recall_15: 0.3500 - precision_15: 0.350 - ETA: 3:24 - loss: 0.8817 - accuracy: 0.2800 - recall_15: 0.2800 - precision_15: 0.280 - ETA: 3:31 - loss: 0.8772 - accuracy: 0.3000 - recall_15: 0.3000 - precision_15: 0.300 - ETA: 3:35 - loss: 0.8731 - accuracy: 0.2571 - recall_15: 0.2571 - precision_15: 0.257 - ETA: 3:39 - loss: 0.9436 - accuracy: 0.2500 - recall_15: 0.2500 - precision_15: 0.250 - ETA: 3:42 - loss: 0.9398 - accuracy: 0.2444 - recall_15: 0.2444 - precision_15: 0.244 - ETA: 3:44 - loss: 1.0049 - accuracy: 0.2600 - recall_15: 0.2600 - precision_15: 0.260 - ETA: 3:45 - loss: 0.9995 - accuracy: 0.2909 - recall_15: 0.2909 - precision_15:

186/882 [=====>........................] - ETA: 3:41 - loss: 0.9356 - accuracy: 0.3319 - recall_15: 0.3319 - precision_15: 0.331 - ETA: 3:41 - loss: 0.9378 - accuracy: 0.3368 - recall_15: 0.3368 - precision_15: 0.336 - ETA: 3:40 - loss: 0.9399 - accuracy: 0.3417 - recall_15: 0.3417 - precision_15: 0.341 - ETA: 3:40 - loss: 0.9374 - accuracy: 0.3381 - recall_15: 0.3381 - precision_15: 0.338 - ETA: 3:40 - loss: 0.9349 - accuracy: 0.3388 - recall_15: 0.3388 - precision_15: 0.338 - ETA: 3:40 - loss: 0.9342 - accuracy: 0.3414 - recall_15: 0.3414 - precision_15: 0.341 - ETA: 3:39 - loss: 0.9318 - accuracy: 0.3460 - recall_15: 0.3460 - precision_15: 0.346 - ETA: 3:39 - loss: 0.9348 - accuracy: 0.3446 - recall_15: 0.3446 - precision_15: 0.344 - ETA: 3:39 - loss: 0.9301 - accuracy: 0.3490 - recall_15: 0.3490 - precision_15: 0.349 - ETA: 3:39 - loss: 0.9278 - accuracy: 0.3534 - recall_15: 0.3534 - precision_15: 0.353 - ETA: 3:38 - loss: 0.9248 - accuracy: 0.3558 - recall_15: 0.3558 - precision_1















Epoch 2/10


 93/882 [==>...........................] - ETA: 0s - loss: 0.5080 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 2:07 - loss: 0.5079 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 2:49 - loss: 0.4059 - accuracy: 0.8667 - recall_15: 0.8667 - precision_15: 0.866 - ETA: 3:11 - loss: 0.4313 - accuracy: 0.8500 - recall_15: 0.8500 - precision_15: 0.850 - ETA: 3:24 - loss: 0.4497 - accuracy: 0.8400 - recall_15: 0.8400 - precision_15: 0.840 - ETA: 3:32 - loss: 0.4593 - accuracy: 0.8333 - recall_15: 0.8333 - precision_15: 0.833 - ETA: 3:38 - loss: 0.4976 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:42 - loss: 0.4988 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:45 - loss: 0.4997 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:48 - loss: 0.4785 - accuracy: 0.8200 - recall_15: 0.8200 - precision_15: 0.820 - ETA: 3:50 - loss: 0.4610 - accuracy: 0.8364 - recall_15: 0.8364 - precision_15:

186/882 [=====>........................] - ETA: 3:48 - loss: 0.5905 - accuracy: 0.7532 - recall_15: 0.7532 - precision_15: 0.753 - ETA: 3:47 - loss: 0.5873 - accuracy: 0.7558 - recall_15: 0.7558 - precision_15: 0.755 - ETA: 3:47 - loss: 0.5920 - accuracy: 0.7521 - recall_15: 0.7521 - precision_15: 0.752 - ETA: 3:47 - loss: 0.5895 - accuracy: 0.7546 - recall_15: 0.7546 - precision_15: 0.754 - ETA: 3:46 - loss: 0.5909 - accuracy: 0.7531 - recall_15: 0.7531 - precision_15: 0.753 - ETA: 3:46 - loss: 0.5900 - accuracy: 0.7535 - recall_15: 0.7535 - precision_15: 0.753 - ETA: 3:46 - loss: 0.5863 - accuracy: 0.7560 - recall_15: 0.7560 - precision_15: 0.756 - ETA: 3:46 - loss: 0.5856 - accuracy: 0.7564 - recall_15: 0.7564 - precision_15: 0.756 - ETA: 3:45 - loss: 0.5828 - accuracy: 0.7588 - recall_15: 0.7588 - precision_15: 0.758 - ETA: 3:45 - loss: 0.5842 - accuracy: 0.7573 - recall_15: 0.7573 - precision_15: 0.757 - ETA: 3:45 - loss: 0.5807 - accuracy: 0.7596 - recall_15: 0.7596 - precision_1















Epoch 3/10


 93/882 [==>...........................] - ETA: 0s - loss: 0.7979 - accuracy: 0.4000 - recall_15: 0.4000 - precision_15: 0.400 - ETA: 2:11 - loss: 0.7340 - accuracy: 0.5000 - recall_15: 0.5000 - precision_15: 0.500 - ETA: 2:55 - loss: 0.6609 - accuracy: 0.6000 - recall_15: 0.6000 - precision_15: 0.600 - ETA: 3:16 - loss: 0.6216 - accuracy: 0.6500 - recall_15: 0.6500 - precision_15: 0.650 - ETA: 3:28 - loss: 0.5432 - accuracy: 0.7200 - recall_15: 0.7200 - precision_15: 0.720 - ETA: 3:37 - loss: 0.5366 - accuracy: 0.7333 - recall_15: 0.7333 - precision_15: 0.733 - ETA: 3:42 - loss: 0.5658 - accuracy: 0.7143 - recall_15: 0.7143 - precision_15: 0.714 - ETA: 3:47 - loss: 0.5581 - accuracy: 0.7250 - recall_15: 0.7250 - precision_15: 0.725 - ETA: 3:50 - loss: 0.5784 - accuracy: 0.7111 - recall_15: 0.7111 - precision_15: 0.711 - ETA: 3:52 - loss: 0.6009 - accuracy: 0.7000 - recall_15: 0.7000 - precision_15: 0.700 - ETA: 3:54 - loss: 0.6136 - accuracy: 0.6909 - recall_15: 0.6909 - precision_15:

186/882 [=====>........................] - ETA: 3:50 - loss: 0.5322 - accuracy: 0.7723 - recall_15: 0.7723 - precision_15: 0.772 - ETA: 3:50 - loss: 0.5321 - accuracy: 0.7726 - recall_15: 0.7726 - precision_15: 0.772 - ETA: 3:50 - loss: 0.5293 - accuracy: 0.7750 - recall_15: 0.7750 - precision_15: 0.775 - ETA: 3:49 - loss: 0.5291 - accuracy: 0.7753 - recall_15: 0.7753 - precision_15: 0.775 - ETA: 3:49 - loss: 0.5312 - accuracy: 0.7735 - recall_15: 0.7735 - precision_15: 0.773 - ETA: 3:49 - loss: 0.5310 - accuracy: 0.7737 - recall_15: 0.7737 - precision_15: 0.773 - ETA: 3:49 - loss: 0.5279 - accuracy: 0.7760 - recall_15: 0.7760 - precision_15: 0.776 - ETA: 3:48 - loss: 0.5277 - accuracy: 0.7762 - recall_15: 0.7762 - precision_15: 0.776 - ETA: 3:48 - loss: 0.5251 - accuracy: 0.7784 - recall_15: 0.7784 - precision_15: 0.778 - ETA: 3:48 - loss: 0.5249 - accuracy: 0.7786 - recall_15: 0.7786 - precision_15: 0.778 - ETA: 3:48 - loss: 0.5297 - accuracy: 0.7750 - recall_15: 0.7750 - precision_1















Epoch 4/10


 93/882 [==>...........................] - ETA: 1s - loss: 0.5043 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 2:11 - loss: 0.6212 - accuracy: 0.7000 - recall_15: 0.7000 - precision_15: 0.700 - ETA: 2:53 - loss: 0.5045 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:14 - loss: 0.5628 - accuracy: 0.7500 - recall_15: 0.7500 - precision_15: 0.750 - ETA: 3:27 - loss: 0.5506 - accuracy: 0.7600 - recall_15: 0.7600 - precision_15: 0.760 - ETA: 3:37 - loss: 0.5042 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:42 - loss: 0.5043 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:47 - loss: 0.5334 - accuracy: 0.7750 - recall_15: 0.7750 - precision_15: 0.775 - ETA: 3:50 - loss: 0.5560 - accuracy: 0.7556 - recall_15: 0.7556 - precision_15: 0.755 - ETA: 3:53 - loss: 0.5243 - accuracy: 0.7800 - recall_15: 0.7800 - precision_15: 0.780 - ETA: 3:56 - loss: 0.5436 - accuracy: 0.7636 - recall_15: 0.7636 - precision_15:

186/882 [=====>........................] - ETA: 3:52 - loss: 0.5390 - accuracy: 0.7681 - recall_15: 0.7681 - precision_15: 0.768 - ETA: 3:51 - loss: 0.5412 - accuracy: 0.7663 - recall_15: 0.7663 - precision_15: 0.766 - ETA: 3:51 - loss: 0.5384 - accuracy: 0.7688 - recall_15: 0.7688 - precision_15: 0.768 - ETA: 3:51 - loss: 0.5380 - accuracy: 0.7691 - recall_15: 0.7691 - precision_15: 0.769 - ETA: 3:51 - loss: 0.5402 - accuracy: 0.7673 - recall_15: 0.7673 - precision_15: 0.767 - ETA: 3:50 - loss: 0.5375 - accuracy: 0.7697 - recall_15: 0.7697 - precision_15: 0.769 - ETA: 3:50 - loss: 0.5371 - accuracy: 0.7700 - recall_15: 0.7700 - precision_15: 0.770 - ETA: 3:50 - loss: 0.5392 - accuracy: 0.7683 - recall_15: 0.7683 - precision_15: 0.768 - ETA: 3:49 - loss: 0.5389 - accuracy: 0.7686 - recall_15: 0.7686 - precision_15: 0.768 - ETA: 3:49 - loss: 0.5385 - accuracy: 0.7689 - recall_15: 0.7689 - precision_15: 0.768 - ETA: 3:49 - loss: 0.5429 - accuracy: 0.7654 - recall_15: 0.7654 - precision_1















Epoch 5/10


 93/882 [==>...........................] - ETA: 0s - loss: 0.2631 - accuracy: 1.0000 - recall_15: 1.0000 - precision_15: 1.000 - ETA: 2:12 - loss: 0.5013 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 2:57 - loss: 0.5808 - accuracy: 0.7333 - recall_15: 0.7333 - precision_15: 0.733 - ETA: 3:18 - loss: 0.5571 - accuracy: 0.7500 - recall_15: 0.7500 - precision_15: 0.750 - ETA: 3:30 - loss: 0.5465 - accuracy: 0.7600 - recall_15: 0.7600 - precision_15: 0.760 - ETA: 3:38 - loss: 0.5002 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:43 - loss: 0.4662 - accuracy: 0.8286 - recall_15: 0.8286 - precision_15: 0.828 - ETA: 3:47 - loss: 0.4407 - accuracy: 0.8500 - recall_15: 0.8500 - precision_15: 0.850 - ETA: 3:50 - loss: 0.4477 - accuracy: 0.8444 - recall_15: 0.8444 - precision_15: 0.844 - ETA: 3:52 - loss: 0.4290 - accuracy: 0.8600 - recall_15: 0.8600 - precision_15: 0.860 - ETA: 3:55 - loss: 0.4404 - accuracy: 0.8545 - recall_15: 0.8545 - precision_15:

186/882 [=====>........................] - ETA: 3:52 - loss: 0.5064 - accuracy: 0.7979 - recall_15: 0.7979 - precision_15: 0.797 - ETA: 3:52 - loss: 0.5038 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:52 - loss: 0.5038 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:52 - loss: 0.5010 - accuracy: 0.8021 - recall_15: 0.8021 - precision_15: 0.802 - ETA: 3:51 - loss: 0.5036 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:51 - loss: 0.5010 - accuracy: 0.8020 - recall_15: 0.8020 - precision_15: 0.802 - ETA: 3:51 - loss: 0.5011 - accuracy: 0.8020 - recall_15: 0.8020 - precision_15: 0.802 - ETA: 3:50 - loss: 0.5011 - accuracy: 0.8020 - recall_15: 0.8020 - precision_15: 0.802 - ETA: 3:50 - loss: 0.5011 - accuracy: 0.8020 - recall_15: 0.8020 - precision_15: 0.802 - ETA: 3:50 - loss: 0.5011 - accuracy: 0.8019 - recall_15: 0.8019 - precision_15: 0.801 - ETA: 3:50 - loss: 0.4986 - accuracy: 0.8038 - recall_15: 0.8038 - precision_1















Epoch 6/10


 93/882 [==>...........................] - ETA: 7s - loss: 1.2461 - accuracy: 0.2000 - recall_15: 0.2000 - precision_15: 0.200 - ETA: 2:18 - loss: 0.8742 - accuracy: 0.5000 - recall_15: 0.5000 - precision_15: 0.500 - ETA: 3:01 - loss: 0.8327 - accuracy: 0.5333 - recall_15: 0.5333 - precision_15: 0.533 - ETA: 3:20 - loss: 0.7501 - accuracy: 0.6000 - recall_15: 0.6000 - precision_15: 0.600 - ETA: 3:37 - loss: 0.7005 - accuracy: 0.6400 - recall_15: 0.6400 - precision_15: 0.640 - ETA: 3:47 - loss: 0.7085 - accuracy: 0.6333 - recall_15: 0.6333 - precision_15: 0.633 - ETA: 3:53 - loss: 0.7154 - accuracy: 0.6286 - recall_15: 0.6286 - precision_15: 0.628 - ETA: 3:56 - loss: 0.7195 - accuracy: 0.6250 - recall_15: 0.6250 - precision_15: 0.625 - ETA: 3:58 - loss: 0.6946 - accuracy: 0.6444 - recall_15: 0.6444 - precision_15: 0.644 - ETA: 4:00 - loss: 0.6745 - accuracy: 0.6600 - recall_15: 0.6600 - precision_15: 0.660 - ETA: 4:02 - loss: 0.6819 - accuracy: 0.6545 - recall_15: 0.6545 - precision_15:

186/882 [=====>........................] - ETA: 3:58 - loss: 0.5723 - accuracy: 0.7468 - recall_15: 0.7468 - precision_15: 0.746 - ETA: 3:58 - loss: 0.5715 - accuracy: 0.7474 - recall_15: 0.7474 - precision_15: 0.747 - ETA: 3:58 - loss: 0.5684 - accuracy: 0.7500 - recall_15: 0.7500 - precision_15: 0.750 - ETA: 3:57 - loss: 0.5699 - accuracy: 0.7485 - recall_15: 0.7485 - precision_15: 0.748 - ETA: 3:57 - loss: 0.5716 - accuracy: 0.7469 - recall_15: 0.7469 - precision_15: 0.746 - ETA: 3:57 - loss: 0.5709 - accuracy: 0.7475 - recall_15: 0.7475 - precision_15: 0.747 - ETA: 3:57 - loss: 0.5704 - accuracy: 0.7480 - recall_15: 0.7480 - precision_15: 0.748 - ETA: 3:56 - loss: 0.5674 - accuracy: 0.7505 - recall_15: 0.7505 - precision_15: 0.750 - ETA: 3:56 - loss: 0.5668 - accuracy: 0.7510 - recall_15: 0.7510 - precision_15: 0.751 - ETA: 3:56 - loss: 0.5662 - accuracy: 0.7515 - recall_15: 0.7515 - precision_15: 0.751 - ETA: 3:56 - loss: 0.5633 - accuracy: 0.7538 - recall_15: 0.7538 - precision_1















Epoch 7/10


 93/882 [==>...........................] - ETA: 1s - loss: 0.7649 - accuracy: 0.6000 - recall_15: 0.6000 - precision_15: 0.600 - ETA: 2:11 - loss: 0.5039 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 2:55 - loss: 0.6673 - accuracy: 0.6667 - recall_15: 0.6667 - precision_15: 0.666 - ETA: 3:18 - loss: 0.7576 - accuracy: 0.6000 - recall_15: 0.6000 - precision_15: 0.600 - ETA: 3:31 - loss: 0.8117 - accuracy: 0.5600 - recall_15: 0.5600 - precision_15: 0.560 - ETA: 3:39 - loss: 0.7160 - accuracy: 0.6333 - recall_15: 0.6333 - precision_15: 0.633 - ETA: 3:44 - loss: 0.7595 - accuracy: 0.6000 - recall_15: 0.6000 - precision_15: 0.600 - ETA: 3:48 - loss: 0.7272 - accuracy: 0.6250 - recall_15: 0.6250 - precision_15: 0.625 - ETA: 3:51 - loss: 0.6730 - accuracy: 0.6667 - recall_15: 0.6667 - precision_15: 0.666 - ETA: 3:54 - loss: 0.6817 - accuracy: 0.6600 - recall_15: 0.6600 - precision_15: 0.660 - ETA: 3:55 - loss: 0.6655 - accuracy: 0.6727 - recall_15: 0.6727 - precision_15:

186/882 [=====>........................] - ETA: 3:52 - loss: 0.5656 - accuracy: 0.7511 - recall_15: 0.7511 - precision_15: 0.751 - ETA: 3:52 - loss: 0.5649 - accuracy: 0.7516 - recall_15: 0.7516 - precision_15: 0.751 - ETA: 3:51 - loss: 0.5643 - accuracy: 0.7521 - recall_15: 0.7521 - precision_15: 0.752 - ETA: 3:51 - loss: 0.5637 - accuracy: 0.7526 - recall_15: 0.7526 - precision_15: 0.752 - ETA: 3:51 - loss: 0.5631 - accuracy: 0.7531 - recall_15: 0.7531 - precision_15: 0.753 - ETA: 3:51 - loss: 0.5626 - accuracy: 0.7535 - recall_15: 0.7535 - precision_15: 0.753 - ETA: 3:50 - loss: 0.5623 - accuracy: 0.7540 - recall_15: 0.7540 - precision_15: 0.754 - ETA: 3:50 - loss: 0.5666 - accuracy: 0.7505 - recall_15: 0.7505 - precision_15: 0.750 - ETA: 3:50 - loss: 0.5660 - accuracy: 0.7510 - recall_15: 0.7510 - precision_15: 0.751 - ETA: 3:49 - loss: 0.5629 - accuracy: 0.7534 - recall_15: 0.7534 - precision_15: 0.753 - ETA: 3:49 - loss: 0.5696 - accuracy: 0.7481 - recall_15: 0.7481 - precision_1















Epoch 8/10


 93/882 [==>...........................] - ETA: 0s - loss: 0.5006 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 2:13 - loss: 0.3668 - accuracy: 0.9000 - recall_15: 0.9000 - precision_15: 0.900 - ETA: 2:55 - loss: 0.5898 - accuracy: 0.7333 - recall_15: 0.7333 - precision_15: 0.733 - ETA: 3:18 - loss: 0.5062 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:32 - loss: 0.5051 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:40 - loss: 0.4598 - accuracy: 0.8333 - recall_15: 0.8333 - precision_15: 0.833 - ETA: 3:47 - loss: 0.5038 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:50 - loss: 0.4700 - accuracy: 0.8250 - recall_15: 0.8250 - precision_15: 0.825 - ETA: 3:53 - loss: 0.4441 - accuracy: 0.8444 - recall_15: 0.8444 - precision_15: 0.844 - ETA: 3:56 - loss: 0.4230 - accuracy: 0.8600 - recall_15: 0.8600 - precision_15: 0.860 - ETA: 3:57 - loss: 0.4303 - accuracy: 0.8545 - recall_15: 0.8545 - precision_15:

186/882 [=====>........................] - ETA: 3:54 - loss: 0.5123 - accuracy: 0.7915 - recall_15: 0.7915 - precision_15: 0.791 - ETA: 3:54 - loss: 0.5093 - accuracy: 0.7937 - recall_15: 0.7937 - precision_15: 0.793 - ETA: 3:53 - loss: 0.5093 - accuracy: 0.7937 - recall_15: 0.7937 - precision_15: 0.793 - ETA: 3:53 - loss: 0.5064 - accuracy: 0.7959 - recall_15: 0.7959 - precision_15: 0.795 - ETA: 3:53 - loss: 0.5064 - accuracy: 0.7959 - recall_15: 0.7959 - precision_15: 0.795 - ETA: 3:52 - loss: 0.5037 - accuracy: 0.7980 - recall_15: 0.7980 - precision_15: 0.798 - ETA: 3:52 - loss: 0.5062 - accuracy: 0.7960 - recall_15: 0.7960 - precision_15: 0.796 - ETA: 3:52 - loss: 0.5087 - accuracy: 0.7941 - recall_15: 0.7941 - precision_15: 0.794 - ETA: 3:52 - loss: 0.5107 - accuracy: 0.7922 - recall_15: 0.7922 - precision_15: 0.792 - ETA: 3:51 - loss: 0.5080 - accuracy: 0.7942 - recall_15: 0.7942 - precision_15: 0.794 - ETA: 3:51 - loss: 0.5105 - accuracy: 0.7923 - recall_15: 0.7923 - precision_1















Epoch 9/10


 93/882 [==>...........................] - ETA: 0s - loss: 0.9629 - accuracy: 0.4000 - recall_15: 0.4000 - precision_15: 0.400 - ETA: 2:11 - loss: 0.6158 - accuracy: 0.7000 - recall_15: 0.7000 - precision_15: 0.700 - ETA: 2:54 - loss: 0.5781 - accuracy: 0.7333 - recall_15: 0.7333 - precision_15: 0.733 - ETA: 3:16 - loss: 0.5615 - accuracy: 0.7500 - recall_15: 0.7500 - precision_15: 0.750 - ETA: 3:31 - loss: 0.5008 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:39 - loss: 0.5370 - accuracy: 0.7667 - recall_15: 0.7667 - precision_15: 0.766 - ETA: 3:46 - loss: 0.5321 - accuracy: 0.7714 - recall_15: 0.7714 - precision_15: 0.771 - ETA: 3:49 - loss: 0.5599 - accuracy: 0.7500 - recall_15: 0.7500 - precision_15: 0.750 - ETA: 3:53 - loss: 0.6042 - accuracy: 0.7111 - recall_15: 0.7111 - precision_15: 0.711 - ETA: 3:56 - loss: 0.5970 - accuracy: 0.7200 - recall_15: 0.7200 - precision_15: 0.720 - ETA: 3:59 - loss: 0.6328 - accuracy: 0.6909 - recall_15: 0.6909 - precision_15:

186/882 [=====>........................] - ETA: 3:54 - loss: 0.5560 - accuracy: 0.7574 - recall_15: 0.7574 - precision_15: 0.757 - ETA: 3:54 - loss: 0.5531 - accuracy: 0.7600 - recall_15: 0.7600 - precision_15: 0.760 - ETA: 3:54 - loss: 0.5576 - accuracy: 0.7563 - recall_15: 0.7563 - precision_15: 0.756 - ETA: 3:53 - loss: 0.5595 - accuracy: 0.7546 - recall_15: 0.7546 - precision_15: 0.754 - ETA: 3:53 - loss: 0.5565 - accuracy: 0.7571 - recall_15: 0.7571 - precision_15: 0.757 - ETA: 3:53 - loss: 0.5559 - accuracy: 0.7576 - recall_15: 0.7576 - precision_15: 0.757 - ETA: 3:52 - loss: 0.5530 - accuracy: 0.7600 - recall_15: 0.7600 - precision_15: 0.760 - ETA: 3:52 - loss: 0.5525 - accuracy: 0.7604 - recall_15: 0.7604 - precision_15: 0.760 - ETA: 3:52 - loss: 0.5496 - accuracy: 0.7627 - recall_15: 0.7627 - precision_15: 0.762 - ETA: 3:52 - loss: 0.5493 - accuracy: 0.7631 - recall_15: 0.7631 - precision_15: 0.763 - ETA: 3:51 - loss: 0.5509 - accuracy: 0.7615 - recall_15: 0.7615 - precision_1















Epoch 10/10


 93/882 [==>...........................] - ETA: 0s - loss: 0.5018 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 2:11 - loss: 0.3765 - accuracy: 0.9000 - recall_15: 0.9000 - precision_15: 0.900 - ETA: 2:53 - loss: 0.5018 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:18 - loss: 0.5644 - accuracy: 0.7500 - recall_15: 0.7500 - precision_15: 0.750 - ETA: 3:34 - loss: 0.5019 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:43 - loss: 0.5009 - accuracy: 0.8000 - recall_15: 0.8000 - precision_15: 0.800 - ETA: 3:48 - loss: 0.6081 - accuracy: 0.7143 - recall_15: 0.7143 - precision_15: 0.714 - ETA: 3:53 - loss: 0.5950 - accuracy: 0.7250 - recall_15: 0.7250 - precision_15: 0.725 - ETA: 3:56 - loss: 0.5570 - accuracy: 0.7556 - recall_15: 0.7556 - precision_15: 0.755 - ETA: 3:58 - loss: 0.5726 - accuracy: 0.7400 - recall_15: 0.7400 - precision_15: 0.740 - ETA: 4:00 - loss: 0.5436 - accuracy: 0.7636 - recall_15: 0.7636 - precision_15:

186/882 [=====>........................] - ETA: 3:55 - loss: 0.5143 - accuracy: 0.7894 - recall_15: 0.7894 - precision_15: 0.789 - ETA: 3:55 - loss: 0.5117 - accuracy: 0.7916 - recall_15: 0.7916 - precision_15: 0.791 - ETA: 3:54 - loss: 0.5142 - accuracy: 0.7896 - recall_15: 0.7896 - precision_15: 0.789 - ETA: 3:54 - loss: 0.5142 - accuracy: 0.7897 - recall_15: 0.7897 - precision_15: 0.789 - ETA: 3:54 - loss: 0.5115 - accuracy: 0.7918 - recall_15: 0.7918 - precision_15: 0.791 - ETA: 3:54 - loss: 0.5114 - accuracy: 0.7919 - recall_15: 0.7919 - precision_15: 0.791 - ETA: 3:53 - loss: 0.5087 - accuracy: 0.7940 - recall_15: 0.7940 - precision_15: 0.794 - ETA: 3:53 - loss: 0.5112 - accuracy: 0.7921 - recall_15: 0.7921 - precision_15: 0.792 - ETA: 3:53 - loss: 0.5132 - accuracy: 0.7902 - recall_15: 0.7902 - precision_15: 0.790 - ETA: 3:53 - loss: 0.5131 - accuracy: 0.7903 - recall_15: 0.7903 - precision_15: 0.790 - ETA: 3:52 - loss: 0.5151 - accuracy: 0.7885 - recall_15: 0.7885 - precision_1

















In [158]:
ypred = list()
for i in range(X_test.shape[0]):
    yhat = model.predict(np.expand_dims(X_test[i], axis=0))
    ypred.append(np.argmax(yhat))

In [162]:
print(confusion_matrix(ypred, Y_test))
print(accuracy_score(ypred, Y_test))

[[411  30]
 [ 47   4]]
0.8434959349593496
