In [1]:
import json
import os

import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models

from utils.load_spectrograms import load_large_pickle

In [2]:
# Dataset location, split into the kernel's working directory and a suffix.
# The suffix keeps its leading slash on purpose: the two pieces are glued
# together by plain string concatenation, not by os.path.join.
directory = '/preprocessed_dataset/cough-classification-data_ClassABC_spectrograms_abc.pkl'
root = os.getcwd()

In [3]:
# Full path to the pickled dataset: working directory followed by the suffix.
pickle_path = f"{root}{directory}"
print(pickle_path)

/Users/rodolfolobocarrasco/Documents/projects/ESPOL-PROJECT/ESPOL-coughvid-project/preprocessed_dataset/cough-classification-data_ClassABC_spectrograms_abc.pkl


In [4]:
# Load the preprocessed dataset from disk via the project helper.
# NOTE(review): the helper presumably unpickles the file; unpickling can
# execute arbitrary code, so only load files from a trusted source — confirm.
spectrograms_data = load_large_pickle(pickle_path)

In [5]:
# Sanity check: number of top-level entries in the loaded dataset
# (presumably one per recording id — TODO confirm against the preprocessing step).
len(spectrograms_data)

1589

In [12]:
def prepare_training_data(spectrograms_data):
    """
    Build the arrays needed to train the CNN from the loaded records.

    Parameters
    ----------
    spectrograms_data : dict
        Mapping of record id -> record. Every record must expose
        ``record['mfcc']['coefficients']`` (array-like) and
        ``record['features']['STATUS']`` (the class label).

    Returns
    -------
    X : np.ndarray
        All MFCC arrays stacked along a new first axis, each with a trailing
        channel axis of size 1 appended.
    y_onehot : np.ndarray
        One-hot encoded labels, in the same order as ``X``.
    label_encoder : LabelEncoder
        The fitted encoder, so class indices can be mapped back to labels.

    Raises
    ------
    ValueError
        If ``spectrograms_data`` is empty, or if the MFCC arrays do not all
        have the same shape (they could not be stacked into one tensor).
    """
    # Fail early with a clear message instead of an opaque error from
    # to_categorical further down.
    if not spectrograms_data:
        raise ValueError("spectrograms_data is empty; there is nothing to train on.")

    features = []
    labels = []
    for record in spectrograms_data.values():
        mfcc = np.asarray(record['mfcc']['coefficients'])
        # Append the channel axis expected by Conv2D.
        features.append(np.expand_dims(mfcc, axis=-1))
        labels.append(record['features']['STATUS'])

    # Ragged inputs cannot form a single batch tensor; report the offending
    # shapes rather than letting array construction fail cryptically.
    shapes = {feature.shape for feature in features}
    if len(shapes) > 1:
        raise ValueError(
            "All MFCC arrays must share one shape to be stacked; "
            f"found shapes: {sorted(shapes)}"
        )

    X = np.stack(features)
    y = np.array(labels)

    # Encode labels as integers, then as one-hot vectors.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    y_onehot = tf.keras.utils.to_categorical(y_encoded)

    return X, y_onehot, label_encoder

def create_cnn_model(input_shape, num_classes):
    """
    Build the (uncompiled) CNN used for audio classification.

    The network has four convolutional stages with 32, 64, 128 and 256
    filters. Each stage is Conv2D(3x3, relu, 'same') -> BatchNormalization ->
    MaxPooling2D(2x2) -> Dropout(0.25). The head is Flatten -> Dense(512, relu)
    -> BatchNormalization -> Dropout(0.5) -> Dense(num_classes, softmax).

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample, without the batch axis.
    num_classes : int
        Number of units in the softmax output layer.

    Returns
    -------
    tf.keras.Sequential
        The assembled model; the caller is responsible for compiling it.

    NOTE(review): four 2x2 poolings shrink each spatial axis roughly 16x, so
    very small inputs may not survive all stages — confirm against the MFCC
    dimensions in use.
    """
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape=` to the first Conv2D; recent Keras versions emit a
    # UserWarning for the latter inside Sequential models.
    stack = [layers.Input(shape=input_shape)]

    # Convolutional feature extractor: identical stages, doubling the filters.
    for n_filters in (32, 64, 128, 256):
        stack += [
            layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
        ]

    # Dense classification head.
    stack += [
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ]

    return models.Sequential(stack)

def train_model(X, y, label_encoder, batch_size=32, epochs=50,
                early_stopping_patience=None):
    """
    Train the CNN and evaluate it on a held-out test split.

    The data is split 70% / 15% / 15% into train / validation / test using a
    fixed ``random_state`` of 42. The model is compiled with Adam and
    categorical cross-entropy, trained with a ReduceLROnPlateau callback, and
    then evaluated on the test split. Test accuracy and a per-class
    classification report are printed.

    Parameters
    ----------
    X : np.ndarray
        Input samples; ``X.shape[1:]`` is used as the model input shape.
    y : np.ndarray
        One-hot labels; ``y.shape[1]`` is used as the number of classes.
    label_encoder : LabelEncoder
        Fitted encoder whose ``classes_`` name the rows of the report.
    batch_size : int, default 32
        Mini-batch size passed to ``model.fit``.
    epochs : int, default 50
        Maximum number of training epochs.
    early_stopping_patience : int or None, default None
        When given, adds an EarlyStopping callback on ``val_loss`` with this
        patience that restores the best weights. ``None`` (the default)
        trains for the full number of epochs.

    Returns
    -------
    model : tf.keras.Model
        The trained model.
    history : tf.keras.callbacks.History
        The object returned by ``model.fit``.
    test_data : tuple
        ``(X_test, y_test)``, the held-out split used for evaluation.

    NOTE(review): the splits are not stratified; if the classes are
    imbalanced, consider passing ``stratify=`` to ``train_test_split``.
    """
    # Split into train / validation / test (70 / 15 / 15).
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    # Build and compile the model from the data dimensions.
    input_shape = X_train.shape[1:]
    num_classes = y_train.shape[1]

    model = create_cnn_model(input_shape, num_classes)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Callbacks: always halve the learning rate when val_loss plateaus;
    # early stopping is opt-in.
    callbacks = [
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=15,
            min_lr=1e-7
        )
    ]
    if early_stopping_patience is not None:
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=early_stopping_patience,
                restore_best_weights=True,
                verbose=1
            )
        )

    # Train.
    history = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=callbacks
    )

    # Evaluate on the held-out test split.
    _test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f"\nTest accuracy: {test_acc:.4f}")

    # Convert predicted probabilities and one-hot targets to class indices.
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_test_classes = np.argmax(y_test, axis=1)

    # Pass the full label set explicitly: without `labels=`, the report
    # raises if any class is missing from both the test targets and the
    # predictions. Names are stringified because the report measures their
    # length when formatting.
    class_names = [str(name) for name in label_encoder.classes_]
    print("\nClassification Report:")
    print(classification_report(
        y_test_classes,
        y_pred_classes,
        labels=np.arange(len(class_names)),
        target_names=class_names
    ))

    return model, history, (X_test, y_test)

In [13]:
# Build the input tensor, one-hot labels and the fitted label encoder.
X, y, label_encoder = prepare_training_data(spectrograms_data)

# Train the model; the held-out test split is kept for later inspection.
model, history, test_data = train_model(X, y, label_encoder, batch_size=32, epochs=100)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 71ms/step - accuracy: 0.3299 - loss: 1.8536 - val_accuracy: 0.3361 - val_loss: 17.8534 - learning_rate: 0.0010
Epoch 2/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 68ms/step - accuracy: 0.3173 - loss: 1.7048 - val_accuracy: 0.3571 - val_loss: 2.4863 - learning_rate: 0.0010
Epoch 3/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 68ms/step - accuracy: 0.4241 - loss: 1.3218 - val_accuracy: 0.3529 - val_loss: 1.4166 - learning_rate: 0.0010
Epoch 4/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 70ms/step - accuracy: 0.4026 - loss: 1.2223 - val_accuracy: 0.3193 - val_loss: 1.5697 - learning_rate: 0.0010
Epoch 5/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 69ms/step - accuracy: 0.3875 - loss: 1.2376 - val_accuracy: 0.3824 - val_loss: 1.1233 - learning_rate: 0.0010
Epoch 6/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 69ms/