In [1]:
# Environment setup: fetch the project code, install its dependencies and
# download + extract the dataset.
# NOTE(review): this cell is not idempotent — `git clone` fails if the `chagas`
# directory already exists, so it needs a fresh runtime (or manual cleanup) to re-run.
!git clone -b fix/preprocessing https://github.com/dokunoale/chagas.git
!pip install -r chagas/requirements.txt

# Download the dataset archive from Google Drive
import gdown

# Share link of the dataset archive; `fuzzy=True` is passed so gdown accepts
# this "view" style link instead of a direct-download URL.
url = "https://drive.google.com/file/d/1vK_gMI36xjdtiJjMBV7dKHurIExQs4hC/view?usp=drive_link"
gdown.download(url, "dataset.zip", quiet=False, fuzzy=True)
# NOTE(review): the download target above is relative to the working directory,
# while the path below is absolute — this only lines up when the working
# directory is /content (presumably a Colab runtime; confirm).
!unzip -q /content/dataset.zip -d chagas/data/

Cloning into 'chagas'...
remote: Enumerating objects: 529, done.[K
remote: Counting objects: 100% (91/91), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 529 (delta 47), reused 58 (delta 27), pack-reused 438 (from 1)[K
Receiving objects: 100% (529/529), 3.40 MiB | 13.05 MiB/s, done.
Resolving deltas: 100% (261/261), done.
Collecting h5py==3.12.1 (from -r chagas/requirements.txt (line 3))
  Downloading h5py-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)
Collecting scikit-learn==1.6.0 (from -r chagas/requirements.txt (line 4))
  Downloading scikit_learn-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting wfdb==4.1.2 (from -r chagas/requirements.txt (line 5))
  Downloading wfdb-4.1.2-py3-none-any.whl.metadata (4.3 kB)
Collecting joblib==1.4.2 (from -r chagas/requirements.txt (line 6))
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Downloading h5py-3.12.1-cp311-cp311-m

Downloading...
From (original): https://drive.google.com/uc?id=1vK_gMI36xjdtiJjMBV7dKHurIExQs4hC
From (redirected): https://drive.google.com/uc?id=1vK_gMI36xjdtiJjMBV7dKHurIExQs4hC&confirm=t&uuid=7e33c763-5fef-40a2-bb6e-fa974bf0e273
To: /content/dataset.zip
100%|██████████| 623M/623M [00:11<00:00, 52.8MB/s]


In [2]:
import sys
# Make the packages under the cloned repository's `src` folder importable
# (later cells import `preprocessing.*` and `models.*`).
sys.path.append("/content/chagas/src")

# Deep-learning stack used to build and train the model
import tensorflow as tf
from tensorflow.keras.layers import Layer
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import AUC

Caricamento del dataset e preprocessing.

In [3]:
from preprocessing.tf_dataset_loader import WfdbLoader
from preprocessing.filters import FiltfiltNoiseReducer

# Root folder of the pre-split dataset; each split folder holds a
# `positives` and a `negatives` sub-folder.
data_path = "/content/chagas/data/big_splitted_dataset/"

# Noise-reduction filter shared by every split. It is deliberately NOT called
# `filter`, so the Python builtin of that name is not shadowed for the rest of
# the kernel session.
noise_filter = FiltfiltNoiseReducer(fs=400, iir_freq=55.0, butterworth_cutoff=5., lowpass_cutoff=30., verbose=True)

# Number of times the filter is registered on each loader.
# NOTE(review): the original cell called `add_filter` twice with the same
# filter on every loader, so each record was filtered twice. That behaviour is
# kept here; set this to 1 if the duplication was a copy-paste slip.
FILTER_PASSES = 2


def load_split(split, filter_passes=FILTER_PASSES):
    """Load one dataset split through a freshly configured ``WfdbLoader``.

    Args:
        split: Name of the split sub-folder under ``data_path``
            (``"train"``, ``"val"`` or ``"test"``).
        filter_passes: How many times ``noise_filter`` is registered on the
            loader before loading.

    Returns:
        A ``(loader, X, y)`` tuple: the loader itself (kept so the caller can
        query it afterwards, e.g. ``get_metadata()``) and the two values
        returned by ``loader.load``.
    """
    loader = WfdbLoader(label='Chagas label')
    for _ in range(filter_passes):
        loader.add_filter(noise_filter)
    # Positives are registered before negatives, as in the original cell.
    loader.add_dataset(data_path + split + "/positives")
    loader.add_dataset(data_path + split + "/negatives")
    X, y = loader.load(shuffle=True, verbose=True)
    return loader, X, y


# Load the three splits
train, X_train, y_train = load_split("train")
validation, X_val, y_val = load_split("val")
test, X_test, y_test = load_split("test")

# Per-record metadata of the test split, used by the final report cell
y_test_info = test.get_metadata()

Loading records: 100%|██████████| 7855/7855 [00:39<00:00, 200.38record/s]
Filtering records - filtfilt: 100%|██████████| 7855/7855 [00:41<00:00, 187.56record/s]
Filtering records - filtfilt: 100%|██████████| 7855/7855 [00:41<00:00, 189.97record/s]
Loading records: 100%|██████████| 1122/1122 [00:05<00:00, 201.34record/s]
Filtering records - filtfilt: 100%|██████████| 1122/1122 [00:05<00:00, 191.66record/s]
Filtering records - filtfilt: 100%|██████████| 1122/1122 [00:05<00:00, 191.12record/s]
Loading records: 100%|██████████| 2245/2245 [00:11<00:00, 203.93record/s]
Filtering records - filtfilt: 100%|██████████| 2245/2245 [00:11<00:00, 194.42record/s]
Filtering records - filtfilt: 100%|██████████| 2245/2245 [00:11<00:00, 195.73record/s]


# Training

In [None]:
# MODELLO DI ESEMPIO
from models.layers import LightLogSpectrogram

def build_cnn_model(input_shape=(2800, 12)):
    """Build the example CNN: log-spectrogram front end plus two Conv2D stages.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
            Defaults to ``(2800, 12)`` (12-lead ECG time series).

    Returns:
        An uncompiled ``tf.keras.Model`` ending in a single sigmoid unit
        (binary output).
    """
    ecg_input = tf.keras.Input(shape=input_shape)

    # Logarithmic spectrogram of the raw signal
    # (presumably shaped [batch, time, freq, 12] — TODO confirm against LightLogSpectrogram)
    spectrogram = LightLogSpectrogram()(ecg_input)

    # First convolutional stage followed by spatial down-sampling
    conv1 = tf.keras.layers.Conv2D(
        32, (3, 3), activation='relu', padding='same'
    )(spectrogram)
    pooled = tf.keras.layers.MaxPooling2D((2, 2))(conv1)

    # Second convolutional stage, explicitly named 'Conv2'
    conv2 = tf.keras.layers.Conv2D(
        64, (3, 3), activation='relu', padding='same', name='Conv2'
    )(pooled)

    # Collapse the spatial dimensions into one feature vector per sample
    feature_vector = tf.keras.layers.GlobalAveragePooling2D()(conv2)

    # Single sigmoid unit for the binary output
    probability = tf.keras.layers.Dense(1, activation='sigmoid')(feature_vector)

    return tf.keras.Model(ecg_input, probability)

# Build the model, then compile it for binary classification
model = build_cnn_model(input_shape=(2800, 12))

# The AUC metric is named 'auc', so Keras reports its validation value as 'val_auc'
tracked_metrics = ['accuracy', AUC(name='auc')]
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=tracked_metrics)

model.summary()

In [None]:
# Callbacks: early stopping and saving of the best model, both driven by validation AUC
early_stop = EarlyStopping(
    monitor='val_auc',
    mode='max',
    patience=6,
    min_delta=1e-3,
    restore_best_weights=True,
)
checkpoint = ModelCheckpoint(
    "best_model.keras",
    monitor='val_auc',
    mode='max',
    save_best_only=True,
)

callbacks = [early_stop, checkpoint]

# Training
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=40,
    batch_size=30,
    callbacks=callbacks,
)
history = model.fit(X_train, y_train, **fit_options)

Stampa un report che riassume il modello e le sue performance

In [None]:
# DO NOT MODIFY
# Builds the final report: picks a decision threshold from the validation
# predictions, renders two report images on the test set and shows them
# side by side.

from models.utils import find_optimal_threshold
from models.analysis import plot_full_report_and_metrics, plot_model_analysis
from PIL import Image

# The threshold is chosen on the validation split only; the test split is
# used just for the report below.
y_pred_probs = model.predict(X_val)
threshold = find_optimal_threshold(y_val, y_pred_probs)

# Both helpers are asked for Pillow images (return_pillow=True) so they can be composed below.
img1 = plot_full_report_and_metrics(model, X_test, y_test, history, threshold, return_pillow=True)
img2 = plot_model_analysis(model, X_test, y_test, y_test_info, threshold, return_pillow=True)

# 1. Resize img2 to 1.2x the height of img1, keeping its aspect ratio
new_height = int(img1.height * 1.2)
new_width = int(img2.width * (new_height / img2.height))
img2_resized = img2.resize((new_width, new_height), Image.LANCZOS)

# 2. Create the white canvas that will hold both images side by side
total_width = img1.width + img2_resized.width
max_height = max(img1.height, img2_resized.height)
new_img = Image.new('RGB', (total_width, max_height), color=(255, 255, 255))

# 3. Vertically center each image on the canvas (the offset is 0 for the taller one)
y_offset_img1 = (max_height - img1.height) // 2
y_offset_img2 = (max_height - img2_resized.height) // 2

# img1 goes on the left, the resized img2 immediately to its right
new_img.paste(img1, (0, y_offset_img1))
new_img.paste(img2_resized, (img1.width, y_offset_img2))

# 4. Show the composed image in the notebook
display(new_img)