In [2]:
import os
import time
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from tensorboardX import SummaryWriter
from sklearn.metrics import classification_report, confusion_matrix
from collections import Counter

import load 
from load_data import ECGDataset, ECGCollate, SmartBatchSampler
from resnet1d import ResNet1D

%load_ext autoreload
%autoreload 2

os.environ['KMP_DUPLICATE_LIB_OK']='True' # To prevent the kernel from dying.

In [3]:
batch_size = 128 #128 dans le papier de Hannun et al. mais 32 dans le config.json leurs test sur cinc17...

writer = SummaryWriter('runs/')

In [4]:
%load_ext tensorboard
%tensorboard --logdir runs/

In [5]:
print("Loading training set...")
train = load.load_dataset("train.json")
ecgs, labels = train
print(f"len(X) : \n {len(ecgs)}")
print(f"len(y) : \n {len(labels)}")
for i in range(3):
    print(f"ecg_{i} : \n {ecgs[i]}")
    print(f"len(ecg_{i}) : {len(ecgs[i])}")
    print(f"label_{i} :\n {labels[i]}")
    print(f"len(label_{i}) : {len(labels[i])}")

Loading training set...


100%|██████████| 7676/7676 [00:01<00:00, 4935.72it/s]

len(X) : 
 7676
len(y) : 
 7676
ecg_0 : 
 [  72   83   93 ... -136 -133 -131]
len(ecg_0) : 8960
label_0 :
 ['N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']
len(label_0) : 35
ecg_1 : 
 [-137 -167 -200 ...  -50  -51  -51]
len(ecg_1) : 8960
label_1 :
 ['N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']
len(label_1) : 35
ecg_2 : 
 [620 780 914 ... 102 115 116]
len(ecg_2) : 8960
label_2 :
 ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
len(label_2) : 35





In [6]:
print("Loading dev set...")
val = load.load_dataset("dev.json")

Loading dev set...


100%|██████████| 852/852 [00:00<00:00, 5502.94it/s]


In [7]:
train_dataset = ECGDataset(*train)
val_dataset = ECGDataset(*val)

MEAN :  7.4661856  STD :  236.10312
self.classes :  ['A', 'N', 'O', '~']
self.class_to_int :  {'A': 0, 'N': 1, 'O': 2, '~': 3}
MEAN :  8.029898  STD :  242.35907
self.classes :  ['A', 'N', 'O', '~']
self.class_to_int :  {'A': 0, 'N': 1, 'O': 2, '~': 3}


In [8]:

# Instanciation du Sampler intelligent
train_batch_sampler = SmartBatchSampler(train_dataset, 32)
val_batch_sampler = SmartBatchSampler(val_dataset, 32)

train_collate_fn = ECGCollate(
    pad_val_x=train_dataset.pad_value_x_normalized,
    num_classes=train_dataset.num_classes
)

val_collate_fn = ECGCollate(
    pad_val_x=val_dataset.pad_value_x_normalized,
    num_classes=val_dataset.num_classes
)

train_loader = DataLoader(
    train_dataset,
    batch_sampler=train_batch_sampler, 
    collate_fn=train_collate_fn,
    num_workers=4
)

val_loader = DataLoader(
    val_dataset,
    batch_sampler=val_batch_sampler, 
    collate_fn=val_collate_fn,
    num_workers=4
)

Tri du dataset par longueur pour minimiser le padding...
Tri du dataset par longueur pour minimiser le padding...


In [8]:
%load_ext tensorboard
%tensorboard --logdir logs --port 6006

In [9]:
# make model
device_str = "cuda"
device = torch.device(device_str if torch.cuda.is_available() else "cpu")
print(f"Running on {device}")
kernel_size = 16 # 16 in Hannun et al.
stride = 2
n_block = 16 # 16 in Hannun et al.
downsample_gap = 2 # 2 in Hannun et al.
increasefilter_gap = 4 # 4 in Hannun et al.

model = ResNet1D(
    in_channels=1, 
    base_filters=32, # 32 in Hannun et al.
    kernel_size=kernel_size, 
    stride=stride, 
    groups=1, # like a classical ResNet
    n_block=n_block, 
    n_classes=2, 
    downsample_gap=downsample_gap, 
    increasefilter_gap=increasefilter_gap, 
    use_bn=True,
    use_do=True,
    verbose=True
    )

model.to(device)

summary(model, (1, 100), device=device_str)

Running on cpu
input shape torch.Size([2, 1, 100])
after first conv torch.Size([2, 32, 100])
i_block: 0, in_channels: 32, out_channels: 32, downsample: False
torch.Size([2, 32, 100])
i_block: 1, in_channels: 32, out_channels: 32, downsample: True
torch.Size([2, 32, 50])
i_block: 2, in_channels: 32, out_channels: 32, downsample: False
torch.Size([2, 32, 50])
i_block: 3, in_channels: 32, out_channels: 32, downsample: True
torch.Size([2, 32, 25])
i_block: 4, in_channels: 32, out_channels: 64, downsample: False
torch.Size([2, 64, 25])
i_block: 5, in_channels: 64, out_channels: 64, downsample: True
torch.Size([2, 64, 13])
i_block: 6, in_channels: 64, out_channels: 64, downsample: False
torch.Size([2, 64, 13])
i_block: 7, in_channels: 64, out_channels: 64, downsample: True
torch.Size([2, 64, 7])
i_block: 8, in_channels: 64, out_channels: 128, downsample: False
torch.Size([2, 128, 7])
i_block: 9, in_channels: 128, out_channels: 128, downsample: True
torch.Size([2, 128, 4])
i_block: 10, in_cha

In [12]:
import torch.optim as optim
# train and test
model.verbose = False
optimizer = optim.Adam(
    model.parameters(), 
    lr=1e-3, 
    weight_decay=1e-3
    )
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min', 
    factor=0.1, # like in Hannun et al.
    patience=2 # 2 in Hannun et al. "two consecutive epochs"
    )

loss_func = torch.nn.CrossEntropyLoss()

n_epoch = 50 # 100 in Hannun et al. code 
step = 0 # ?
for _ in tqdm(range(n_epoch), desc="epoch", leave=False):

    # train
    model.train()
    prog_iter = tqdm(train_loader, desc="Training", leave=False)
    for batch_idx, batch in enumerate(prog_iter):

        input_x, input_y = tuple(t.to(device) for t in batch)
        pred = model(input_x)
        loss = loss_func(pred, input_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1

        writer.add_scalar('Loss/train', loss.item(), step)
    
    scheduler.step(_)
                
    # test
    model.eval()
    prog_iter_test = tqdm(dataloader_test, desc="Testing", leave=False)
    all_pred_prob = []
    with torch.no_grad():
        for batch_idx, batch in enumerate(prog_iter_test):
            input_x, input_y = tuple(t.to(device) for t in batch)
            pred = model(input_x)
            all_pred_prob.append(pred.cpu().data.numpy())
    all_pred_prob = np.concatenate(all_pred_prob)
    all_pred = np.argmax(all_pred_prob, axis=1)
    ## vote most common
    final_pred = []
    final_gt = []
    for i_pid in np.unique(pid_test):
        tmp_pred = all_pred[pid_test==i_pid]
        tmp_gt = Y_test[pid_test==i_pid]
        final_pred.append(Counter(tmp_pred).most_common(1)[0][0])
        final_gt.append(Counter(tmp_gt).most_common(1)[0][0])
    ## classification report
    tmp_report = classification_report(final_gt, final_pred, output_dict=True)
    print(confusion_matrix(final_gt, final_pred))
    f1_score = (tmp_report['0']['f1-score'] + tmp_report['1']['f1-score'] + tmp_report['2']['f1-score'] + tmp_report['3']['f1-score'])/4
    f1_score = (tmp_report['0']['f1-score'] + tmp_report['1']['f1-score'])/2
    writer.add_scalar('F1/f1_score', f1_score, _)
    writer.add_scalar('F1/label_0', tmp_report['0']['f1-score'], _)
    writer.add_scalar('F1/label_1', tmp_report['1']['f1-score'], _)
    writer.add_scalar('F1/label_2', tmp_report['2']['f1-score'], _)
    writer.add_scalar('F1/label_3', tmp_report['3']['f1-score'], _)

                                             

RuntimeError: 0D or 1D target tensor expected, multi-target not supported

In [9]:
def create_tqdm_bar(iterable, desc):
    return tqdm(enumerate(iterable),total=len(iterable), ncols=150, desc=desc)


def train_model(model, train_loader, val_loader, loss_func, tb_logger, epochs=10, name="default"):
    """
    Train the classifier for a number of epochs.
    """
    loss_cutoff = len(train_loader) // 10
    optimizer = torch.optim.Adam(model.parameters(), 0.001)

    # The scheduler is used to change the learning rate every few "n" steps.
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(epochs * len(train_loader) / 5), gamma=hparams.get('gamma', 0.8))
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                            mode='min', 
                                                            factor=0.1, # like in Hannun et al.
                                                            patience=2 # 2 in Hannun et al. "two consecutive epochs"
                                                            )
    for epoch in range(epochs):

        # Training stage, where we want to update the parameters.
        model.train()  # Set the model to training mode

        training_loss = []
        validation_loss = []

        # Create a progress bar for the training loop.
        training_loop = create_tqdm_bar(train_loader, desc=f'Training Epoch [{epoch + 1}/{epochs}]')
        for train_iteration, batch in training_loop:
            optimizer.zero_grad() # Reset the gradients - VERY important! Otherwise they accumulate.
            ecgs, labels = batch # Get the images and labels from the batch, in the fashion we defined in the dataset and dataloader.
            ecgs, labels = ecgs.to(device), labels.to(device) # Send the data to the device (GPU or CPU) - it has to be the same device as the model.

            # Flatten the images to a vector. This is done because the classifier expects a vector as input.
            # Could also be done by reshaping the images in the dataset.
            # images = images.view(images.shape[0], -1)

            pred = model(ecgs) # Stage 1: Forward().
            loss = loss_func(pred, labels) # Compute the loss over the predictions and the ground truth.
            loss.backward()  # Stage 2: Backward().
            optimizer.step() # Stage 3: Update the parameters.
            scheduler.step() # Update the learning rate.


            training_loss.append(loss.item())
            training_loss = training_loss[-loss_cutoff:]

            # Update the progress bar.
            training_loop.set_postfix(curr_train_loss = "{:.8f}".format(np.mean(training_loss)),
                                      lr = "{:.8f}".format(optimizer.param_groups[0]['lr'])
            )

            # Update the tensorboard logger.
            tb_logger.add_scalar(f'classifier_{name}/train_loss', loss.item(), epoch * len(train_loader) + train_iteration)

        # Validation stage, where we don't want to update the parameters. Pay attention to the classifier.eval() line
        # and "with torch.no_grad()" wrapper.
        model.eval()
        val_loop = create_tqdm_bar(val_loader, desc=f'Validation Epoch [{epoch + 1}/{epochs}]')

        with torch.no_grad():
            for val_iteration, batch in val_loop:
                ecgs, labels = batch
                ecgs, labels = ecgs.to(device), labels.to(device)

                #ecgs = ecgs.view(images.shape[0], -1)
                pred = model(ecgs)
                loss = loss_func(pred, labels)
                validation_loss.append(loss.item())
                # Update the progress bar.
                val_loop.set_postfix(val_loss = "{:.8f}".format(np.mean(validation_loss)))

                # Update the tensorboard logger.
                tb_logger.add_scalar(f'classifier_{name}/val_loss', loss.item(), epoch * len(val_loader) + val_iteration)


In [24]:
# Create a tensorboard logger.
# NOTE: In order to see the logs, run the following command in the terminal: tensorboard --logdir=./
# Also, in order to reset the logs, delete the logs folder MANUALLY.

path = "logs"
num_of_runs = len(os.listdir(path)) if os.path.exists(path) else 0
path = os.path.join(path, f'run_{num_of_runs + 1}')

tb_logger = SummaryWriter(path)

epochs = 20

loss_func = nn.CrossEntropyLoss() # The loss function we use for classification.

# make model
device_str = "cuda"
device = torch.device(device_str if torch.cuda.is_available() else "cpu")
print(f"Running on {device}")

kernel_size = 16 # 16 in Hannun et al.
stride = 2
n_block = 16 # 16 in Hannun et al.
downsample_gap = 2 # 2 in Hannun et al.
increasefilter_gap = 4 # 4 in Hannun et al.

model = ResNet1D(
    in_channels=1, 
    base_filters=32, # 32 in Hannun et al.
    kernel_size=kernel_size, 
    stride=stride, 
    groups=1, # like a classical ResNet
    n_block=n_block, 
    n_classes=2, 
    downsample_gap=downsample_gap, 
    increasefilter_gap=increasefilter_gap, 
    use_bn=True,
    use_do=True
    )

model.to(device)

summary(model, (1,100), device=device_str)

model.verbose = False

train_model(model, train_loader, val_loader, loss_func, tb_logger, epochs=epochs, name="Default")

print()
print("Finished training!")
print("How did we do? Let's check the accuracy of the defaut classifier on the training and validation sets:")
print(f"Training Acc: {model.getTestAcc(labled_train_loader)[1] * 100}%")
print(f"Validation Acc: {model.getTestAcc(labled_val_loader)[1] * 100}%")

Running on cpu
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1              [-1, 32, 100]             544
   MyConv1dPadSame-2              [-1, 32, 100]               0
       BatchNorm1d-3              [-1, 32, 100]              64
              ReLU-4              [-1, 32, 100]               0
            Conv1d-5              [-1, 32, 100]          16,416
   MyConv1dPadSame-6              [-1, 32, 100]               0
       BatchNorm1d-7              [-1, 32, 100]              64
              ReLU-8              [-1, 32, 100]               0
           Dropout-9              [-1, 32, 100]               0
           Conv1d-10              [-1, 32, 100]          16,416
  MyConv1dPadSame-11              [-1, 32, 100]               0
       BasicBlock-12              [-1, 32, 100]               0
      BatchNorm1d-13              [-1, 32, 100]              64
             ReLU-14    

Training Epoch [1/20]:   0%|                                                                                                  | 0/240 [00:02<?, ?it/s]
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1f3621dbc0>
Traceback (most recent call last):
  File "/opt/python/lib/python3.13/site-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/opt/python/lib/python3.13/site-packages/torch/utils/data/dataloader.py", line 1618, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/python/lib/python3.13/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/opt/python/lib/python3.13/multiprocessing/popen_fork.py", line 41, in wait
    if not wait([self.sentinel], timeout):
  File "/opt/python/lib/python3.13/multiprocessing/connection.py", line 1148, in wait
    ready = selector.select(timeout)
  File "/opt/python/lib/python3.13/selectors.py", line 39

IndexError: Target 2 is out of bounds.