<a href="https://colab.research.google.com/github/guidogagl/physioex/blob/main/examples/colab_setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# mount your personal gdrive folder inside google colab server
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import os


def download_mitdb():
    home_dir = os.path.expanduser("~")
    mitdb_dir = os.path.join(home_dir, "mitdb")
    if not os.path.exists(mitdb_dir):
        os.makedirs(mitdb_dir)

    # Cambia la directory di lavoro corrente alla cartella CINC
    current_dir = os.getcwd()
    os.chdir(mitdb_dir)

    # Download the dataset into the current working dir using wfdb
    wfdb.dl_database("mitdb", os.getcwd())

    print("Dataset downloaded.")
    os.chdir(current_dir)


download_mitdb()

In [5]:
import wfdb
import numpy as np
import os

mitdb_dir = "/home/guido/mitdb"

pick = "MLII"
subjects = [
    int(f.split(".")[0])
    for f in os.listdir(mitdb_dir)
    if os.path.isfile(os.path.join(mitdb_dir, f))
]
# get the integer part of the file name
subjects = np.unique(subjects).astype(int)

for subject in subjects:
    samp = wfdb.rdsamp(f"{mitdb_dir}/{subject}")
    annt = wfdb.rdann(f"{mitdb_dir}/{subject}", "atr")

    try:
        signal = samp[0][:, samp[1]["sig_name"].index(pick)]
    except:
        print(f"Signal {pick} not found in subject {subject}.")
        print(f"Available signals are: {samp[1]['sig_name']}")
        continue

Signal MLII not found in subject 102.
Available signals are: ['V5', 'V2']
Signal MLII not found in subject 104.
Available signals are: ['V5', 'V2']


In [5]:
import numpy as np
from sklearn.model_selection import StratifiedGroupKFold
X = np.ones((17, 2))
y = np.array([0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
groups = np.array([1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8])
sgkf = StratifiedGroupKFold(n_splits=3, shuffle=True, random_state=42)
sgkf.get_n_splits(X, y)
print(sgkf)
for i, (train_index, test_index) in enumerate(sgkf.split(X, y, groups)):
    print(f"Fold {i}:")
    print(f"  Train: index={train_index}")
    print(f"         group={groups[train_index]}")
    print(f"  Test:  index={test_index}")
    print(f"         group={groups[test_index]}")

StratifiedGroupKFold(n_splits=3, random_state=42, shuffle=True)
Fold 0:
  Train: index=[ 0  1  2  3  7 12 13 14]
         group=[1 1 2 2 4 6 6 7]
  Test:  index=[ 4  5  6  8  9 10 11 15 16]
         group=[3 3 3 5 5 5 5 8 8]
Fold 1:
  Train: index=[ 4  5  6  8  9 10 11 12 13 14 15 16]
         group=[3 3 3 5 5 5 5 6 6 7 8 8]
  Test:  index=[0 1 2 3 7]
         group=[1 1 2 2 4]
Fold 2:
  Train: index=[ 0  1  2  3  4  5  6  7  8  9 10 11 15 16]
         group=[1 1 2 2 3 3 3 4 5 5 5 5 8 8]
  Test:  index=[12 13 14]
         group=[6 6 7]


In [3]:
import random

train_ids = [
    101,
    106,
    108,
    109,
    112,
    114,
    115,
    116,
    118,
    119,
    122,
    124,
    201,
    203,
    205,
    207,
    208,
    209,
    215,
    220,
    223,
    230,
]
# select 20% of the subjects as valid subjects
valid_ids = random.sample(train_ids, int(0.2 * len(train_ids)))

train_ids = [subject for subject in train_ids if subject not in valid_ids]

print(valid_ids)
print(train_ids)

[203, 116, 106, 112]
[101, 108, 109, 114, 115, 118, 119, 122, 124, 201, 205, 207, 208, 209, 215, 220, 223, 230]


In [12]:
import os
import numpy as np
import random
import yaml

# import KFold from sklearn
from sklearn.model_selection import KFold

# Ottieni l'elenco di tutti i file nella directory
all_files = os.listdir("/home/guido/mitdb")

# Rimuovi le estensioni dei file per ottenere solo gli ID dei soggetti
all_ids = [os.path.splitext(file)[0] for file in all_files]
all_ids = np.unique(all_ids).astype(int)
print(all_ids)
# Mescola gli ID dei soggetti
# remove subject 102 and 104 from the ids
all_ids = sorted(np.array([id for id in all_ids if id not in [102, 104]]).tolist())

# esegui una 10-cross fold degli id
split = {"subjects": all_ids.copy(), "split": {}}

random.shuffle(all_ids)
all_ids = np.array(all_ids)
print(all_ids)

kf = KFold(n_splits=10)
for i, (train_index, test_index) in enumerate(kf.split(all_ids)):
    train_ids = all_ids[train_index]
    test_ids = all_ids[test_index]

    # estrai randomicamente gli elementi di validazione dal training set
    # in numero uguale agli elementi di testing e rimuovili dal set di training

    valid_ids = np.array(random.sample(train_ids.tolist(), len(test_ids)))
    train_ids = np.array([id for id in train_ids if id not in valid_ids])

    fold = {
        "train": train_ids.tolist(),
        "test": test_ids.tolist(),
        "valid": valid_ids.tolist(),
    }

    split["split"][f"fold_{i}"] = fold

# convert split into a dictonary and dave it as yaml file


# Save the dictionary as a YAML file
with open("split.yaml", "w") as file:
    yaml.dump(split, file)

[100 101 102 103 104 105 106 107 108 109 111 112 113 114 115 116 117 118
 119 121 122 123 124 200 201 202 203 205 207 208 209 210 212 213 214 215
 217 219 220 221 222 223 228 230 231 232 233 234]
[119 220 106 232 100 230 234 109 213 118 114 113 101 105 231 124 200 228
 205 214 223 215 208 121 203 111 112 123 108 122 116 221 212 233 117 107
 217 103 201 210 222 207 115 202 209 219]


In [1]:
from physioex.data.mitdb import MITBIH

db = MITBIH(use_cache=False)
db.split(0)
train, _, test = db.get_sets()

train[0][0].shape

  warn('Preprocessing choices with lambda functions cannot be saved.')
[32m2024-03-23 19:32:26.445[0m | [1mINFO    [0m | [36mphysioex.data.mitdb[0m:[36m__init__[0m:[36m130[0m - [1mFetching the dataset..[0m


720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720
720


[32m2024-03-23 19:32:29.697[0m | [1mINFO    [0m | [36mphysioex.data.mitdb[0m:[36mload_signal[0m:[36m105[0m - [1mClass distribution: Counter({0: 31528, 2: 1587, 1: 746, 3: 260, 4: 4})[0m
[32m2024-03-23 19:32:29.728[0m | [1mINFO    [0m | [36mphysioex.data.utils[0m:[36mwrite_cache[0m:[36m54[0m - [1mCaching dataset into temp/mitdb.pkl[0m


720
720


[32m2024-03-23 19:32:32.447[0m | [1mINFO    [0m | [36mphysioex.data.mitdb[0m:[36msplit[0m:[36m183[0m - [1mTrain shape X (26770, 1, 720), y (26770,)[0m
[32m2024-03-23 19:32:32.449[0m | [1mINFO    [0m | [36mphysioex.data.mitdb[0m:[36msplit[0m:[36m184[0m - [1mValid shape X (3765, 1, 720), y (3765,)[0m
[32m2024-03-23 19:32:32.450[0m | [1mINFO    [0m | [36mphysioex.data.mitdb[0m:[36msplit[0m:[36m185[0m - [1mTest shape X (3590, 1, 720), y (3590,)[0m


(1, 720)

In [9]:
db.split(0)
train, _, test = db.get_sets()

train[0][0].shape

[32m2024-03-23 19:24:14.179[0m | [1mINFO    [0m | [36mphysioex.data.mitdb[0m:[36msplit[0m:[36m174[0m - [1mTrain shape X (12996, 1, 1800), y (12996,)[0m
[32m2024-03-23 19:24:14.181[0m | [1mINFO    [0m | [36mphysioex.data.mitdb[0m:[36msplit[0m:[36m175[0m - [1mValid shape X (1805, 1, 1800), y (1805,)[0m
[32m2024-03-23 19:24:14.182[0m | [1mINFO    [0m | [36mphysioex.data.mitdb[0m:[36msplit[0m:[36m176[0m - [1mTest shape X (1805, 1, 1800), y (1805,)[0m


-1

In [4]:
import torch
import torch.nn as nn

batch_size = 32


class EpochEncoder(nn.Module):
    def __init__(self, module_config) -> None:
        super(EpochEncoder, self).__init__()

        self.conv1 = nn.Conv1d(
            in_channels=module_config["in_channels"],
            out_channels=32,
            kernel_size=10,
            stride=5,
        )
        self.conv2 = nn.Conv1d(
            in_channels=32, out_channels=16, kernel_size=10, stride=5
        )
        self.conv3 = nn.Conv1d(in_channels=16, out_channels=8, kernel_size=10, stride=5)

    def forward(self, x):
        x = self.conv1(x)
        x = nn.ReLU()(x)
        x = self.conv2(x)
        x = nn.ReLU()(x)
        x = self.conv3(x)
        x = nn.ReLU()(x)
        x = x.reshape(x.size(0), -1)
        return x


module_config = {
    "in_channels": 1,
}


encoder = EpochEncoder(module_config)
input_size = (batch_size, 1, 360 * 2)
output = encoder(torch.zeros(input_size))
output_size = output.size()

print(output_size)

torch.Size([32, 32])


In [32]:
from os import listdir, mkdir, system
from os.path import isfile, isdir, join, exists


def convert_mitdb_2_csv():
    home_dir = os.path.expanduser("~")
    dir = os.path.join(home_dir, "mitdb")
    # Create folder
    csv = dir + "/csv/"
    if not exists(csv):
        mkdir(csv)

    records = [
        f for f in listdir(dir) if isfile(join(dir, f)) if (f.find(".dat") != -1)
    ]
    # print records

    for r in records:

        command = "rdsamp -r " + r[:-4] + " -c -H -f 0 -v >" + csv + r[:-4] + ".csv"
        print(command)
        system(command)

        command_annotations = (
            "rdann -r "
            + r[:-4]
            + " -f 0 -a atr -v >"
            + csv
            + r[:-4]
            + "annotations.txt"
        )
        print(command_annotations)
        system(command_annotations)


convert_mitdb_2_csv()

rdsamp -r 222 -c -H -f 0 -v >/home/guido/mitdb/csv/222.csv
rdann -r 222 -f 0 -a atr -v >/home/guido/mitdb/csv/222annotations.txt
rdsamp -r 113 -c -H -f 0 -v >/home/guido/mitdb/csv/113.csv
rdann -r 113 -f 0 -a atr -v >/home/guido/mitdb/csv/113annotations.txt
rdsamp -r 200 -c -H -f 0 -v >/home/guido/mitdb/csv/200.csv
rdann -r 200 -f 0 -a atr -v >/home/guido/mitdb/csv/200annotations.txt
rdsamp -r 202 -c -H -f 0 -v >/home/guido/mitdb/csv/202.csv
rdann -r 202 -f 0 -a atr -v >/home/guido/mitdb/csv/202annotations.txt
rdsamp -r 114 -c -H -f 0 -v >/home/guido/mitdb/csv/114.csv
rdann -r 114 -f 0 -a atr -v >/home/guido/mitdb/csv/114annotations.txt
rdsamp -r 104 -c -H -f 0 -v >/home/guido/mitdb/csv/104.csv
rdann -r 104 -f 0 -a atr -v >/home/guido/mitdb/csv/104annotations.txt
rdsamp -r 122 -c -H -f 0 -v >/home/guido/mitdb/csv/122.csv
rdann -r 122 -f 0 -a atr -v >/home/guido/mitdb/csv/122annotations.txt
rdsamp -r 109 -c -H -f 0 -v >/home/guido/mitdb/csv/109.csv
rdann -r 109 -f 0 -a atr -v >/home/gui

sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not found
sh: 1: rdann: not found
sh: 1: rdsamp: not f

In [18]:
from ecglib.data import load_physionet2020, EcgRecord
import os
from loguru import logger

path = "~/physionet2020/ptb-xl/"
path = os.path.expanduser(path)

if not os.path.exists(path):
    logger.info("Creating directory for physionet2020 data.")
    os.makedirs(path)


data_info = load_physionet2020(path_to_zip=path, path_to_unzip=path, delete_zip=True)

Unzipping started...


FileNotFoundError: [Errno 2] No such file or directory: '/home/guido/physionet2020/ptb-xl/classification-of-12-lead-ecgs-the-physionetcomputing-in-cardiology-challenge-2020-1.0.2.zip'

In [9]:
for record in data_info:
    print(record)

TypeError: 'NoneType' object is not iterable

In [2]:
# setup the working directory
import os

working_dir = "/content/drive/MyDrive/Thesis"
os.chdir(working_dir)

In [5]:
import numpy as np

m = np.array([[1, 1, 0], [0, 1, 0], [1, 0, 1]])
print(np.where(m == 1))

(array([0, 0, 1, 2, 2]), array([0, 1, 1, 0, 2]))


In [None]:
# follow the guide on https://guidogagl.github.io/physioex/
# clone your forked repo
!git clone https://github.com/guidogagl/physioex.git
%cd physioex

!git pull origin main

fatal: destination path 'physioex' already exists and is not an empty directory.
/content/drive/MyDrive/Thesis/physioex


In [None]:
# install the library in development mode
!pip install -e .

In [None]:
# train the model proposed by chambon2018 on the DREEM dataset ( DODH version )
!train -e chambon2018 --dataset dreem --version dodh -vci 30 --sequence_lenght 3 -nj 2 --checkpoint "models/cel/chambon2018/seqlen=3/dreem/dodh/"