In [1]:
from helper_code import *
import numpy as np, os, sys, joblib
import ecg_plot
import pandas as pd
from glob import glob
import os
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tsai.all import *
import torch
import optuna
from optuna.integration import FastAIPruningCallback
from sklearn.metrics import classification_report
import transformation_funcs as tfs
import seaborn as sns
from torchsummary import summary
import argparse


  from .autonotebook import tqdm as notebook_tqdm


In [36]:
# --- Experiment configuration ------------------------------------------------
torch.cuda.set_device(0)
datasets = ["PTBXL","ChapmanShaoxing","CPSC2018",]
norm_type = "minmax"        # must be a key of NORM_TRANSFORMS (only used when "n" is in transforms)
max_len = 8000
sf = 0.5                    # scale_factor handed to tfs.Scale; also feeds the BandPass rate below
scale_type = "nearest"      # mode handed to tfs.Scale
architecture = "inception"
DATASET_ID = "CPSC2018"
transforms = ["sc","n"]     # ordered codes: "sc" scale, "n" normalise, "bp" band-pass, "sh" random shift

# Maps each supported norm_type to the name of its transform class in
# transformation_funcs. Names are resolved with getattr so that only the
# selected class is looked up, exactly as before.
NORM_TRANSFORMS = {
    "minmax": "NormMinMax",
    "maxdiv": "NormMaxDiv",
    "zscore": "NormZScore",
    "median": "NormMedian",
    "deci_scale": "NormDecimalScaling",
}

# Short tag describing the preprocessing; it is embedded in saved model names.
processing_type = '-'.join(transforms) if transforms else "raw"

# --- Assemble the batch transforms --------------------------------------------
# The list is rebuilt from scratch so re-running the cell never appends twice.
batch_tfms = []

if "sc" in transforms:
    batch_tfms.append(tfs.Scale(scale_factor=sf,mode=scale_type))

if "n" in transforms:
    # Fail loudly on a typo: silently skipping normalisation would leave
    # processing_type claiming "n" although no normalisation was applied.
    if norm_type not in NORM_TRANSFORMS:
        raise ValueError(
            "unknown norm_type %r; expected one of %s"
            % (norm_type, sorted(NORM_TRANSFORMS))
        )
    batch_tfms.append(getattr(tfs, NORM_TRANSFORMS[norm_type])())

if "bp" in transforms:
    # NOTE(review): low_cut=50 is larger than high_cut=1, which looks swapped
    # for a band-pass filter -- confirm against tfs.BandPass before enabling "bp".
    batch_tfms.append(tfs.BandPass(int(sf*500),low_cut=50, high_cut=1,leads=12,))

if "sh" in transforms:
    batch_tfms.append(tfs.RandomShift(0.1))

print("transforms:",[x.name for x in batch_tfms])
print(processing_type)

transforms: ['Scale', 'NormMinMax']
sc-n


In [66]:
# Exploration (disabled): for each dataset, load it and print the labels that have more than 1000 positive records
# for x in ["PTBXL","ChapmanShaoxing","CPSC2018",]:
#     DATASET_ID = x
#     DATASET_NAME = "WFDB_%s_signitured"%DATASET_ID
#     X = np.load('./data/big_numpy_datasets/%s.npy'%DATASET_NAME, mmap_mode='c')
#     label_df = pd.read_csv("data/%s.csv"%DATASET_NAME).drop(columns=["headers","leads"])
#     y = snomedConvert(label_df)
#     y=y[(y.columns[y.sum()>1000])]
#     print(X.shape)
#     print(x)
#     print(y.sum())
    

In [89]:
# Load the signals and the label table for the dataset chosen in the
# configuration cell, then keep only the labels that are frequent enough to
# train on (> 1000 positives) yet present in fewer than half of the records.
#
# DATASET_ID is taken from the configuration cell. It used to be reassigned
# from `x`, a leftover loop variable of the commented-out exploration cell,
# which raises NameError on a fresh kernel.
DATASET_NAME = "WFDB_%s_signitured"%DATASET_ID

# mmap_mode='c' memory-maps the file copy-on-write: nothing is read until it is
# sliced, and in-memory modifications are never written back to disk.
X = np.load('./data/big_numpy_datasets/%s.npy'%DATASET_NAME, mmap_mode='c')
label_df = pd.read_csv("data/%s.csv"%DATASET_NAME).drop(columns=["headers","leads"])
y = snomedConvert(label_df)

# Filter with one boolean mask over the per-label positive counts. Combining
# two pandas Index objects with `&` as a set intersection is deprecated and no
# longer behaves as a set operation in current pandas.
label_counts = y.sum()
keep_label = (label_counts > 1000) & (label_counts < 0.5 * len(y))
y = y[label_counts.index[keep_label]]
y

Unnamed: 0,right bundle branch block,atrial fibrillation
0,True,False
1,False,False
2,False,True
3,False,True
4,False,False
...,...,...
6872,False,False
6873,False,False
6874,False,False
6875,False,True


In [118]:
# Build a class-balanced subset for a single label: every record that has the
# label, plus an equally sized random sample of records that do not.
SAMPLING_SEED = 42  # fixed so the sampled negatives are the same on every run
rng = np.random.default_rng(SAMPLING_SEED)

label = y.columns[0]
y_label = y[label]

# Split the record index into positives and negatives, keeping index order so
# the seeded draw below always samples from the same sequence.
has_label = y_label.eq(True)
have_index = np.asarray(y_label.index[has_label])
have_not_index = np.asarray(y_label.index[~has_label])

# When positives make up more than half of the records there are too few
# negatives to match them one-to-one, so negatives are drawn with replacement
# (rare); otherwise each negative is used at most once.
needs_replacement = len(have_index) > 0.5 * len(y_label)
have_not_index = rng.choice(have_not_index, size=len(have_index), replace=needs_replacement)

num_have = len(have_index)
num_have_not = len(have_not_index)
print("for %s: %s ecgs, %s have %s don't"%(label,len(y_label), num_have,num_have_not))

# this variable will contain all the indices we are interested in
selected_indices = np.sort(np.concatenate([have_index,have_not_index]))

# The index labels of y are used as row positions in X.
# NOTE(review): assumes y carries a default 0..n-1 index aligned with X -- confirm.
X_label = X[selected_indices] # X for experiment

for right bundle branch block: 6877 ecgs, 1857 have 1857 don't


In [174]:
# Train one model on the balanced subset and keep the checkpoint with the best
# F1_multi score.
#
# NOTE(review): y_multi, tfms, cv_splits and cv_num are not assigned anywhere in
# the visible part of this notebook; they must already exist in the kernel for
# this cell to run, so it will not survive Restart & Run All -- confirm where
# they are meant to be defined.

# Validate the architecture first. Otherwise an unknown value would skip both
# branches below and the Learner would fail on a missing `model`, or silently
# reuse a stale one left over from an earlier run.
if architecture not in ("inception", "minirocket"):
    raise ValueError(
        "unknown architecture %r; expected 'inception' or 'minirocket'" % architecture
    )

dsets = TSDatasets(X_label.astype(float)[:,:,0:max_len], y_multi, tfms=tfms, splits=cv_splits[cv_num]) # inplace=True by default
dls   = TSDataLoaders.from_dsets(dsets.train,dsets.valid, bs=[64, 128], batch_tfms=batch_tfms, num_workers=0)
metrics =[accuracy_multi, balanced_accuracy_multi, precision_multi, recall_multi, specificity_multi, F1_multi]

if architecture == "inception":
    model = InceptionTimePlus(dls.vars, dls.c, dls.len, depth=12, ks = 130,nf=32 )
elif architecture == "minirocket":
    model = MiniRocketPlus(dls.vars, dls.c,dls.len)
# try : loss_func = BCEWithLogitsLossFlat(pos_weight=dls.train.cws.sqrt())

# The checkpoint name encodes the full experiment configuration plus the fold.
checkpoint_name = "%s_%s_%s_%s_%s_%s_%s"%(architecture,DATASET_ID,processing_type,sf,scale_type,norm_type,cv_num)

learn = Learner(dls, model, metrics=metrics,
#                     opt_func = wrap_optimizer(torch.optim.Adam,weight_decay=6.614e-07),
                cbs=[fastai.callback.all.SaveModelCallback(monitor="F1_multi",fname=checkpoint_name)],
                model_dir="models/5CV/")

# Train with the per-epoch log and the progress bar suppressed.
with learn.no_logging(), learn.no_bar():
    learn.fit_one_cycle(300, lr_max=0.01)


array([[[   28,    39,    45, ...,     0,     0,    12],
        [    7,    11,    15, ...,     0,     0,    12],
        [  -21,   -28,   -30, ...,     0,     0,    12],
        ...,
        [ -112,  -110,  -108, ...,     0,     0,    12],
        [ -596,  -590,  -582, ...,     0,     0,    12],
        [  -16,    -7,     2, ...,     0,     0,    12]],

       [[  -53,   -53,   -55, ...,     0,     0,    12],
        [ -117,  -111,  -106, ...,     0,     0,    12],
        [  -64,   -58,   -51, ...,     0,     0,    12],
        ...,
        [  -54,   -44,   -34, ...,     0,     0,    12],
        [  -70,   -62,   -55, ...,     0,     0,    12],
        [ -158,  -152,  -148, ...,     0,     0,    12]],

       [[  159,   234,   214, ...,     0,     0,    12],
        [   68,    97,    85, ...,     0,     0,    12],
        [  -91,  -137,  -129, ...,     0,     0,    12],
        ...,
        [ -103,  -143,  -125, ...,     0,     0,    12],
        [  -25,   -32,   -27, ...,     0,    