/
bandpass_experiment.py
135 lines (119 loc) · 5.64 KB
/
bandpass_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from helper_code import *
import numpy as np, os, sys, joblib
import ecg_plot
import pandas as pd
from glob import glob
import os
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tsai.all import *
import torch
import optuna
from optuna.integration import FastAIPruningCallback
from sklearn.metrics import classification_report
import transformation_funcs as tfs
import seaborn as sns
from torchsummary import summary
import argparse
# ---------------------------------------------------------------------------
# Command-line interface for one band-passing experiment run.
# Every literal below is part of the CLI contract (flag names, defaults, help
# text) or of the stdout log format, so the code is left byte-identical.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='experiment parameters')
# Short codes selecting batch-level pre-processing transforms; mapped to
# tfs.* transform objects further down in this script.
parser.add_argument('--batch_tfms',nargs="+",default=[],help="input list of ints ->n:normalize sc:scale bp:bandpass sh:shift")
# Which normalizer the "n" code selects.
parser.add_argument('--norm_type',default="minmax",help="normalization function (minmax, maxdiv,zscore,median,deci_scale)")
# ECGs are truncated to this many samples along the time axis.
parser.add_argument('--max_len',default=8000,type=int,help="max_len of ecgs")
# Band-pass cutoffs in Hz. NOTE(review): downstream these are passed as
# low_cut=LP, high_cut=HP to tfs.BandPass — confirm that matches the
# intended 1-50 Hz band (see note at the BandPass call site).
parser.add_argument('--HP',default=1,type=int,help="high pass filter")
parser.add_argument('--LP',default=50,type=int,help="low pass filter")
# Resampling factor and interpolation mode for the "sc" transform.
parser.add_argument('--scale',type=float,default=0.5,help="down/upsample scale")
parser.add_argument('--scale_type',default="nearest-exact",help="nearest / nearest-exact / area")
parser.add_argument('--gpu_num',default=0,type=int,help="gpu device")
parser.add_argument('--arch',default="inception",help="inception or minirocket")
parser.add_argument('--dataset',default="CPSC2018",help="CPSC2018 or chapmanshaoxing or PTBXL")
# NOTE(review): the training loop at the bottom of this file iterates
# range(20) and never reads cv_range — confirm which is intended.
parser.add_argument('--cv_range',default=[0,1,2,3,4],nargs="+",type=int,help="folds to train")
args = parser.parse_args()
# Echo the effective configuration so experiment logs are self-describing.
print("pre-processing funcs: ",args.batch_tfms)
print("gpu num :",args.gpu_num)
print("scaling :",args.scale)
print("training folds:",args.cv_range)
print("max len:",args.max_len)
print("dataset",args.dataset)
# Pin all subsequent CUDA work to the requested device.
torch.cuda.set_device(args.gpu_num)

# Unpack CLI arguments into the module-level names used throughout the script.
norm_type = args.norm_type
max_len = args.max_len
sf = args.scale            # resampling factor for the "sc" transform
LP = args.LP               # low-pass cutoff in Hz (per the CLI help text)
HP = args.HP               # high-pass cutoff in Hz (per the CLI help text)
scale_type = args.scale_type
cv_range = args.cv_range
architecture = args.arch
DATASET_ID = args.dataset
transforms = args.batch_tfms

# Build the batch-transform pipeline from the requested short codes.
batch_tfms = []
# Tag embedded in model / csv file names, e.g. "sc-n-bp".
# (The original '-'.join([x for x in transforms]) copied the list for no reason.)
processing_type = '-'.join(transforms)
if "sc" in transforms:
    batch_tfms.append(tfs.Scale(scale_factor=sf, mode=scale_type))
if "n" in transforms:
    # Dispatch table replaces the previous chain of independent ifs.
    # An unrecognized norm_type adds no normalizer, exactly as before.
    normalizers = {
        "minmax": tfs.NormMinMax,
        "maxdiv": tfs.NormMaxDiv,
        "zscore": tfs.NormZScore,
        "median": tfs.NormMedian,
        "deci_scale": tfs.NormDecimalScaling,
    }
    if norm_type in normalizers:
        batch_tfms.append(normalizers[norm_type]())
if "bp" in transforms:
    # NOTE(review): the CLI documents --LP as the low-pass and --HP as the
    # high-pass cutoff, yet they are passed as low_cut=LP, high_cut=HP.
    # If tfs.BandPass expects low_cut < high_cut, a 1-50 Hz band would need
    # low_cut=HP, high_cut=LP instead — confirm against transformation_funcs
    # before changing; argument order is preserved here.
    batch_tfms.append(tfs.BandPass(int(sf * 500), low_cut=LP, high_cut=HP, leads=12))
if "sh" in transforms:
    batch_tfms.append(tfs.RandomShift(0.1))
if len(transforms) == 0:
    processing_type = "raw"
print("transforms:", [x.name for x in batch_tfms])
print(processing_type)
# ---------------------------------------------------------------------------
# Load the pre-exported signal tensor and its label table, then convert the
# one-hot label frame into the list-of-label-lists format tsai expects.
# ---------------------------------------------------------------------------
DATASET_NAME = "WFDB_%s_signitured" % DATASET_ID
# mmap_mode='c' keeps the big array on disk (copy-on-write) instead of in RAM.
X = np.load('./data/big_numpy_datasets/%s.npy' % DATASET_NAME, mmap_mode='c')
label_df = pd.read_csv("data/%s.csv" % DATASET_NAME).drop(columns=["headers", "leads"])
# Project helper (helper_code): maps SNOMED codes to one-hot label columns.
y = snomedConvert(label_df)
# Drop rare labels: keep only columns present in at least 5% of the samples.
y = y[y.columns[y.sum() > 0.05 * len(y)]]
# 20 reproducible train/valid/test splits (10% valid, 10% test, unstratified).
cv_splits = get_splits(y.to_numpy(), n_splits=20, valid_size=.1, test_size=0.1, stratify=False, random_state=23, shuffle=True)
# One list of active label names per sample. The original version shadowed
# the outer loop variable `i` in the inner enumerate and wrapped the result
# in a redundant list(tuple(...)); zip over the columns avoids both.
# The explicit `== True` comparison is kept to preserve the original
# semantics for any non-boolean cell values.
y_multi = []
for _, row in y.iterrows():
    y_multi.append([col for col, flag in zip(y.columns, row) if flag == True])
label_counts = collections.Counter([a for r in y_multi for a in r])
print('Counts by label:', dict(label_counts))
# tsai transform pair: no X transform, multi-label target encoding.
tfms = [None, TSMultiLabelClassification()]
# ---------------------------------------------------------------------------
# Cross-validation training loop: train on each requested fold, restore the
# best checkpoint, evaluate on that fold's test split, and dump a per-label
# classification report to csv.
# Fix: the original iterated range(20) and silently ignored the --cv_range
# argument that is parsed and printed above; now only the requested folds run.
# ---------------------------------------------------------------------------
for cv_num in cv_range:
    # Fold datasets/loaders: splits[cv_num] carries (train, valid) index lists.
    dsets = TSDatasets(X.astype(float)[:, :, 0:max_len], y_multi, tfms=tfms, splits=cv_splits[cv_num])
    dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[64, 128], batch_tfms=batch_tfms, num_workers=0)
    metrics = [precision_multi, recall_multi, specificity_multi, F1_multi]
    if architecture == "inception":
        model = InceptionTimePlus(dls.vars, dls.c, dls.len)
    elif architecture == "minirocket":
        model = MiniRocketPlus(dls.vars, dls.c, dls.len)
    elif architecture == "xresnet1d101":
        model = xresnet1d101(dls.vars, dls.c)
    else:
        # Fail fast with a clear message instead of the NameError the
        # original raised later when `model` was never assigned.
        raise ValueError("unknown --arch: %s" % architecture)
    # Encodes the full experiment configuration so checkpoints/csvs are unique.
    model_name = "%s_%s_%s_%s_%s-%s_%s" % (architecture, DATASET_ID, processing_type, sf, HP, LP, cv_num)
    learn = Learner(dls, model, metrics=metrics,
                    cbs=[fastai.callback.all.SaveModelCallback(
                            monitor="F1_multi", fname=model_name),
                         fastai.callback.all.EarlyStoppingCallback(
                            monitor='F1_multi', min_delta=0.005, patience=50)],
                    model_dir="models/bandpassing/")
    learn.fit_one_cycle(300, lr_max=0.01)
    # Restore the best-F1 checkpoint saved by SaveModelCallback, then evaluate
    # on the held-out test indices (fold_splits[2]); pairing them with the
    # train indices makes tsai expose the test set as the "valid" loader.
    learn.load(model_name)
    fold_splits = cv_splits[cv_num]
    dsets = TSDatasets(X.astype(float)[:, :, 0:max_len], y_multi, tfms=tfms, splits=(fold_splits[0], fold_splits[2]))
    dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[128, 128], batch_tfms=batch_tfms, num_workers=0)
    valid_probas, valid_targets, valid_preds = learn.get_preds(dl=dls.valid, with_decoded=True)
    # NOTE(review): assumes the decoded preds are raw scores thresholded at 0
    # (logit > 0 <=> prob > 0.5) — confirm against tsai's decode step.
    y_pred = (valid_preds > 0)
    y_test = valid_targets
    report = classification_report(y_test, y_pred, target_names=dls.vocab.o2i.keys(), digits=3, output_dict=True)
    df = pd.DataFrame(report).reset_index()
    df.to_csv("models/bandpassing/csvs/%s.csv" % model_name, index=False)