In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import vitaldb
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Seed for the case shuffle so that the case order (and anything derived from
# it, e.g. a train/test split) is identical on every Restart & Run All.
SEED = 42

df_trks = pd.read_csv('https://api.vitaldb.net/trks')  # read track list
df_cases = pd.read_csv("https://api.vitaldb.net/cases")  # read case information

# Inclusion & exclusion criteria: a case is kept only if it is older than 18
# and has every one of the tracks listed below.
REQUIRED_TRACKS = [
    'SNUADC/ART',
    'SNUADC/PLETH',
    'EV1000/SV',
    'EV1000/CO',
    'EV1000/SVV',
]

eligible = set(df_cases.loc[df_cases['age'] > 18, 'caseid'])
for tname in REQUIRED_TRACKS:
    eligible &= set(df_trks.loc[df_trks['tname'] == tname, 'caseid'])

# Sort first: the shuffle result depends on the starting order, and a set
# carries no ordering guarantee.
caseids = sorted(eligible)

print('Total {} cases found'.format(len(caseids)))
np.random.default_rng(SEED).shuffle(caseids)  # reproducible in-place shuffle of caseids

Total 541 cases found


In [None]:
# --- Run configuration -------------------------------------------------------
SRATE = 100         # sampling rate for the arterial waveform signal
MINUTES_AHEAD = 1   # predict hypotension 1 minute ahead
SEUIL = 70          # hypotension threshold ("seuil" = threshold)
BATCH_SIZE = 256    # batch size for learning
MAX_CASES = 10      # maximum number of cases for this example

# --- Accumulators for the final training/testing dataset ---------------------
# x          : input, one entry per segment -> (segments, timepoints)
# y          : output, one label per segment -> (segments,)
# valid_mask : validity flag of each segment
# c          : caseid of each segment
# x1         : extra list; not referenced in the visible part of the notebook
x, y, valid_mask, c, x1 = [], [], [], [], []

# Plausibility limits for individual samples. Samples outside these limits are
# treated as artifacts and replaced by NaN (they are NOT deleted, so that the
# time axis is preserved and ART and PLETH stay sample-aligned).
ART_MIN, ART_MAX = 30, 180
PLETH_MIN, PLETH_MAX = -10, 100


def _moving_mean(sig, n):
    """Return the NaN-aware mean of `sig` over every window of `n` samples.

    The result has ``len(sig) - n + 1`` entries. Each entry is the mean of the
    non-NaN samples in its window; a window containing only NaN yields NaN.
    """
    finite = ~np.isnan(sig)
    sums = np.cumsum(np.where(finite, sig, 0.0), dtype=np.float64)
    counts = np.cumsum(finite, dtype=np.float64)
    # turn running totals into per-window totals
    sums[n:] = sums[n:] - sums[:-n]
    counts[n:] = counts[n:] - counts[:-n]
    sums, counts = sums[n - 1:], counts[n - 1:]
    means = np.full(sums.shape, np.nan)
    np.divide(sums, counts, out=means, where=counts > 0)
    return means


def _segment_is_valid(art_in, art_out, pleth_in):
    """Return True when the input/outcome windows are clean enough to use.

    A segment is rejected when any window has more than 10 % missing samples,
    or when an arterial window is nearly flat (range < 30) or contains an
    abrupt sample-to-sample jump (> 30), which indicates noise.
    """
    for seg in (art_in, art_out, pleth_in):
        if np.isnan(seg).mean() > 0.1:
            return False
    for seg in (art_in, art_out):
        if np.nanmax(seg) - np.nanmin(seg) < 30:
            return False
        if (np.abs(np.diff(seg)) > 30).any():  # abrupt change -> noise
            return False
    return True


for caseid in caseids:
    print(f'loading {caseid}', end='...', flush=True)

    # Load both waveforms in a single call so that they share one time axis
    # (one row per sample, one column per track).
    vals = vitaldb.load_case(caseid, ['SNUADC/ART', 'SNUADC/PLETH'], 1 / SRATE)
    if vals is None or len(vals) == 0:  # defensive: nothing could be loaded
        print('no sample')
        continue
    arts = vals[:, 0].astype(np.float32)  # astype() copies, so vals is untouched
    plet = vals[:, 1].astype(np.float32)

    # Flag artifact samples in place instead of removing them.
    arts[(arts < ART_MIN) | (arts > ART_MAX)] = np.nan
    plet[(plet < PLETH_MIN) | (plet > PLETH_MAX)] = np.nan

    # Trim the leading/trailing part of the recording where ART is missing,
    # applying the SAME cut to PLETH to keep both signals aligned.
    art_ok = ~np.isnan(arts)
    if not art_ok.any():
        print('no sample')
        continue
    first = int(np.argmax(art_ok))
    last = len(art_ok) - int(np.argmax(art_ok[::-1]))
    arts = arts[first:last]
    plet = plet[first:last]

    case_sample = 0
    case_event = 0

    # Slide a 20 s input window in 10 s steps; the outcome window is the
    # 1 minute that starts MINUTES_AHEAD minutes after the input window ends.
    for i in range(0, len(arts) - SRATE * (20 + (1 + MINUTES_AHEAD) * 60), 10 * SRATE):
        segx = arts[i:i + SRATE * 20]
        segy = arts[i + SRATE * (20 + MINUTES_AHEAD * 60):i + SRATE * (20 + (MINUTES_AHEAD + 1) * 60)]
        seg2x = plet[i:i + SRATE * 20]

        # check the validity of this segment
        valid = _segment_is_valid(segx, segy, seg2x)

        # 2 sec moving avg of the outcome window; an event is a window whose
        # moving average never reaches the threshold.
        avg = _moving_mean(segy, 2 * SRATE)
        evt = bool(np.isfinite(avg).any() and np.nanmax(avg) < SEUIL)

        # Every segment is stored (valid or not); valid_mask tells them apart.
        x.append(seg2x)
        y.append(evt)
        valid_mask.append(valid)
        c.append(caseid)

        if valid:
            case_sample += 1
            if evt:
                case_event += 1

    if case_sample > 0:
        print("{} samples {} ({:.1f} %) events".format(case_sample, case_event, 100 * case_event / case_sample))
    else:
        print('no sample')

    # To cap the run for a quick demo, re-enable:
    # if len(np.unique(c)) >= MAX_CASES:
    #     break

# final caseids: only the cases that contributed at least one segment
caseids = np.unique(c)

# convert lists to numpy arrays
x = np.array(x)
y = np.array(y)
valid_mask = np.array(valid_mask)
c = np.array(c)

# Fill missing samples along the time axis: forward fill, then backward fill
# for gaps at the start of a segment. DataFrame.ffill/bfill replace the
# deprecated fillna(method=...) form.
x = pd.DataFrame(x).ffill(axis=1).bfill(axis=1).to_numpy()

# normalization
# Written out-of-place on purpose: the array handed back by pandas may be
# read-only, in which case `x -= SEUIL` would raise.
# NOTE(review): x is filled from the PLETH segments, yet it is centred/scaled
# with SEUIL, the hypotension threshold — confirm this scaling is intended.
x = (x - SEUIL) / SEUIL

# add axis for CNN
x = x[..., None]

print(x.shape)
print(y.shape)

loading 2356...loading 2356...

In [None]:
# Persist the dataset. The file suffix is derived from SEUIL so the names
# always match the threshold actually used (SEUIL = 70 -> 'x_70.npy').
np.save(f'x_{SEUIL}.npy', x)
np.save(f'y_{SEUIL}.npy', y)
# x and y still contain the segments flagged as invalid; store the validity
# mask and the per-segment case ids with them so the files are usable alone.
np.save(f'valid_mask_{SEUIL}.npy', valid_mask)
np.save(f'c_{SEUIL}.npy', c)