In [1]:
import os
import sys
import numpy as np
import pandas as pd
import datetime
from scipy import interpolate
from scipy.integrate import simps

sys.path.insert(0, '..')
sys.path.append('./sleep_general')
from mgh_sleeplab import read_psg_from_bdsp, load_mgh_signal, annotations_preprocess, vectorize_respiratory_events, vectorize_sleep_stages, vectorize_arousals, vectorize_limb_movements
from sleep_analysis_functions import compute_spo2_clean, compute_hypoxia_burden, hypoxaemic_burden_minutes
from tqdm import tqdm

In [2]:
# base_folder = '../project-files/bdsp-opendata-psg/data/S0001/'
base_folder = '../project-files/PSG/data/S0001/'

assert os.path.exists(base_folder)

In [3]:
if 0:
    df = pd.read_excel('../project-files/bdsp-staging-area/wolfgang/mastersheet_outcome_deid.xlsx')
else:
    df = pd.read_csv('outcomes_hypoxia.csv')

In [4]:
df.hypoxia_note.value_counts(dropna=False)

all good                                                       8313
read_psg_from_bdsp failed: len(data_folder) = 0                 319
read_psg_from_bdsp failed: len(data_folder) = 2                  19
NaN                                                              10
axis 1 is out of bounds for array of dimension 1                  6
index 5637600 is out of bounds for axis 0 with size 5637600       1
index 2272800 is out of bounds for axis 0 with size 2272800       1
local variable 'samples_limit' referenced before assignment       1
index 5142400 is out of bounds for axis 0 with size 5142400       1
cannot convert float NaN to integer                               1
Name: hypoxia_note, dtype: int64

In [5]:
for jloc, row in tqdm(df.iterrows()):
    
    try:

        if pd.notna(row.hypoxia_note): continue # already done
        
        sid, dov = row[['HashID', 'DOVshifted']].values
#         dov = str(dov.date()).replace('-', '')
        assert len(dov) < 12
        dov = str(dov).replace('-', '')

        # load and prepare data
        signal, annotations, params = read_psg_from_bdsp(sid, dov, base_folder=base_folder)

        fs = int(params['Fs'])
        signal_len = len(signal)

        annotations = annotations_preprocess(annotations, fs)
        resp = vectorize_respiratory_events(annotations, signal_len)
        stage = vectorize_sleep_stages(annotations, signal_len)
        arousal = vectorize_arousals(annotations, signal_len)
        limb = vectorize_limb_movements(annotations, signal_len)
        data = signal
        data['apnea'] = resp

        dt_start = pd.Timestamp('2000-01-01 00:00:00')
        dt_end = dt_start + datetime.timedelta(seconds=(data.shape[0]-1) / fs)
        pseudo_dt_index = pd.date_range(start=dt_start, end=dt_end, periods=data.shape[0])
        data.index = pseudo_dt_index

        # compute hypoxia variables
        data = compute_spo2_clean(data, fs=fs)
        data['spo2'] = data['spo2_clean']
        data['apnea_binary'] = np.isin(data['apnea'],[1,2,3,4]).astype(int)
        data['apnea_end'] = np.isin(data['apnea_binary'].diff(), [-1])
        stage = stage[np.logical_not(pd.isna(stage))]
        hours_sleep = sum(stage<5)/fs/3600
        data, hypoxia_burden = compute_hypoxia_burden(data, fs, hours_sleep=hours_sleep, apnea_name='apnea')
        T90burden, T90desaturation, T90nonspecific = hypoxaemic_burden_minutes(data['spo2'].values, fs)

        df.loc[jloc, 'hypoxia_burden'] = hypoxia_burden
        df.loc[jloc, 'hypoxia_T90'] = T90burden
        df.loc[jloc, 'hypoxia_T90desat'] = T90desaturation
        df.loc[jloc, 'hypoxia_T90nonspecific'] = T90nonspecific
        df.loc[jloc, 'hypoxia_note'] = 'all good'
        
        df.to_csv('outcomes_hypoxia.csv', index=False)
        
    except Exception as e:
        print(jloc, sid, e)
        df.loc[jloc, 'hypoxia_note'] = e
        continue

647it [00:19, 44.25it/s]

130 2056e235a5ebb6cf2f0c3c0f816078542e58baec2198d6a7ee5e0bca22413480 read_psg_from_bdsp failed: len(annotations_path) = 0
646 cea64eff9989a69b9c84f33a13a9a2fe779339ef59cfb90c0a1935114f3bed40 read_psg_from_bdsp failed: len(annotations_path) = 0


3611it [00:19, 402.56it/s]

3610 9d9a9e1c5223f9781bb46288f7e697bd2dd800582cdf7bbae7e37ba26bf40fa8 read_psg_from_bdsp failed: len(annotations_path) = 0
3689 620514a1081177747a79cb59933914f4be7645a027d96e90530d39c0e0d8e7ad read_psg_from_bdsp failed: len(annotations_path) = 0
3763 37b81f5db1c89f133152f12ee08700b46d73a75ad81e9951505ca7a06cd0bedb read_psg_from_bdsp failed: len(annotations_path) = 0


4622it [00:20, 570.68it/s]

3898 c97dc33e7f3908810bb176f607dddcb127f82644f830d25848bbc04c8d961885 read_psg_from_bdsp failed: len(annotations_path) = 0
4466 7f35717b5d72b146f2d9375f5fca63503c0f755dbb71839eb3d20bd7a0c0ffff read_psg_from_bdsp failed: len(annotations_path) = 0


6555it [00:20, 1102.97it/s]

5782 5a08559758897337a7160cf6974b98a427f726744c125aab68736cf9a1073d52 read_psg_from_bdsp failed: len(annotations_path) = 0
6029 b7f94ee0aa84ef6adc9d9ef0277b822752d63ce12b0892956591b0774583958a read_psg_from_bdsp failed: len(annotations_path) = 0


8672it [00:20, 420.75it/s] 

8074 113ccaa0468a511103862678049813577c3c1294ff2cf762f8ae96dee14d321b read_psg_from_bdsp failed: len(annotations_path) = 0





In [5]:
df = pd.read_csv('outcomes_hypoxia.csv')

In [6]:
df.hypoxia_note.value_counts(dropna=False)

all good                                                       8313
read_psg_from_bdsp failed: len(data_folder) = 0                 319
read_psg_from_bdsp failed: len(data_folder) = 2                  19
axis 1 is out of bounds for array of dimension 1                  6
read_psg_from_bdsp failed: len(annotations_path) = 0              1
read_psg_from_bdsp failed: len(annotations_path) = 0              1
index 5142400 is out of bounds for axis 0 with size 5142400       1
local variable 'samples_limit' referenced before assignment       1
index 2272800 is out of bounds for axis 0 with size 2272800       1
read_psg_from_bdsp failed: len(annotations_path) = 0              1
read_psg_from_bdsp failed: len(annotations_path) = 0              1
index 5637600 is out of bounds for axis 0 with size 5637600       1
read_psg_from_bdsp failed: len(annotations_path) = 0              1
read_psg_from_bdsp failed: len(annotations_path) = 0              1
read_psg_from_bdsp failed: len(annotations_path)