![alt text](./pageheader_rose2_babies.jpg)

# SIPPV-VG ventilation data

#### Author: Dr Gusztav Belteki

This notebook imports the pickle archives *slow_measurements_sippv_1, slow_measurements_sippv_2 and slow_measurements_sippv_3* generated by notebook **SIPPV_all.ipynb**, containing all the SIPPV data from the first service evaluation (**DG001-DG060**). They are stored as dictionaries of DataFrames. This notebook then selects and keeps **only VG periods**. It also keeps only the relevant ventilator parameters and combines the DataFrames into one. After some further preprocessing, it exports the data to the pickle archives *slow_measurements_sippv_vg_1* and *slow_measurements_sippv_vg_2*. Exporting to two archives is necessary due to the amount of data.

*Preprocessing done on the data:*

*  Only SIPPV data are kept (this is done in part by manual lookup of ventilator settings)
*  Remove recordings which had < 12 hours duration
*  If there is more than one recording from one patient keep only the longer one
*  Add Pmax and VTset_kg to the DataFrames
*  Add VT_diff (VTmand_kg - VTset_kg) to the DataFrames
*  Add P_diff (Pmax - PIP) to the DataFrames



### Importing the necessary libraries and setting options

In [None]:
import IPython
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import sys
import pickle
import scipy as sp
from scipy import stats
from pandas import Series, DataFrame
from datetime import datetime, timedelta

# Draw figures inline in the notebook (IPython magic)
%matplotlib inline
# Use matplotlib's 'classic' style sheet with a white figure background
matplotlib.style.use('classic')
matplotlib.rcParams['figure.facecolor'] = 'w'

# Allow pandas to display up to 100 rows and 100 columns
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

### Importing custom functions from own module

In [None]:
from gb_loader import *
from gb_transform import *
from gb_stats import *
from gb_visualizer import *

In [None]:
# Report the interpreter and library versions this notebook was run with
for template, version in (
    ("Python version: {}", sys.version),
    ("IPython version: {}", IPython.__version__),
    ("pandas version: {}", pd.__version__),
    ("matplotlib version: {}", matplotlib.__version__),
    ("NumPy version: {}", np.__version__),
    ("SciPy version: {}", sp.__version__),
):
    print(template.format(version))

### List and set the working directory and the directory to write out data

In [None]:
# Topic of the notebook; it is also used as the name of the subfolder holding the results
TOPIC = 'VG'

# Name of the external hard drive
DRIVE = 'GUSZTI'

# Directory containing clinical and blood gas data
CWD = '/Users/guszti/ventilation_draeger'

# Directories to read the ventilation data from
DIR_READ1 = '/Volumes/%s/data_dump/draeger/SIPPV_all' % DRIVE  # processed slow measurement pickled data
DIR_READ2 = '/Volumes/%s/Draeger/service_evaluation_old' % DRIVE  # ventilator settings and modes
DIR_READ3 = '/Users/guszti/ventilation_draeger'

# Directory to write results and selected images to; created when it does not exist yet
DIR_WRITE = '%s/%s/%s' % (CWD, 'Analyses', TOPIC)
if not os.path.isdir(DIR_WRITE):
    os.makedirs(DIR_WRITE)

# Images and raw data are written to the external hard drive; created when missing
DATA_DUMP = '/Volumes/%s/data_dump/draeger/%s' % (DRIVE, TOPIC)
if not os.path.isdir(DATA_DUMP):
    os.makedirs(DATA_DUMP)

In [None]:
# Make CWD the working directory and echo it as a check
os.chdir(CWD)
os.getcwd()

In [None]:
# Show the directory the processed slow-measurement pickles are read from
DIR_READ1

In [None]:
# Show the directory the ventilator settings and modes are read from
DIR_READ2

In [None]:
# Show the third read directory (set to the same path as CWD)
DIR_READ3

In [None]:
# Show the directory results and selected images are written to
DIR_WRITE

In [None]:
# Show the directory on the external drive that receives images and raw data
DATA_DUMP

## Import ventilator data

### Import 'slow_measurements' data from pickle archive

In [None]:
# Load the three pickle archives holding the SIPPV slow measurements.
# NOTE: pickle.load executes arbitrary code from the file; only open archives
# produced by a trusted source.
archive_path = '%s/%s.pickle' % (DIR_READ1, 'slow_measurements_sippv_1')
with open(archive_path, 'rb') as handle:
    slow_measurements_1 = pickle.load(handle)

archive_path = '%s/%s.pickle' % (DIR_READ1, 'slow_measurements_sippv_2')
with open(archive_path, 'rb') as handle:
    slow_measurements_2 = pickle.load(handle)

archive_path = '%s/%s.pickle' % (DIR_READ1, 'slow_measurements_sippv_3')
with open(archive_path, 'rb') as handle:
    slow_measurements_3 = pickle.load(handle)

### Combine dictionaries into one dictionary and delete the subdictionaries to save memory

In [None]:
# Merge the three partial dictionaries; on a key clash the later archive wins
slow_measurements = {}
for part in (slow_measurements_1, slow_measurements_2, slow_measurements_3):
    slow_measurements.update(part)

In [None]:
# The merged dictionary now holds the data; drop the three partial dictionaries
del slow_measurements_1, slow_measurements_2, slow_measurements_3

In [None]:
# Number of recordings loaded
len(slow_measurements)

In [None]:
# Sorted list of the recording identifiers (the keys of slow_measurements)
recordings = sorted(slow_measurements)
print(recordings)

### Import clinical details

In [None]:
# Read the clinical details and index the table by the 'Recording' column,
# keeping that column in the table as well
clinical_details = pd.read_excel(
    '%s/data_grabber_patient_data_combined_old.xlsx' % CWD
).set_index('Recording', drop=False)

In [None]:
# Summarise the columns of the clinical details table
clinical_details.info()

In [None]:
# 'Current weight' of each recording divided by 1000
# (presumably grams converted to kg - confirm the units in the spreadsheet)
current_weights = {
    recording: clinical_details.loc[recording, 'Current weight'] / 1000
    for recording in recordings
}

### Import ventilator modes and settings

In [None]:
# For every recording, load the files picked out by slow_text_finder
vent_modes = {}

for recording in recordings:
    # Hidden files (names starting with '.') exist on the hard drive and are skipped
    visible = [name for name in os.listdir('%s/%s' % (DIR_READ2, recording))
               if not name.startswith('.')]
    fnames = ['%s/%s/%s' % (DIR_READ2, recording, filename)
              for filename in slow_text_finder(visible)]
    vent_modes[recording] = data_loader(fnames)

In [None]:
# Only the important mode parameters are kept in this dictionary
vent_modes_selected = {
    recording: vent_mode_cleaner(vent_modes[recording]) for recording in recordings
}

In [None]:
# For every recording, load the files picked out by slow_setting_finder
vent_settings = {}

for recording in recordings:
    # Hidden files (names starting with '.') exist on the hard drive and are skipped
    visible = [name for name in os.listdir('%s/%s' % (DIR_READ2, recording))
               if not name.startswith('.')]
    fnames = ['%s/%s/%s' % (DIR_READ2, recording, filename)
              for filename in slow_setting_finder(visible)]
    vent_settings[recording] = data_loader(fnames)

In [None]:
# Only the important ventilator settings are kept in this dictionary
vent_settings_selected = {
    recording: vent_settings_cleaner(vent_settings[recording]) for recording in recordings
}

### Import alarm settings

In [None]:
# For every recording, load the files picked out by alarm_setting_finder
alarm_settings = {}

for recording in recordings:
    # Hidden files (names starting with '.') exist on the hard drive and are skipped
    visible = [name for name in os.listdir('%s/%s' % (DIR_READ2, recording))
               if not name.startswith('.')]
    fnames = ['%s/%s/%s' % (DIR_READ2, recording, filename)
              for filename in alarm_setting_finder(visible)]
    alarm_settings[recording] = data_loader(fnames)

In [None]:
# Remove the etCO2 alarm limits (not used currently) and duplicated entries

alarm_settings_selected = {}

for recording in recordings:
    settings = alarm_settings[recording]
    # One mask drops both the lower ('etCO2_LL') and upper ('etCO2_HL') limit rows
    not_etco2 = ~settings['Id'].isin(['etCO2_LL', 'etCO2_HL'])
    # Rows sharing both 'Rel.Time [s]' and 'Name' are duplicates; the first is kept.
    # drop_duplicates returns a new frame here, so the filtered slice is never
    # mutated in place (which triggered SettingWithCopyWarning).
    alarm_settings_selected[recording] = settings[not_etco2].drop_duplicates(
        subset=['Rel.Time [s]', 'Name'])

### Import alarm states

In [None]:
# For every recording, load the files picked out by alarm_state_finder
alarm_states = {}

for recording in recordings:
    # Hidden files (names starting with '.') exist on the hard drive and are skipped
    visible = [name for name in os.listdir('%s/%s' % (DIR_READ2, recording))
               if not name.startswith('.')]
    fnames = ['%s/%s/%s' % (DIR_READ2, recording, filename)
              for filename in alarm_state_finder(visible)]
    alarm_states[recording] = data_loader(fnames)

### Identify and remove recordings or parts of recordings when the VG was off

The downloaded data mark only when VG was turned on, not when it was turned off. The presence and timing of VG can therefore only be inferred from vent_settings:

 - when Pmax appears VG is turned on 
 - when Pinsp appears VG is turned off

In [None]:
# Restrict the ventilator settings of each recording to the span covered by
# its slow measurements (first to last index label, both included)
vent_settings_selected_sippv = {}

for recording in recordings:
    sippv_index = slow_measurements[recording].index
    vent_settings_selected_sippv[recording] = \
        vent_settings_selected[recording][sippv_index[0]: sippv_index[-1]]

In [None]:
# Sort the recordings into two lists: those on volume guarantee (VG) throughout
# and those that need a closer look.
# A recording goes to 'has_no_vg' when a 'Pinsp' setting appears within its
# SIPPV span or when its mode texts contain no '/VG' entry.

vg_only = []
has_no_vg = []

for recording in recordings:
    setting_names = vent_settings_selected_sippv[recording]['Name'].values
    mode_texts = vent_modes_selected[recording]['Text'].values
    if 'Pinsp' in setting_names or '/VG' not in mode_texts:
        # contains non-volume guaranteed conventional ventilation
        has_no_vg.append(recording)
    else:
        vg_only.append(recording)

In [None]:
# Recordings classified as VG throughout
print(sorted(vg_only))

In [None]:
# Recordings with a 'Pinsp' setting or without a '/VG' mode entry
print(sorted(has_no_vg))

In [None]:
# Remove periods when the VG was off

# For each recording below only the rows between the two hard-coded timestamps
# are kept (row slicing by timestamp strings; assumes a time-based index - confirm).
slow_measurements['DG001'] = slow_measurements['DG001']['2015-09-25 13:42:46':'2015-09-25 18:21:55']
slow_measurements['DG013'] = slow_measurements['DG013']['2015-11-25 12:38:13':'2015-11-26 07:55:12']
slow_measurements['DG026'] = slow_measurements['DG026']['2016-01-27 18:14:42':'2016-01-29 08:57:49']
slow_measurements['DG027'] = slow_measurements['DG027']['2016-01-29 15:35:42':'2016-02-01 16:38:25']
slow_measurements['DG032_2'] = slow_measurements['DG032_2']['2016-03-24 13:45:35':'2016-03-26 02:06:24']
slow_measurements['DG045'] = slow_measurements['DG045']['2016-07-05 19:52:20':'2016-07-07 21:33:35']
# NOTE(review): the DG050 window spans only 10 seconds - confirm this is intended
slow_measurements['DG050'] = slow_measurements['DG050']['2016-09-05 11:11:56':'2016-09-05 11:12:06']
slow_measurements['DG056'] = slow_measurements['DG056']['2016-11-12 15:52:53':'2016-11-13 12:02:08']

# These deletions raise KeyError if the recording is not in the dictionary
del slow_measurements['DG002_1'] # This recording was completely non-VG
del slow_measurements['DG036'] # This short recording was completely non-VG
recordings = sorted(slow_measurements.keys()) # update recordings list

# For all other recordings VG was on throughout the SIPPV part of the recording.

### Remove recordings which had < 12 hours duration

In [None]:
# Number of recordings before the duration filter
len(recordings)

In [None]:
# Length of each recording, measured as its number of rows
# (treated as seconds below - presumably one row per second; confirm)
recording_duration = {
    recording: len(slow_measurements[recording]) for recording in recordings
}

# One-column table of the lengths, indexed by recording
recording_duration_frame = DataFrame([recording_duration]).transpose()
recording_duration_frame.columns = ['Length of recording (seconds)']

In [None]:
# Display the table of recording lengths
recording_duration_frame

In [None]:
# Keep only the recordings whose length reaches 12 * 3600 rows (12 hours)
long_enough = {}
for recording in recordings:
    if recording_duration[recording] >= 12 * 3600:
        long_enough[recording] = slow_measurements[recording]
slow_measurements = long_enough

In [None]:
# Refresh the sorted list of recordings after the duration filter
recordings = sorted(slow_measurements)

In [None]:
# Number of recordings left after the duration filter
len(recordings)

### If there is more than one recording from one patient keep only the longer one

In [None]:
# 'DG051_2' is removed by hand (per the section heading, the shorter of two
# recordings from one patient); raises KeyError if it is not present
slow_measurements.pop('DG051_2')
# Refresh the sorted list of recordings
recordings = sorted(slow_measurements)

In [None]:
# Number of recordings left after removing 'DG051_2'
len(recordings)

### Retrieving selected ventilation settings and normalising them to body weight

In [None]:
# Pmax setting of each recording, aligned to the index of its slow measurements
Pmax = {}
for recording in recordings:
    settings = vent_settings_selected[recording]
    pmax_changes = settings[settings.Id == 'Pmax'].copy()
    pmax_changes['Pmax'] = pmax_changes['Value New']
    # Forward-fill so that each measurement row carries the latest setting at or before it
    Pmax[recording] = pmax_changes.reindex(slow_measurements[recording].index, method = 'ffill')

In [None]:
# Target tidal volume of each recording divided by the current weight,
# aligned to the index of its slow measurements
VT_set = {}
for recording in recordings:
    settings = vent_settings_selected[recording]
    # 'VTi' is actually the target leak-compensated expiratory tidal volume;
    # it is labelled as 'VTi' by error
    vt_changes = settings[settings.Id == 'VTi'].copy()
    vt_changes['VTset_kg'] = vt_changes['Value New'] / current_weights[recording]
    # Forward-fill so that each measurement row carries the latest setting at or before it
    VT_set[recording] = vt_changes.reindex(slow_measurements[recording].index, method = 'ffill')

In [None]:
# Per recording: the 'Pmax' and 'VTset_kg' columns placed side by side
set_values = {
    recording: pd.concat([Pmax[recording]['Pmax'], VT_set[recording]['VTset_kg']], axis = 1)
    for recording in recordings
}

In [None]:
# Append the set values as extra columns, keeping only index labels present in both tables
for recording in recordings:
    measured_and_set = [slow_measurements[recording], set_values[recording]]
    slow_measurements[recording] = pd.concat(measured_and_set, axis = 1, join = 'inner')

In [None]:
# Add column 'VT_diff' = VTmand_kg - VTset_kg: the actual VT minus the target VT,
# positive if the actual VT is above the target, negative if it is below
for recording in recordings:
    frame = slow_measurements[recording]
    frame['VT_diff'] = frame['VTmand_kg'] - frame['VTset_kg']

In [None]:
# Create a column in the dataframes containing the difference between the allowed max pressure (Pmax) and 
# the actual PIP (Pmax - PIP): positive if PIP actual < Pmax, negative if PIP actual > Pmax
for recording in recordings:
    slow_measurements[recording]['P_diff'] = (slow_measurements[recording]['Pmax'] - 
                                             slow_measurements[recording]['PIP'])

### Filter the DataFrames to keep only the selected parameters

In [None]:
# Columns retained in every DataFrame; the order here is the column order of the output
columns_to_keep = [
    'FiO2', 'FlowDev',
    'MV_kg', 'MVe_kg', 'MVemand_kg', 'MVespon_kg', 'MVi_kg', 'MVleak_kg', 'MVspon%',
    'PEEP', 'PIP', 'P_diff', 'Pmax', 'Pmean', 'Pmin',
    'RR', 'RR_set', 'RRmand', 'RRspon',
    'VT_diff', 'VT_kg', 'VTe_kg', 'VTemand_kg', 'VTespon_kg', 'VTi_kg', 'VTimand_kg',
    'VTispon_kg', 'VTmand_kg', 'VTset_kg', 'VTspon_kg',
    'leak%', 'leak_comp', 'leak_comp_ON',
    'r2', 'recording',
]

In [None]:
# NOTE(review): 'total' is not used in the visible cells - confirm whether it can be removed
total = []
# Reduce every DataFrame to the columns listed in columns_to_keep (in that order)
for recording in recordings:
    slow_measurements[recording] = slow_measurements[recording][columns_to_keep]

### Write the DataFrames to pickle archives

In [None]:
# Split the recordings into two batches: the first 20 and the remainder
rec1 = recordings[:20]
rec2 = recordings[20:]

In [None]:
# First batch: the DataFrames of the recordings listed in rec1, written to one archive
first_batch = set(rec1)
slow_measurements_1 = {key: frame for key, frame in slow_measurements.items()
                       if key in first_batch}
archive_path = '%s/%s.pickle' % (DATA_DUMP, 'slow_measurements_sippv_vg_1')
with open(archive_path, 'wb') as handle:
    pickle.dump(slow_measurements_1, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Second batch: the DataFrames of the recordings listed in rec2, written to one archive
second_batch = set(rec2)
slow_measurements_2 = {key: frame for key, frame in slow_measurements.items()
                       if key in second_batch}
archive_path = '%s/%s.pickle' % (DATA_DUMP, 'slow_measurements_sippv_vg_2')
with open(archive_path, 'wb') as handle:
    pickle.dump(slow_measurements_2, handle, protocol=pickle.HIGHEST_PROTOCOL)