![alt text](./Cerny_logo_1.jpg)

# Analysis of Cerny ventilation recordings

The data processed and analysed in this Notebook were collected by the **Neonatal Emergency and Transport Service of the Peter Cerny Foundation**, Budapest, Hungary

**Author: Dr Gusztav Belteki**


## Analysis of mechanically ventilated cases

Explorative data analysis of **145 ventilated cases** among recordings `AL000001 - AL000300`. 

- It calculates statistics on clinical details of ventilated cases and exports them as Excel files and as graphs. 
- It identifies ventilator modes, recordings with multiple ventilation modes and in those, the dominant ventilator mode; exports Excel files and graphs of these. 
- It calculates descriptive statistics on various ventilator parameters in the individual recordings and writes them to Excel files in different formats (groupings).
- It produces time series graphs on various ventilator parameters and exports them.

Imported: 

- data_pars_measurements_ventilated_1_300.pickle,  
- data_pars_settings_ventilated_1_300.pickle, 
- data_pars_alarms_ventilated_1_300.pickle, 
- vent_modes_ventilated_1_300.pickle, 
- clin_df_1_300.pickle, 
- Fabian_parameters.xlsx

Exported: 

- Excel files and graphs about clinical data and ventilator modes 
- Time series graphs on ventilator parameters 
- **vent_modes_ventilated_1_300_plus.pickle** (additional data about multiple ventilator modes and dominant modes in the DataFrame)

### Importing the necessary libraries and setting options

In [None]:
import IPython
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import seaborn as sns
import sklearn as sk

import os
import sys
import re
import pickle

from scipy import stats
from pandas import Series, DataFrame
from datetime import datetime, timedelta

%matplotlib inline
# Use matplotlib's 'classic' style sheet with a white figure background
matplotlib.style.use('classic')
matplotlib.rcParams['figure.facecolor'] = 'w'

# Display up to 100 rows and 100 columns when a DataFrame is shown
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
# Optional: silence pandas' chained-assignment warning (left disabled)
# pd.set_option('mode.chained_assignment', None) 

In [None]:
# Report the interpreter and library versions used to run this Notebook
version_report = [
    ("Python version: {}", sys.version),
    ("pandas version: {}", pd.__version__),
    ("matplotlib version: {}", matplotlib.__version__),
    ("NumPy version: {}", np.__version__),
    ("SciPy version: {}", sp.__version__),
    ("IPython version: {}", IPython.__version__),
    ("scikit-learn version: {}", sk.__version__),
]
for template, version in version_report:
    print(template.format(version))

### List and set the working directory and the directory to write out data

In [None]:
# Topic of the Notebook which will also be the name of the subfolder containing results
TOPIC = 'fabian'

# Name of the external hard drive
DRIVE = 'GUSZTI'

# Directory containing clinical and blood gas data
CWD = '/Users/guszti/ventilation_fabian'

# Directory on external drive to read the ventilation data from
DIR_READ = '/Volumes/%s/Fabian/fabian_data' % DRIVE

# Directory that receives the Excel files and graphs written by this Notebook
DIR_WRITE = '%s/%s/%s' % (CWD, 'Analyses', 'analysis_ventilated_1_300')

# Images and raw data will be written on an external hard drive
DATA_DUMP = '/Volumes/%s/data_dump/%s' % (DRIVE, TOPIC)

# Create both output directories (including missing parents) if they do not
# exist yet; exist_ok avoids a separate check-then-create step
for directory in (DIR_WRITE, DATA_DUMP):
    os.makedirs(directory, exist_ok=True)

In [None]:
# Make CWD the working directory and display it to confirm the change
os.chdir(CWD)
os.getcwd()

In [None]:
# Display the directory the ventilation data are read from
DIR_READ

In [None]:
# Display the directory the results are written to
DIR_WRITE

In [None]:
# Display the data dump directory on the external drive
DATA_DUMP

### Import ventilator and clinical data from pickle archives

In [None]:
# Import ventilator parameters, settings and alarms

def _read_pickle_archive(stem):
    """Return the object stored in the pickle archive DATA_DUMP/<stem>.pickle."""
    # NOTE: pickle executes code while loading; only open archives created by
    # the companion notebooks of this project
    with open('%s/%s.pickle' % (DATA_DUMP, stem), 'rb') as archive:
        return pickle.load(archive)


data_pars_measurements_ventilated = _read_pickle_archive('data_pars_measurements_ventilated_1_300')
data_pars_settings_ventilated = _read_pickle_archive('data_pars_settings_ventilated_1_300')
data_pars_alarms_ventilated = _read_pickle_archive('data_pars_alarms_ventilated_1_300')

In [None]:
# Import DataFrame with ventilation modes

vent_modes_archive = '%s/%s.pickle' % (DATA_DUMP, 'vent_modes_ventilated_1_300')
with open(vent_modes_archive, 'rb') as handle:
    vent_modes_ventilated = pickle.load(handle)

In [None]:
# Import clinical data

clin_df_archive = '%s/%s.pickle' % (DATA_DUMP, 'clin_df_1_300')
with open(clin_df_archive, 'rb') as handle:
    clin_df = pickle.load(handle)

In [None]:
# Number of recordings in the imported measurements dictionary
len(data_pars_measurements_ventilated)

In [None]:
# Sorted list of the keys of the measurements dictionary; used as the list of
# recordings to iterate over in the rest of the Notebook
cases = sorted(data_pars_measurements_ventilated)

### Import table for interpreting ventilator parameters

In [None]:
# Read the parameter key table; the relative path is resolved against the
# current working directory
par_key_table = pd.read_excel('Fabian_parameters.xlsx')
# The trailing semicolon suppresses the display of the table in the Notebook
par_key_table;

## Statistics on clinical details of these cases

In [None]:
# Number of rows in the clinical data table
len(clin_df)

In [None]:
# Keep the clinical rows whose labels are in the index of vent_modes_ventilated
clin_df_ventilation = clin_df.loc[vent_modes_ventilated.index]

In [None]:
# Preview the first two rows
clin_df_ventilation.head(2)

In [None]:
# Column dtypes and non-null counts
clin_df_ventilation.info()

In [None]:
# Descriptive statistics of the clinical data, rounded to two decimals
clin_df_ventilation_stats = clin_df_ventilation.describe().round(2)
clin_df_ventilation_stats

In [None]:
# Export the clinical data and their descriptive statistics to a two-sheet
# Excel workbook. The context manager writes and closes the file on exit
# (ExcelWriter.save() is not available in pandas 2.x) and the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'clinical_data_ventilated_1_300.xlsx')) as writer:
    clin_df_ventilation.to_excel(writer, sheet_name='ventilated_cases')
    clin_df_ventilation_stats.to_excel(writer, sheet_name='stats')

In [None]:
# Box plot of gestational age and corrected gestational age, saved as an image
fig, ax = plt.subplots(figsize = (6, 4))
dpi = 300
filetype = 'jpg'
xticklabels = ['gestational age', 'corrected gestational age']

# Define styling for each boxplot component
medianprops = {'color': 'black', 'linewidth': 2}
boxprops = {'color': 'black', 'linestyle': '-'}
whiskerprops = {'color': 'black', 'linestyle': '-'}
capprops = {'color': 'black', 'linestyle': '-'}
flierprops = {'color': 'black', 'marker': '.'}

# Missing values are dropped first: the box statistics are not NaN-aware, so
# a single NaN would leave the corresponding box empty
ax.boxplot([clin_df_ventilation['Gestational Age (weeks)'].dropna().values,
            clin_df_ventilation['Corrected gestational Age (weeks)'].dropna().values],
           whis = [5, 95], showfliers = True, showmeans = True, medianprops=medianprops,
           boxprops=boxprops, whiskerprops=whiskerprops, capprops=capprops,
           flierprops = flierprops)

ax.set_ylabel('weeks', size = 14)
ax.tick_params(axis='both', which='major', labelsize=14)
ax.set_xticklabels(xticklabels)
ax.grid(True)

# 'papertype' and 'frameon' are not passed: current matplotlib releases no
# longer accept them and their former values were the defaults anyway
fig.savefig('%s/%s.%s' % (DIR_WRITE, 'ventilated_gest_age_1_300', filetype),
    dpi = dpi, facecolor='w', edgecolor='w', orientation='portrait', format = filetype,
    transparent=False, bbox_inches='tight', pad_inches=0.1);

In [None]:
# Box plot of birth weight and actual weight, saved as an image
fig, ax = plt.subplots(figsize = (6, 4))
dpi = 300
filetype = 'jpg'
xticklabels = ['birth weight', 'actual weight']

# Define styling for each boxplot component
medianprops = {'color': 'black', 'linewidth': 2}
boxprops = {'color': 'black', 'linestyle': '-'}
whiskerprops = {'color': 'black', 'linestyle': '-'}
capprops = {'color': 'black', 'linestyle': '-'}
flierprops = {'color': 'black', 'marker': '.'}

# Missing values are dropped first: the box statistics are not NaN-aware, so
# a single NaN would leave the corresponding box empty
ax.boxplot([clin_df_ventilation['Birth Weight'].dropna().values,
            clin_df_ventilation['Weight'].dropna().values],
           whis = [5, 95], showfliers = True, showmeans = True, medianprops=medianprops,
           boxprops=boxprops, whiskerprops=whiskerprops, capprops=capprops,
           flierprops = flierprops)

ax.set_xticklabels(xticklabels)
ax.set_ylim(0, 5500)
ax.set_ylabel('grams', size = 14)
ax.tick_params(axis='both', which='major', labelsize=14)
ax.grid(True)

# 'papertype' and 'frameon' are not passed: current matplotlib releases no
# longer accept them and their former values were the defaults anyway
fig.savefig('%s/%s.%s' % (DIR_WRITE, 'ventilated_weight_1_300', filetype),
    dpi = dpi, facecolor='w', edgecolor='w', orientation='portrait', format = filetype,
    transparent=False, bbox_inches='tight', pad_inches=0.1);

In [None]:
# Box plot of the recording duration in minutes, saved as an image
fig, ax = plt.subplots(figsize = (3, 4))
dpi = 300
filetype = 'jpg'
xticklabels = ['recording duration']

# Define styling for each boxplot component
medianprops = {'color': 'black', 'linewidth': 2}
boxprops = {'color': 'black', 'linestyle': '-'}
whiskerprops = {'color': 'black', 'linestyle': '-'}
capprops = {'color': 'black', 'linestyle': '-'}
flierprops = {'color': 'black', 'marker': '.'}

# Convert the duration to minutes explicitly instead of dividing by 60 * 1E+9.
# NOTE(review): assumes 'Duration' holds timedeltas or nanosecond counts, as
# the original nanoseconds-per-minute divisor implies - confirm against the
# Notebook that creates clin_df.
duration_minutes = pd.to_timedelta(clin_df_ventilation['Duration']).dt.total_seconds() / 60

# Missing values are dropped: the box statistics are not NaN-aware
ax.boxplot(duration_minutes.dropna().values,
           whis = [5, 95], showfliers = True, showmeans = True, medianprops=medianprops,
           boxprops=boxprops, whiskerprops=whiskerprops, capprops=capprops,
           flierprops = flierprops)

ax.set_xticklabels(xticklabels)
ax.set_ylabel('minutes', size = 14)
ax.tick_params(axis='both', which='major', labelsize=14)
ax.grid(True)

# 'papertype' and 'frameon' are not passed: current matplotlib releases no
# longer accept them and their former values were the defaults anyway
fig.savefig('%s/%s.%s' % (DIR_WRITE, 'ventilation_duration_1_300', filetype),
    dpi = dpi, facecolor='w', edgecolor='w', orientation='portrait', format = filetype,
    transparent=False, bbox_inches='tight', pad_inches=0.1);

## How many cases with the different ventilation modes

### Which recordings had more than one ventilation mode

In [None]:
def _n_modes_used(case):
    """Return how many of the five mode columns are non-zero for this recording."""
    mode_values = vent_modes_ventilated.loc[case][['IPPV', 'PSV', 'SIMV', 'SIMVPSV', 'SIPPV']]
    return (mode_values != 0).sum()


# Recordings in which more than one ventilation mode has a non-zero entry
multiple_mode = [case for case in cases if _n_modes_used(case) > 1]

In [None]:
# Number of recordings with more than one ventilation mode
len(multiple_mode)

In [None]:
# Flag every recording as 'Yes' or 'No' depending on whether it is listed in multiple_mode
has_multiple_modes = vent_modes_ventilated.index.isin(multiple_mode)
vent_modes_ventilated['multiple_mode'] = np.where(has_multiple_modes, 'Yes', 'No')

In [None]:
# Preview the table including the new 'multiple_mode' column
vent_modes_ventilated.head()

In [None]:
# Show the rows of the recordings with more than one ventilation mode
vent_modes_ventilated.loc[multiple_mode]

In [None]:
def autolabel(rects, ax=None):
    """
    Attach a text label above each bar displaying its height

    Parameters
    ----------
    rects : iterable
        Bar patches offering get_x(), get_width() and get_height(), e.g. the
        container returned by a bar plot call.
    ax : axes object, optional
        Axes to write the labels on. When omitted, the current pyplot axes
        are used, so the function does not rely on a global named `ax`.
    """
    if ax is None:
        ax = plt.gca()
    for rect in rects:
        height = rect.get_height()
        # Centre the label on the bar and place it 5% above the top; the
        # height is truncated to an integer for display
        ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
                '%d' % int(height), ha='center', va='bottom', size = 14)

### Considering all modes used during recordings

In [None]:
# For each column, print the number of recordings with a value above zero,
# followed by the total number of recordings
captions_and_columns = [
    ('SIMV:', 'SIMV'),
    ('SIPPV:', 'SIPPV'),
    ('SIMVPSV:', 'SIMVPSV'),
    ('PSV:', 'PSV'),
    ('IPPV:', 'IPPV'),
    ('VG_on:', 'VG_on'),
]
for caption, column in captions_and_columns:
    print(caption, sum(vent_modes_ventilated[column] > 0))
print('total', len(vent_modes_ventilated))

In [None]:
# Bar chart of the number of recordings in which each ventilation mode occurred
dpi = 300
filetype = 'jpg'
labels = ['SIMV', 'SIPPV', 'SIMV-PSV', 'IPPV', 'PSV']
xticks = np.arange(len(labels))
width = 0.6

# Columns of vent_modes_ventilated, listed in the same order as `labels`
mode_columns = ['SIMV', 'SIPPV', 'SIMVPSV', 'IPPV', 'PSV']
mode_counts = [sum(vent_modes_ventilated[column] > 0) for column in mode_columns]

fig, ax = plt.subplots(figsize = [6,4])
rects = ax.bar(xticks, mode_counts, width=width, color='black', alpha  = 0.75, align = 'center')

ax.set_xlabel('ventilation mode', size = 14)
ax.set_xticks(xticks)
ax.set_xticklabels(labels, size = 14, rotation = 0)
ax.set_ylabel('number of cases', size = 14)
ax.set_ylim(0, 130)
ax.grid(True)

# Write the count above each bar
autolabel(rects)

# 'papertype' and 'frameon' are not passed: current matplotlib releases no
# longer accept them and their former values were the defaults anyway
fig.savefig('%s/%s.%s' % (DIR_WRITE, 'vent_modes_ventilated_1_300', filetype),
    dpi = dpi, facecolor='w', edgecolor='w', orientation='portrait', format = filetype,
    transparent=False, bbox_inches='tight', pad_inches=0.1);

### Considering only the predominant ventilation modes

In [None]:
# For each recording, store the name of the mode column holding the largest value
mode_table = vent_modes_ventilated[['IPPV', 'PSV', 'SIMV', 'SIMVPSV', 'SIPPV']]
vent_modes_ventilated['dominant_mode'] = mode_table.idxmax(axis = 1)

In [None]:
# Preview the table including the new 'dominant_mode' column
vent_modes_ventilated.head()

In [None]:
# Number of recordings per dominant mode, most frequent first
vent_modes_ventilated['dominant_mode'].value_counts()

In [None]:
# Bar chart of the number of recordings per dominant ventilation mode
dpi = 300
filetype = 'jpg'
dominant_mode_counts = vent_modes_ventilated['dominant_mode'].value_counts()
xticks = np.arange(len(dominant_mode_counts))
fig, ax = plt.subplots(figsize = [6,4])

dominant_mode_counts.plot(kind = 'bar', ax = ax, color='black', alpha  = 0.75, rot= 'horizontal')

# value_counts() orders the bars by frequency, so the tick labels are taken
# from the plotted index rather than from a fixed list: every label stays
# under its own bar, also when fewer than five modes are present
mode_labels = [str(mode).replace('SIMVPSV', 'SIMV-PSV') for mode in dominant_mode_counts.index]
ax.set_xticklabels(mode_labels, size = 14, rotation = 0)
ax.set_xlabel('ventilation mode', size = 14)
ax.set_ylabel('number of cases', size = 14)
ax.set_ylim(0, 130)
ax.grid(True)

# 'papertype' and 'frameon' are not passed: current matplotlib releases no
# longer accept them and their former values were the defaults anyway
fig.savefig('%s/%s.%s' % (DIR_WRITE, 'vent_modes_ventilated_dominant_mode_1_300', filetype),
    dpi = dpi, facecolor='w', edgecolor='w', orientation='portrait', format = filetype,
    transparent=False, bbox_inches='tight', pad_inches=0.1);

### Considering only the recordings with a single ventilator mode

In [None]:
# Number of recordings flagged as having a single ventilation mode
sum(vent_modes_ventilated['multiple_mode'] == 'No')

In [None]:
# Among recordings flagged as single-mode, count how often each mode occurs
is_single_mode = vent_modes_ventilated['multiple_mode'] == 'No'
only_mode = vent_modes_ventilated.loc[is_single_mode, 'dominant_mode'].value_counts()
only_mode

In [None]:
# Bar chart of the ventilation mode in recordings with a single mode
dpi = 300
filetype = 'jpg'
xticks = np.arange(len(only_mode))
fig, ax = plt.subplots(figsize = [6,4])

only_mode.plot(kind = 'bar', ax = ax, color='black', alpha  = 0.75, rot= 'horizontal')

# only_mode is ordered by frequency and may contain fewer than five modes, so
# the tick labels are taken from its index rather than from a fixed list
mode_labels = [str(mode).replace('SIMVPSV', 'SIMV-PSV') for mode in only_mode.index]
ax.set_xticklabels(mode_labels, size = 14, rotation = 0)
ax.set_xlabel('ventilation mode', size = 14)
ax.set_ylabel('number of cases', size = 14)
ax.set_ylim(0, 100)
ax.grid(True)

# 'papertype' and 'frameon' are not passed: current matplotlib releases no
# longer accept them and their former values were the defaults anyway
fig.savefig('%s/%s.%s' % (DIR_WRITE, 'vent_modes_ventilated_only_mode_1_300', filetype),
    dpi = dpi, facecolor='w', edgecolor='w', orientation='portrait', format = filetype,
    transparent=False, bbox_inches='tight', pad_inches=0.1);

### How many recordings had VG ventilation and for how long

In [None]:
# Number of recordings with a missing (NaN) value in 'VG_on'
sum(vent_modes_ventilated['VG_on'].isnull())

In [None]:
# Number of recordings with some VG ventilation ('VG_on' above zero)
sum(vent_modes_ventilated['VG_on'] > 0)

In [None]:
# Number of recordings without VG ventilation: all recordings minus those
# with 'VG_on' above zero (rows with a missing 'VG_on' are counted here too)
len(vent_modes_ventilated) - sum(vent_modes_ventilated['VG_on'] > 0)

In [None]:
# Rows of the recordings with 'VG_on' above zero
has_VG = vent_modes_ventilated[vent_modes_ventilated['VG_on'] > 0]

In [None]:
# VG was on throughout the whole recording: 'VG_on' equals 'total'
only_VG = has_VG[has_VG['VG_on'] == has_VG['total']]
# Number of such recordings
len(only_VG)

In [None]:
# Dominant modes among the recordings with VG on throughout
only_VG['dominant_mode'].value_counts()

#### Save extended file about ventilation modes to pickle archive

In [None]:
# Write the extended ventilation modes table (with the 'multiple_mode' and
# 'dominant_mode' columns) to a pickle archive in DATA_DUMP
extended_modes_archive = '%s/%s.pickle' % (DATA_DUMP, 'vent_modes_ventilated_1_300_plus')
with open(extended_modes_archive, 'wb') as handle:
    pickle.dump(vent_modes_ventilated, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Resample the data to calculate 1-minute means

In [None]:
# One-minute means of the measurements and of the settings, per recording
data_pars_measurements_ventilated_1min_mean = {}
data_pars_settings_ventilated_1min_mean = {}

for case in cases:
    measurements = data_pars_measurements_ventilated[case]
    settings = data_pars_settings_ventilated[case]
    data_pars_measurements_ventilated_1min_mean[case] = measurements.resample('1min').mean()
    data_pars_settings_ventilated_1min_mean[case] = settings.resample('1min').mean()

# Statistics on ventilatory parameters of these cases

## Descriptive statistics on `measured ventilator parameters`

### Statistics on individual cases

In [None]:
# Descriptive statistics of the measured parameters for each recording,
# rounded to two decimals, with the describe() rows renamed
stat_row_names = ['data_points', 'mean', 'SD', 'min', '5pc',
                  '25pc', 'median', '75pc', '95pc', 'max']
stats_pars_measurements_ventilated = {}

for case in cases:
    case_stats = data_pars_measurements_ventilated[case].describe(
        percentiles = (0.05, 0.25, 0.5, 0.75, 0.95)).round(2)
    case_stats.index = stat_row_names
    stats_pars_measurements_ventilated[case] = case_stats

In [None]:
# Example: statistics of the measured parameters for recording AL000008
stats_pars_measurements_ventilated['AL000008']

In [None]:
# Create table with statistics for all cases and all relevant parameters:
# the per-case tables are joined side by side and transposed, so the rows are
# indexed by (case, parameter) and the columns are the statistics
stats_pars_measurements_ventilated_all = pd.concat(stats_pars_measurements_ventilated, axis = 1).T

In [None]:
# Remove measured parameters not given in case of mechanical ventilation:
# rows in which all of the statistics listed below are missing are dropped
value_columns = ['mean', 'SD', 'min', '5pc', '25pc', 'median', '75pc', '95pc', 'max']
stats_pars_measurements_ventilated_all.dropna(axis = 0, how = 'all', subset = value_columns,
                                              inplace = True)

In [None]:
# Column dtypes and non-null counts of the combined table
stats_pars_measurements_ventilated_all.info()

In [None]:
# Preview the combined table
stats_pars_measurements_ventilated_all.head()

In [None]:
# Example: all rows of recording AL000003
stats_pars_measurements_ventilated_all.loc['AL000003']

In [None]:
# Swap the two index levels to select by parameter name; show the first rows
# for VTemand_resp_kg
stats_pars_measurements_ventilated_all.swaplevel(0,1).loc['VTemand_resp_kg'].head()

### Statistics on individual parameters

In [None]:
# selected individual parameters
parameters = ['C20_C', 'Cdyn', 'FiO2', 'Leak', 'MAP',  'MV_kg', 'MVresp', 'PEEP', 'PIP','R', 'RR', 
              'Trigger', 'VTemand_kg', 'VTespon_pat_kg', 'VTemand_resp_kg',  'VTimand_kg']

# One table per parameter, with the rows sorted by descending mean
stats_pars_measurements_ventilated_2 = {}
measurement_stats_by_parameter = stats_pars_measurements_ventilated_all.swaplevel(0,1)

for parameter in parameters:
    parameter_stats = measurement_stats_by_parameter.loc[parameter]
    stats_pars_measurements_ventilated_2[parameter] = \
        parameter_stats.sort_values('mean', ascending = False)

In [None]:
# Example: the rows with the highest mean VTemand_resp_kg
stats_pars_measurements_ventilated_2['VTemand_resp_kg'].head()

In [None]:
# Unstack table to create table for all parameters with different configuration:
# the innermost index level is moved into the columns
stats_pars_measurements_ventilated_all_2 = stats_pars_measurements_ventilated_all.unstack()

In [None]:
# Column dtypes and non-null counts of the unstacked table
stats_pars_measurements_ventilated_all_2.info()

In [None]:
# Preview the unstacked table
stats_pars_measurements_ventilated_all_2.head()

In [None]:
# Preview the 'mean' block of the unstacked table
stats_pars_measurements_ventilated_all_2['mean'].head()

### Export statistics to a multisheet Excel file and pickle archive

In [None]:
# Save statistics into Excel file: one sheet per recording.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stats_pars_measurements_ventilated_1_300.xlsx')) as writer:
    for case in cases:
        stats_pars_measurements_ventilated[case].to_excel(writer, sheet_name=case)

In [None]:
# Save statistics into Excel file: one sheet per measured parameter.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stats_pars_measurements_ventilated_1_300_2.xlsx')) as writer:
    for parameter in parameters:
        stats_pars_measurements_ventilated_2[parameter].to_excel(writer, sheet_name=parameter)

In [None]:
# Save statistics into Excel file: both layouts of the combined table.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stats_pars_measurements_ventilated_all_1_300.xlsx')) as writer:
    stats_pars_measurements_ventilated_all.to_excel(writer, sheet_name='stats_all_1')
    stats_pars_measurements_ventilated_all_2.to_excel(writer, sheet_name='stats_all_2')

### Group statistics

In [None]:
# Create short alias for variable name (the unstacked measurement statistics)
a = stats_pars_measurements_ventilated_all_2

In [None]:
# How many data points for the various parameters in each recording
a['data_points'].head()

In [None]:
# Some parameters are only present in case of SIMV: VTemand, VTespon, MVresp
# Some parameters are only present in case of PSV: Ti_PSV

# Non-null counts show in how many recordings each parameter has statistics
a['data_points'].info()

In [None]:
# Distribution of the per-recording means across recordings
a['mean'].describe()

In [None]:
# Distribution of the per-recording medians across recordings
a['median'].describe()

In [None]:
# Percentiles reported in the group statistics
percentiles = [0.001, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.999]

# Save statistics into Excel file: distribution of the per-recording means
# and medians across recordings.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stats_pars_measurements_ventilated_group_1_300.xlsx')) as writer:
    stats_pars_measurements_ventilated_all_2['mean'].describe(
        percentiles = percentiles).to_excel(writer, sheet_name='mean')
    stats_pars_measurements_ventilated_all_2['median'].describe(
        percentiles = percentiles).to_excel(writer, sheet_name='median')

## Descriptive statistics on `ventilator settings`

### Statistics on individual cases

In [None]:
# Descriptive statistics of the ventilator settings for each recording,
# rounded to two decimals, with the describe() rows renamed
settings_stat_row_names = ['data_points', 'mean', 'SD', 'min', '5pc',
                           '25pc', 'median', '75pc', '95pc', 'max']
stats_pars_settings_ventilated = {}

for case in cases:
    case_settings_stats = data_pars_settings_ventilated[case].describe(
        percentiles = (0.05, 0.25, 0.5, 0.75, 0.95)).round(2)
    case_settings_stats.index = settings_stat_row_names
    stats_pars_settings_ventilated[case] = case_settings_stats

In [None]:
# Example: statistics of the ventilator settings for recording AL000008
stats_pars_settings_ventilated['AL000008']

In [None]:
# Create table with statistics for all cases and all relevant parameters:
# the per-case tables are joined side by side and transposed, so the rows are
# indexed by (case, setting) and the columns are the statistics
stats_pars_settings_ventilated_all = pd.concat(stats_pars_settings_ventilated, axis = 1).T

In [None]:
# Remove settings without any values: rows in which all of the statistics
# listed below are missing are dropped
settings_value_columns = ['mean', 'SD', 'min', '5pc', '25pc', 'median', '75pc', '95pc', 'max']
stats_pars_settings_ventilated_all.dropna(axis = 0, how = 'all', subset = settings_value_columns,
                                          inplace = True)

In [None]:
# Column dtypes and non-null counts of the combined settings table
stats_pars_settings_ventilated_all.info()

In [None]:
# Preview the combined settings table
stats_pars_settings_ventilated_all.head()

In [None]:
# Example: all rows of recording AL000007
stats_pars_settings_ventilated_all.loc['AL000007']

### Statistics on individual settings

In [None]:
# selected ventilator settings
parameters = ['FiO2_set', 'Flow_exp_set', 'Flow_insp_set', 'IE_E_set', 'IE_I_set',
       'MV_lim_high_set', 'MV_lim_high_set_kg', 'MV_lim_low_set',
       'MV_lim_low_set_kg', 'PEEP_set', 'PIP_lim_high_set', 'PIP_lim_low_set',
       'PIP_set', 'PIP_set_PSV', 'RR_set', 'Te_set', 'Term_criteria_PSV_set',
       'Ti_set', 'Trigger_sens_set', 'VG_set', 'VG_set_kg']

# One table per setting, with the rows sorted by descending mean
stats_pars_settings_ventilated_2 = {}
settings_stats_by_parameter = stats_pars_settings_ventilated_all.swaplevel(0,1)

for parameter in parameters:
    setting_stats = settings_stats_by_parameter.loc[parameter]
    stats_pars_settings_ventilated_2[parameter] = \
        setting_stats.sort_values('mean', ascending = False)

In [None]:
# Example: the rows with the highest mean Ti_set
stats_pars_settings_ventilated_2['Ti_set'].head()

In [None]:
# Unstack table to create table for all settings with different configuration:
# the innermost index level is moved into the columns
stats_pars_settings_ventilated_all_2 = stats_pars_settings_ventilated_all.unstack()

In [None]:
# Column dtypes and non-null counts of the unstacked settings table
stats_pars_settings_ventilated_all_2.info()

In [None]:
# Preview the first ten rows of the unstacked settings table
stats_pars_settings_ventilated_all_2.head(10)

In [None]:
# Preview the 'mean' block of the unstacked settings table
stats_pars_settings_ventilated_all_2['mean'].head(10)

### Export statistics to a multisheet Excel file and pickle archive

In [None]:
# Save statistics into Excel file: one sheet per recording.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stats_pars_settings_ventilated_1_300.xlsx')) as writer:
    for case in cases:
        stats_pars_settings_ventilated[case].to_excel(writer, sheet_name=case)

In [None]:
# Save statistics into Excel file: one sheet per ventilator setting.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stats_pars_settings_ventilated_1_300_2.xlsx')) as writer:
    for parameter in parameters:
        stats_pars_settings_ventilated_2[parameter].to_excel(writer, sheet_name=parameter)

In [None]:
# Save statistics into Excel file: both layouts of the combined settings table.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stats_pars_settings_ventilated_all_1_300.xlsx')) as writer:
    stats_pars_settings_ventilated_all.to_excel(writer, sheet_name='stats_all_1')
    stats_pars_settings_ventilated_all_2.to_excel(writer, sheet_name='stats_all_2')

### Group statistics

In [None]:
# Create short alias for variable name (the unstacked settings statistics);
# this rebinds the alias `a` used earlier for the measurement statistics
a = stats_pars_settings_ventilated_all_2

In [None]:
# How many data points for the various settings in each recording
a['data_points'].head()

In [None]:
# Some parameters are only present in case of VG ventilation: VG_set
# Some parameters are only present in case of PSV: PIP_set_PSV, Term_criteria_PSV_set
# MV limits, Apnea time were not always set
# Trigger sensitivity was not always set (no flow sensor / not synchronized??)

# Non-null counts show in how many recordings each setting has statistics
a['data_points'].info()

In [None]:
# Distribution of the per-recording means of the settings across recordings
a['mean'].describe()

In [None]:
# Distribution of the per-recording medians of the settings across recordings
a['median'].describe()

In [None]:
# Percentiles reported in the group statistics
percentiles = [0.001, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.999]

# Save statistics into Excel file: distribution of the per-recording means
# and medians of the settings across recordings.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stats_pars_settings_ventilated_group_1_300.xlsx')) as writer:
    stats_pars_settings_ventilated_all_2['mean'].describe(
        percentiles = percentiles).to_excel(writer, sheet_name='mean')
    stats_pars_settings_ventilated_all_2['median'].describe(
        percentiles = percentiles).to_excel(writer, sheet_name='median')

### Statistics on categorical settings

In [None]:
# Settings columns analysed as categorical variables in the following cells
parameters_2 = ['Flow_sensor_state', 'Measuring_unit_pressure_set', 
        'Oxy_sensor_state', 'Patient_range', 'Powerstate',
       'Pressure_rise_control',  'Trigger_mode', 'VG_state', 'Ventilation_stopped',
       'Ventilator_mode', 'Ventilator_range']

In [None]:
# Stack the settings of all recordings into one table (columns sorted) and
# keep only the categorical columns listed in parameters_2
data_pars_settings_ventilated_combined_cat = pd.concat(data_pars_settings_ventilated, sort = True)[parameters_2]

In [None]:
# Preview the combined categorical settings
data_pars_settings_ventilated_combined_cat.head()

In [None]:
# Summary of the categorical settings
data_pars_settings_ventilated_combined_cat.describe()

- Measuring_unit_pressure_set: always `cmH2O`
- Patient_range: always neonatal
- Pressure_rise_control: always I-flow
- Trigger_mode: always Volumetrigger
- Ventilator_range: always Neonatal


In [None]:
# Number of missing values in each categorical column
data_pars_settings_ventilated_combined_cat.isnull().sum()

In [None]:
# Frequency of each 'Powerstate' value
data_pars_settings_ventilated_combined_cat['Powerstate'].value_counts()

In [None]:
# Frequency of each 'Ventilation_stopped' value
data_pars_settings_ventilated_combined_cat['Ventilation_stopped'].value_counts()

In [None]:
# Frequency of each 'Ventilator_mode' value
data_pars_settings_ventilated_combined_cat['Ventilator_mode'].value_counts()

In [None]:
# Create alias for long name (the combined categorical settings table)
c = data_pars_settings_ventilated_combined_cat

In [None]:
# Save statistics into Excel file: summary of the categorical settings.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'stat_pars_settings_ventilated_combined_cat_1_300.xlsx')) as writer:
    data_pars_settings_ventilated_combined_cat.describe().to_excel(writer, sheet_name='categorical vars')

## Descriptive statistics on `ventilator alarms`

In [None]:
# Stack the alarm tables of all recordings into one table (columns sorted)
# and replace missing entries with 0
stacked_alarms = pd.concat(data_pars_alarms_ventilated, sort = True)
data_pars_settings_ventilated_alarms_all = stacked_alarms.fillna(0)

In [None]:
# Total number of rows in the combined alarm table
len(data_pars_settings_ventilated_alarms_all)

In [None]:
# Preview the combined alarm table
data_pars_settings_ventilated_alarms_all.head()

In [None]:
# Frequency of each 'Alarm_susp' value
data_pars_settings_ventilated_alarms_all['Alarm_susp'].value_counts()

In [None]:
# Print the frequency of each distinct value for every alarm column
for col in data_pars_settings_ventilated_alarms_all:
    print(col, '\n', data_pars_settings_ventilated_alarms_all[col].value_counts(), '\n')

In [None]:
# Column sums as a percentage of the total number of rows
data_pars_settings_ventilated_alarms_all.sum() / len(data_pars_settings_ventilated_alarms_all) * 100

In [None]:
# Move the second index level into a column (in place) and group the rows by
# the remaining index.
# NOTE(review): the reset is in place, so running this cell a second time
# operates on the already modified table - reload the alarm table first.
# NOTE(review): the moved level stays in the table as an ordinary column and
# therefore takes part in the group sums below - confirm this is intended.
data_pars_settings_ventilated_alarms_all.reset_index(level = 1, inplace = True)
grouped = data_pars_settings_ventilated_alarms_all.groupby(data_pars_settings_ventilated_alarms_all.index)

In [None]:
# Column sums within each group
alarm_counts = grouped.sum()
alarm_counts.head()

In [None]:
# Column sums within each group as a percentage of the group's row count
alarm_pc = grouped.sum().div(grouped.size(), axis = 0) * 100
alarm_pc.head()

In [None]:
# Save statistics into Excel file: alarm sums and percentages per group.
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() is not available in pandas 2.x); the sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'alarm_stats_ventilated_1_300.xlsx')) as writer:
    alarm_counts.to_excel(writer, sheet_name='alarm_counts')
    alarm_pc.to_excel(writer, sheet_name='alarm_pc')

## Write relevant graphs about all recordings to the DATA_DUMP folder

### Graphs for ventilator recordings

#### Tidal volume of mandatory inflations

In [None]:
%%time

# Save one time series graph of VTemand_kg (tidal volume of mandatory
# inflations) for every recording
par = 'VTemand_kg'
dim = 'mL/kg'; filetype = 'jpg'; dpi = 200

for case in cases:
    # print('Saving %s' % case)
    # Create the per-case output folder if it is missing, so that saving the
    # figure cannot fail on a folder that has not been created yet
    case_dir = '%s/%s/%s' % (DATA_DUMP, 'fabian_cases', case)
    os.makedirs(case_dir, exist_ok=True)

    fig = plt.figure()
    fig.set_size_inches(8, 4)
    fig.subplots_adjust(left=None, bottom=None, right=None, top=None, 
                            wspace=None, hspace=0.1)
    ax = fig.add_subplot(1, 1, 1)
    data_pars_measurements_ventilated[case][par].plot(ax = ax, label = par)
    ax.set_xlabel('Time', size = 14, color = 'black')
    ax.set_ylabel(dim, size = 14, color = 'black')
    # ax.set_ylim(0, 10)
    ax.set_title(case,  size = 14, color = 'black')
    ax.legend()
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')
    # Candidate tick positions every 15 minutes; only used when the
    # set_xticks line below is enabled
    xticks = pd.date_range(start = data_pars_measurements_ventilated[case].index.min(), 
                       end = data_pars_measurements_ventilated[case].index.max(), freq='15min')
    #ax.set_xticks(xticks)
    ax.tick_params(which = 'both', labelsize=12)

    # 'papertype' and 'frameon' are not passed: current matplotlib releases
    # no longer accept them and their former values were the defaults anyway
    fig.savefig('%s/%s_%s.%s' % (case_dir, case, par, filetype), dpi = dpi, 
        facecolor='w', edgecolor='w', orientation='portrait', 
        format = filetype, transparent=False, bbox_inches='tight',
        pad_inches=0.1);

    # Close this figure explicitly to release its memory before the next case
    plt.close(fig)

#### Inspiratory and expiratory mandatory tidal volume

In [None]:
%%time

# Save one time series graph showing VTimand_kg and VTemand_kg (inspiratory
# and expiratory mandatory tidal volume) for every recording
pars = ['VTimand_kg', 'VTemand_kg']
name = 'VTimand_emand'
dim = 'mL/kg'; filetype = 'jpg'; dpi = 200

for case in cases:
    # print('Saving %s' % case)
    # Create the per-case output folder if it is missing, so that saving the
    # figure cannot fail on a folder that has not been created yet
    case_dir = '%s/%s/%s' % (DATA_DUMP, 'fabian_cases', case)
    os.makedirs(case_dir, exist_ok=True)

    fig = plt.figure()
    fig.set_size_inches(8, 4)
    fig.subplots_adjust(left=None, bottom=None, right=None, top=None, 
                            wspace=None, hspace=0.1)
    ax = fig.add_subplot(1, 1, 1)
    for par_name in pars:
        data_pars_measurements_ventilated[case][par_name].plot(ax = ax, label = par_name)
    ax.set_xlabel('Time', size = 14, color = 'black')
    ax.set_ylabel(dim, size = 14, color = 'black')
    # ax.set_ylim(0, 10)
    ax.set_title(case,  size = 14, color = 'black')
    ax.legend()
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')
    # Candidate tick positions every 15 minutes; only used when the
    # set_xticks line below is enabled
    xticks = pd.date_range(start = data_pars_measurements_ventilated[case].index.min(), 
                       end = data_pars_measurements_ventilated[case].index.max(), freq='15min')
    #ax.set_xticks(xticks)
    ax.tick_params(which = 'both', labelsize=12)

    # 'papertype' and 'frameon' are not passed: current matplotlib releases
    # no longer accept them and their former values were the defaults anyway
    fig.savefig('%s/%s_%s.%s' % (case_dir, case, name, filetype), dpi = dpi, 
        facecolor='w', edgecolor='w', orientation='portrait', 
        format = filetype, transparent=False, bbox_inches='tight',
        pad_inches=0.1);

    # Close this figure explicitly to release its memory before the next case
    plt.close(fig)

#### Minute volume

In [None]:
%%time

# Save one time series graph of MV_kg (minute volume) for every recording
par = 'MV_kg'
dim = 'mL/kg/min'; filetype = 'jpg'; dpi = 200

for case in cases:
    # print('Saving %s' % case)
    # Create the per-case output folder if it is missing, so that saving the
    # figure cannot fail on a folder that has not been created yet
    case_dir = '%s/%s/%s' % (DATA_DUMP, 'fabian_cases', case)
    os.makedirs(case_dir, exist_ok=True)

    fig = plt.figure()
    fig.set_size_inches(8, 4)
    fig.subplots_adjust(left=None, bottom=None, right=None, top=None, 
                            wspace=None, hspace=0.1)
    ax = fig.add_subplot(1, 1, 1)
    data_pars_measurements_ventilated[case][par].plot(ax = ax, label = par)
    ax.set_xlabel('Time', size = 14, color = 'black')
    ax.set_ylabel(dim, size = 14, color = 'black')
    ax.set_title(case,  size = 14, color = 'black')
    ax.legend()
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')
    # Candidate tick positions every 15 minutes; only used when the
    # set_xticks line below is enabled
    xticks = pd.date_range(start = data_pars_measurements_ventilated[case].index.min(), 
                       end = data_pars_measurements_ventilated[case].index.max(), freq='15min')
    #ax.set_xticks(xticks)
    ax.tick_params(which = 'both', labelsize=12)

    # 'papertype' and 'frameon' are not passed: current matplotlib releases
    # no longer accept them and their former values were the defaults anyway
    fig.savefig('%s/%s_%s.%s' % (case_dir, case, par, filetype), dpi = dpi, 
        facecolor='w', edgecolor='w', orientation='portrait', 
        format = filetype, transparent=False, bbox_inches='tight',
        pad_inches=0.1);

    # Close this figure explicitly to release its memory before the next case
    plt.close(fig)

#### Pressures (PIP, MAP and PEEP)

In [None]:
%%time

# Save one time series graph showing PIP, MAP and PEEP for every recording
pars = ['PIP', 'MAP', 'PEEP']
name = 'pressures'
dim = 'cmH2O'; filetype = 'jpg'; dpi = 200

for case in cases:
    # print('Saving %s' % case)
    # Create the per-case output folder if it is missing, so that saving the
    # figure cannot fail on a folder that has not been created yet
    case_dir = '%s/%s/%s' % (DATA_DUMP, 'fabian_cases', case)
    os.makedirs(case_dir, exist_ok=True)

    fig = plt.figure()
    fig.set_size_inches(8, 4)
    fig.subplots_adjust(left=None, bottom=None, right=None, top=None, 
                            wspace=None, hspace=0.1)
    ax = fig.add_subplot(1, 1, 1)
    # The target axes and the legend labels are passed explicitly instead of
    # relying on the current axes and on the column names
    data_pars_measurements_ventilated[case][pars[0]].plot(ax = ax, label = pars[0])
    data_pars_measurements_ventilated[case][pars[1]].plot(ax = ax, label = pars[1],
                                                          color = 'black', linewidth = 2)
    data_pars_measurements_ventilated[case][pars[2]].plot(ax = ax, label = pars[2],
                                                          color = 'red')

    ax.set_xlabel('Time', size = 14, color = 'black')
    ax.set_ylabel(dim, size = 14, color = 'black')
    ax.set_title(case,  size = 14, color = 'black')
    ax.legend()
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')
    # Candidate tick positions every 15 minutes; only used when the
    # set_xticks line below is enabled
    xticks = pd.date_range(start = data_pars_measurements_ventilated[case].index.min(), 
                       end = data_pars_measurements_ventilated[case].index.max(), freq='15min')
    #ax.set_xticks(xticks)
    ax.tick_params(which = 'both', labelsize=12)

    # 'papertype' and 'frameon' are not passed: current matplotlib releases
    # no longer accept them and their former values were the defaults anyway
    fig.savefig('%s/%s_%s.%s' % (case_dir, case, name, filetype), dpi = dpi, 
        facecolor='w', edgecolor='w', orientation='portrait', 
        format = filetype, transparent=False, bbox_inches='tight',
        pad_inches=0.1);

    # Close this figure explicitly to release its memory before the next case
    plt.close(fig)

#### Backup respiratory rate

In [None]:
%%time

# Save one time series graph of RR_set (backup respiratory rate) for every
# recording
par = 'RR_set'
dim = '1/min'; filetype = 'jpg'; dpi = 200

for case in cases:
    # print('Saving %s' % case)
    # Create the per-case output folder if it is missing, so that saving the
    # figure cannot fail on a folder that has not been created yet
    case_dir = '%s/%s/%s' % (DATA_DUMP, 'fabian_cases', case)
    os.makedirs(case_dir, exist_ok=True)

    fig = plt.figure()
    fig.set_size_inches(8, 4)
    fig.subplots_adjust(left=None, bottom=None, right=None, top=None, 
                            wspace=None, hspace=0.1)
    ax = fig.add_subplot(1, 1, 1)
    data_pars_settings_ventilated[case][par].plot(ax = ax, label = par)
    ax.set_xlabel('Time', size = 14, color = 'black')
    ax.set_ylabel(dim, size = 14, color = 'black')
    ax.set_ylim(0, 70)
    ax.set_title(case,  size = 14, color = 'black')
    ax.legend()
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')
    # Candidate tick positions every 15 minutes; only used when the
    # set_xticks line below is enabled
    xticks = pd.date_range(start = data_pars_measurements_ventilated[case].index.min(), 
                       end = data_pars_measurements_ventilated[case].index.max(), freq='15min')
    #ax.set_xticks(xticks)
    ax.tick_params(which = 'both', labelsize=12)

    # 'papertype' and 'frameon' are not passed: current matplotlib releases
    # no longer accept them and their former values were the defaults anyway
    fig.savefig('%s/%s_%s.%s' % (case_dir, case, par, filetype), dpi = dpi, 
        facecolor='w', edgecolor='w', orientation='portrait', 
        format = filetype, transparent=False, bbox_inches='tight',
        pad_inches=0.1);

    # Close this figure explicitly to release its memory before the next case
    plt.close(fig)

#### Actual rate and backup rate for those recordings that have SIPPV and/or SIMVPSV parts

In [None]:
%%time

# For every case whose `vent_modes_ventilated` row has a positive 'SIPPV' or
# 'SIMVPSV' value, plot the measured 'RR' together with the 'RR_set' setting
# on one axis and save the graph as
# DATA_DUMP/fabian_cases/<case>/<case>_rates.jpg.
pars = ['RR', 'RR_set']
name = 'rates'
dim = '1/min'; filetype = 'jpg'; dpi = 200

for case in cases:
    # Look the row up once instead of once per mode column.
    case_modes = vent_modes_ventilated.loc[case]
    if not (case_modes['SIPPV'] > 0 or case_modes['SIMVPSV'] > 0):
        continue

    fig, ax = plt.subplots(figsize=(8, 4))
    fig.subplots_adjust(hspace=0.1)

    data_pars_measurements_ventilated[case][pars[0]].plot(ax=ax, label=pars[0])
    data_pars_settings_ventilated[case][pars[1]].plot(ax=ax, label=pars[1])
    ax.set_xlabel('Time', size=14, color='black')
    ax.set_ylabel(dim, size=14, color='black')
    # The y-axis is left autoscaled here (no fixed limits are applied).
    ax.set_title(case, size=14, color='black')
    ax.legend()
    ax.grid(True, linestyle='-', linewidth=0.5, color='gray')
    ax.tick_params(which='both', labelsize=12)

    # `papertype` and `frameon` are intentionally not passed: both were only
    # ever given their default values here, and `savefig` no longer accepts
    # them in Matplotlib 3.x (passing them raises TypeError).
    out_path = os.path.join(DATA_DUMP, 'fabian_cases', case,
                            '%s_%s.%s' % (case, name, filetype))
    fig.savefig(out_path, dpi=dpi, facecolor='w', edgecolor='w',
                orientation='portrait', format=filetype, transparent=False,
                bbox_inches='tight', pad_inches=0.1)

    # Close this specific figure so that open figures do not accumulate
    # while looping over all cases.
    plt.close(fig)
    

#### FiO2

In [None]:
%%time

# Plot the `FiO2_set` setting of every case in `cases` as a time series and
# save one image per case as
# DATA_DUMP/fabian_cases/<case>/<case>_FiO2_set.jpg.
par = 'FiO2_set'
dim = '%'; filetype = 'jpg'; dpi = 200

for case in cases:
    fig, ax = plt.subplots(figsize=(8, 4))
    fig.subplots_adjust(hspace=0.1)

    # The legend entry is set through the line's own label. Calling
    # `ax.legend('FiO2')` would treat the string as a sequence of labels and
    # label the single line with just its first character, 'F'.
    data_pars_settings_ventilated[case][par].plot(ax=ax, label='FiO2')
    ax.set_xlabel('Time', size=14, color='black')
    ax.set_ylabel(dim, size=14, color='black')
    ax.set_ylim(0, 100)  # same fixed y-range for every case
    ax.set_title(case, size=14, color='black')
    ax.legend()
    ax.grid(True, linestyle='-', linewidth=0.5, color='gray')
    ax.tick_params(which='both', labelsize=12)

    # `papertype` and `frameon` are intentionally not passed: both were only
    # ever given their default values here, and `savefig` no longer accepts
    # them in Matplotlib 3.x (passing them raises TypeError).
    out_path = os.path.join(DATA_DUMP, 'fabian_cases', case,
                            '%s_%s.%s' % (case, par, filetype))
    fig.savefig(out_path, dpi=dpi, facecolor='w', edgecolor='w',
                orientation='portrait', format=filetype, transparent=False,
                bbox_inches='tight', pad_inches=0.1)

    # Close this specific figure so that open figures do not accumulate
    # while looping over all cases.
    plt.close(fig)

#### Leak

In [None]:
%%time

# Plot the measured `Leak` of every case in `cases` as a time series and save
# one image per case as DATA_DUMP/fabian_cases/<case>/<case>_Leak.jpg.
par = 'Leak'
dim = '%'; filetype = 'jpg'; dpi = 200

for case in cases:
    fig, ax = plt.subplots(figsize=(8, 4))
    fig.subplots_adjust(hspace=0.1)

    data_pars_measurements_ventilated[case][par].plot(ax=ax, label=par)
    ax.set_xlabel('Time', size=14, color='black')
    ax.set_ylabel(dim, size=14, color='black')
    # Lower limit is slightly below zero so values at 0 do not sit on the axis.
    ax.set_ylim(-5, 100)
    ax.set_title(case, size=14, color='black')
    ax.legend()
    ax.grid(True, linestyle='-', linewidth=0.5, color='gray')
    ax.tick_params(which='both', labelsize=12)

    # `papertype` and `frameon` are intentionally not passed: both were only
    # ever given their default values here, and `savefig` no longer accepts
    # them in Matplotlib 3.x (passing them raises TypeError).
    out_path = os.path.join(DATA_DUMP, 'fabian_cases', case,
                            '%s_%s.%s' % (case, par, filetype))
    fig.savefig(out_path, dpi=dpi, facecolor='w', edgecolor='w',
                orientation='portrait', format=filetype, transparent=False,
                bbox_inches='tight', pad_inches=0.1)

    # Close this specific figure so that open figures do not accumulate
    # while looping over all cases.
    plt.close(fig)