![alt text](./Cerny_logo_1.jpg)

# Analysis of Cerny ventilation recordings

#### Manual revision and trimming of ventilator recordings.

This notebook imports the preprocessed **Fabian ventilator parameters** data from pickle archives and trims the recordings based on manual inspection of the data.

The data processed and analysed in this Notebook were collected by the **Neonatal Emergency and Transport Service of the Peter Cerny Foundation**, Budapest, Hungary

**Author: Dr Gusztav Belteki**

### 1. Import the required libraries and set options

In [1]:
import IPython
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import os
import sys
import pickle

from pandas import Series, DataFrame
from datetime import datetime, timedelta
from matplotlib import dates

# Render figures inline and use the 'classic' matplotlib style with a white figure background
%matplotlib inline
matplotlib.style.use('classic')
matplotlib.rcParams['figure.facecolor'] = 'w'

# Show up to 100 rows and 100 columns when a DataFrame is displayed
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
# pd.set_option('mode.chained_assignment', None)

# NOTE(review): this silences every warning for the rest of the session,
# including library deprecation warnings - consider a narrower filter.
import warnings
warnings.simplefilter("ignore")

In [2]:
# Record the interpreter and library versions used to produce the outputs below
print(f"Python version: {sys.version}")
print(f"pandas version: {pd.__version__}")
print(f"matplotlib version: {matplotlib.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"IPython version: {IPython.__version__}")

Python version: 3.9.18 (main, Sep 11 2023, 08:38:23) 
[Clang 14.0.6 ]
pandas version: 2.1.4
matplotlib version: 3.8.0
NumPy version: 1.24.3
IPython version: 8.15.0


### 2. List and set the working directory and the directory to write out data

In [3]:
# Name of the external hard drive
DRIVE = 'GUSZTI'

# Mount point of the external drive, as path components
_drive_root = (os.sep, 'Volumes', DRIVE)

# Directory on the external drive to read the clinical data from
DIR_READ = os.path.join(*_drive_root, 'Fabian', 'fabian_patient_data_all')

# Path to the project folder containing ventilation research results
PATH = os.path.join(
    os.sep, 'Users', 'guszti', 'Library', 'Mobile Documents',
    'com~apple~CloudDocs', 'Documents', 'Research', 'Ventilation',
)

# Folder to export the results of the analysis to
DIR_WRITE = os.path.join(PATH, 'ventilation_fabian', 'Analyses')

# Folder on the external drive used to exchange processed data with other Notebooks
DATA_DUMP = os.path.join(*_drive_root, 'data_dump', 'fabian')

# Create both output folders if they do not exist yet
for _out_dir in (DIR_WRITE, DATA_DUMP):
    os.makedirs(_out_dir, exist_ok = True)

In [4]:
DIR_READ, DIR_WRITE, DATA_DUMP

('/Volumes/GUSZTI/Fabian/fabian_patient_data_all',
 '/Users/guszti/Library/Mobile Documents/com~apple~CloudDocs/Documents/Research/Ventilation/ventilation_fabian/Analyses',
 '/Volumes/GUSZTI/data_dump/fabian')

### 3. Import pickle archives

In [5]:
def _load_archive(filename):
    """Unpickle the archive `filename` stored in DATA_DUMP and return its content."""
    # pickle can execute arbitrary code while loading: only read archives
    # that were written by the project's own notebooks.
    with open(os.path.join(DATA_DUMP, filename), 'rb') as handle:
        return pickle.load(handle)


clin_df = _load_archive('clin_df_1_1100.pickle')
data_pars_measurements = _load_archive('data_pars_measurements_1_1100.pickle')
data_pars_settings = _load_archive('data_pars_settings_1_1100.pickle')
data_pars_alarms = _load_archive('data_pars_alarms_1_1100.pickle')

In [6]:
len(data_pars_measurements)

792

In [7]:
# Recording identifiers; this is a live view of the dictionary keys, not a copy
cases = data_pars_measurements.keys()

### EDA on ventilation modes

##### How many cases of the different ventilation modes occur

In [8]:
# One row per recording, one column per ventilator mode; each count of rows is
# multiplied by two to get the number of seconds
vent_modes = DataFrame({
    case: data_pars_settings[case]['Ventilator_mode'].value_counts() * 2
    for case in cases
}).T

# Modes that do not occur in a recording come out as NaN; store them as 0
vent_modes = vent_modes.replace(np.nan, 0)

In [9]:
# Duration of each recording: number of rows in its settings table times two
recording_duration = {
    case: 2 * len(data_pars_settings[case])
    for case in cases
}

In [10]:
# Modes summed into the 'ventilation' group (PSV is not included) and into the
# 'noninvasive' group.
_invasive_modes = ['IPPV', 'SIMV', 'SIPPV', 'SIMVPSV']
_noninvasive_modes = ['CPAP', 'DUOPAP', 'NCPAP', 'O2therapy']

# reindex() supplies a zero column for any mode that never occurs in the
# dataset, so the sums do not raise KeyError on a missing column.
# skipna=False keeps the NaN propagation of a plain column-by-column addition.
vent_modes['ventilation'] = (
    vent_modes.reindex(columns = _invasive_modes, fill_value = 0)
              .sum(axis = 1, skipna = False)
)
vent_modes['noninvasive'] = (
    vent_modes.reindex(columns = _noninvasive_modes, fill_value = 0)
              .sum(axis = 1, skipna = False)
)

# Total recording length, aligned on the recording identifier
vent_modes['total'] = Series(recording_duration)

In [11]:
vent_modes.head()

Ventilator_mode,CPAP,DUOPAP,IPPV,NCPAP,O2therapy,PSV,SIMV,SIMVPSV,SIPPV,ventilation,noninvasive,total
AL000003,0.0,0.0,0.0,4.0,0.0,0.0,6294.0,0.0,0.0,6294.0,4.0,6298
AL000005,0.0,0.0,0.0,2548.0,0.0,0.0,0.0,0.0,0.0,0.0,2548.0,2548
AL000006,0.0,0.0,0.0,164.0,0.0,0.0,8.0,2834.0,0.0,2842.0,164.0,3006
AL000007,0.0,0.0,0.0,0.0,0.0,0.0,7156.0,74.0,0.0,7230.0,0.0,7230
AL000008,0.0,0.0,0.0,0.0,0.0,0.0,6762.0,0.0,0.0,6762.0,0.0,6762


In [12]:
# Number of recordings in which each mode or group has a non-zero duration.
# SIMVPSV and PSV are not reported here.
for _mode in ('SIMV', 'SIPPV', 'IPPV', 'NCPAP', 'CPAP', 'DUOPAP',
              'O2therapy', 'ventilation', 'noninvasive'):
    print(_mode + ':', sum(vent_modes[_mode] > 0))
print('total', len(vent_modes))

SIMV: 426
SIPPV: 248
IPPV: 95
NCPAP: 445
CPAP: 16
DUOPAP: 101
O2therapy: 128
ventilation: 605
noninvasive: 577
total 792


In [13]:
# Column totals: seconds summed over all recordings for every column of vent_modes
total_duration = vent_modes.sum(axis = 0).to_frame(name = 'duration (seconds)')
total_duration

Unnamed: 0_level_0,duration (seconds)
Ventilator_mode,Unnamed: 1_level_1
CPAP,3634.0
DUOPAP,196538.0
IPPV,65176.0
NCPAP,1023242.0
O2therapy,268132.0
PSV,264.0
SIMV,1195964.0
SIMVPSV,97416.0
SIPPV,632714.0
ventilation,1991270.0


##### Export Dataframes containing ventilator modes to Excel files and pickle archives

In [14]:
# Write both tables into one workbook. The context manager saves and closes
# the file even if one of the exports raises. sheet_name is passed by keyword
# because passing it positionally is deprecated in pandas 2.x.
with pd.ExcelWriter(os.path.join(DIR_WRITE, 'ventilation_modes_1_1100.xlsx')) as writer:
    vent_modes.to_excel(writer, sheet_name = 'vent_modes')
    total_duration.to_excel(writer, sheet_name = 'total_duration')

In [15]:
# NOTE(review): this file name has no '.pickle' extension, unlike the archives
# read at the top of the notebook. It is left unchanged here because other
# notebooks may open it by this exact name - confirm before renaming.
_vent_modes_archive = os.path.join(DATA_DUMP, 'vent_modes_1_1100')
with open(_vent_modes_archive, 'wb') as handle:
    pickle.dump(vent_modes, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Only consider those recordings that have more than 15 minutes (900 seconds) of mechanical ventilation

In [16]:
# Keep the recordings with strictly more than 900 seconds in the 'ventilation' group
_enough_ventilation = vent_modes['ventilation'] > 900
vent_modes_ventilated = vent_modes.loc[_enough_ventilation]
len(vent_modes_ventilated)

408

In [17]:
vent_modes_ventilated.head()

Ventilator_mode,CPAP,DUOPAP,IPPV,NCPAP,O2therapy,PSV,SIMV,SIMVPSV,SIPPV,ventilation,noninvasive,total
AL000003,0.0,0.0,0.0,4.0,0.0,0.0,6294.0,0.0,0.0,6294.0,4.0,6298
AL000006,0.0,0.0,0.0,164.0,0.0,0.0,8.0,2834.0,0.0,2842.0,164.0,3006
AL000007,0.0,0.0,0.0,0.0,0.0,0.0,7156.0,74.0,0.0,7230.0,0.0,7230
AL000008,0.0,0.0,0.0,0.0,0.0,0.0,6762.0,0.0,0.0,6762.0,0.0,6762
AL000009,0.0,0.0,0.0,0.0,0.0,0.0,42.0,0.0,2756.0,2798.0,0.0,2798


In [18]:
# From here on `cases` is a sorted list of the retained recording identifiers
cases = list(vent_modes_ventilated.index)
cases.sort()
len(cases)

408

### Remove recordings that had no working flow sensor and hence no tidal volume measurements

In [19]:
# Recordings to exclude from the analysis
to_remove = [
    'AL000314', 'AL000350', 'AL000354', 'AL000401', 'AL000449', 'AL000459',
    'AL000492', 'AL000493', 'AL000629', 'AL000828', 'AL000890', 'AL000893',
    'AL000895', 'AL000897', 'AL000934', 'AL000944', 'AL000975',
]

# Drop each listed recording from `cases` in place; identifiers that are not
# in the list are skipped silently
for case in to_remove:
    try:
        cases.remove(case)
    except ValueError:
        pass

### Remove the periods from the beginning and the end of the recordings when the patient was not connected to the ventilator

This requires manual inspection of the tidal volume and pressure graphs.

The `limit` dictionary loaded below contains, for each recording, a tuple of the start and end points as strings.
These were obtained by manually inspecting VTmand and PIP in the recordings
and cutting off the start and the end when the baby was not on the ventilator (e.g. no VTmand).

In [20]:
# Read the four archives of manually determined limits, in recording order
_limit_parts = []
for _limit_file in ('limit_1_300_ventilated.pickle',
                    'limit_301_600_ventilated.pickle',
                    'limit_601_900_ventilated.pickle',
                    'limit_901_1100_ventilated.pickle'):
    with open(os.path.join(DATA_DUMP, _limit_file), 'rb') as handle:
        _limit_parts.append(pickle.load(handle))

limit_1_300, limit_301_600, limit_601_900, limit_901_1100 = _limit_parts

# Merge into a single dictionary; on a duplicate key the later archive wins
limit = {}
for _part in _limit_parts:
    limit.update(_part)

In [21]:
limit;

In [22]:
len(limit)

391

In [23]:
print(sorted(set(cases) - set(limit)))

[]


In [24]:
# Save the merged limits as one archive.
# NOTE(review): the file name has no '.pickle' extension, unlike the limit
# archives it was built from - left unchanged, confirm before renaming.
_limit_archive = os.path.join(DATA_DUMP, 'limit_1_1100_ventilated')
with open(_limit_archive, 'wb') as handle:
    pickle.dump(limit, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [25]:
# Cut every retained recording down to its manually determined window

data_pars_measurements_ventilated = {}
data_pars_settings_ventilated = {}
data_pars_alarms_ventilated = {}

for case in cases:
    # limit[case] holds the start point at position 0 and the end point at position 1
    _bounds = limit[case]
    _window = slice(_bounds[0], _bounds[1])
    data_pars_measurements_ventilated[case] = data_pars_measurements[case][_window]
    data_pars_settings_ventilated[case] = data_pars_settings[case][_window]
    data_pars_alarms_ventilated[case] = data_pars_alarms[case][_window]

### Now re-analyse the filtered data as above

##### How many cases of the different ventilation modes occur

In [27]:
# One row per trimmed recording, one column per ventilator mode; each count of
# rows is multiplied by two to get the number of seconds
vent_modes_ventilated = DataFrame({
    case: data_pars_settings_ventilated[case]['Ventilator_mode'].value_counts() * 2
    for case in cases
}).T

# Modes that do not occur in a recording come out as NaN; store them as 0
vent_modes_ventilated = vent_modes_ventilated.replace(np.nan, 0)

In [28]:
# Duration of the retained recordings: rows in the settings table times two.
# NOTE(review): this measures the untrimmed `data_pars_settings` frames, and
# nothing below in this notebook reads `recording_duration` again - the trimmed
# durations are computed separately as `recording_duration_ventilated`.
recording_duration = {
    case: 2 * len(data_pars_settings[case])
    for case in cases
}

In [29]:
vent_modes_ventilated.head()

Ventilator_mode,IPPV,O2therapy,PSV,SIMV,SIMVPSV,SIPPV
AL000003,0.0,0.0,0.0,5160.0,0.0,0.0
AL000006,0.0,0.0,0.0,0.0,2280.0,0.0
AL000007,0.0,0.0,0.0,6660.0,0.0,0.0
AL000008,0.0,0.0,0.0,6242.0,0.0,0.0
AL000009,0.0,0.0,0.0,0.0,0.0,2250.0


### Add VG data

In [31]:
# Preview the raw VG_state counts (times two, i.e. seconds) per recording.
# The table is built here directly because `VG` is only created in the next
# cell, so displaying it at this point raises NameError on a fresh kernel
# ("Restart & Run All").
DataFrame({
    case: data_pars_settings_ventilated[case]['VG_state'].value_counts() * 2
    for case in cases
    if 'VG_state' in data_pars_settings_ventilated[case]
}).T

VG_state,off,on
AL000003,5160.0,
AL000006,2280.0,
AL000007,6660.0,
AL000008,,6242.0
AL000009,2238.0,12.0
...,...,...
AL001032,,4852.0
AL001037,,4738.0
AL001040,,6234.0
AL001055,,1052.0


In [32]:
# Seconds spent with each VG_state value, one row per trimmed recording
VG = {}
for case in cases:
    try:
        # Multiply by two to get the number of seconds
        VG[case] = data_pars_settings_ventilated[case]['VG_state'].value_counts() * 2
    except KeyError:
        # No VG_state column for this recording: contribute an all-NaN row.
        # A length-1 array placeholder cannot be aligned with the label-indexed
        # counts of the other recordings when the DataFrame is built.
        VG[case] = Series(index = pd.Index([], dtype = object), dtype = 'float64')

VG = DataFrame(VG).T

# Derive the column names from the state labels themselves, so the result does
# not depend on the order or the number of states that occur
# (assumes the labels print as 'off' / 'on', as in the displayed table).
VG.columns = ['VG_{}'.format(state) for state in VG.columns]

# Guarantee that both expected columns exist, in off/on order; keep any others
_expected = ['VG_off', 'VG_on']
VG = VG.reindex(columns = _expected + [col for col in VG.columns if col not in _expected])

In [33]:
# Append the VG columns to the mode table; rows are aligned on the recording identifier.
# NOTE(review): re-running this cell appends the VG columns a second time.
vent_modes_ventilated = pd.concat([vent_modes_ventilated, VG], axis = 1)

In [34]:
# Duration of each trimmed recording: rows in its trimmed settings table times two
recording_duration_ventilated = {
    case: 2 * len(data_pars_settings_ventilated[case])
    for case in cases
}

In [35]:
# Add the trimmed recording duration as the 'total' column (aligned on the recording identifier)
vent_modes_ventilated['total'] = Series(recording_duration_ventilated)

In [36]:
vent_modes_ventilated.head()

Unnamed: 0,IPPV,O2therapy,PSV,SIMV,SIMVPSV,SIPPV,VG_off,VG_on,total
AL000003,0.0,0.0,0.0,5160.0,0.0,0.0,5160.0,,5160
AL000006,0.0,0.0,0.0,0.0,2280.0,0.0,2280.0,,2280
AL000007,0.0,0.0,0.0,6660.0,0.0,0.0,6660.0,,6660
AL000008,0.0,0.0,0.0,6242.0,0.0,0.0,,6242.0,6242
AL000009,0.0,0.0,0.0,0.0,0.0,2250.0,2238.0,12.0,2250


In [37]:
# Column totals over all trimmed recordings; this includes the VG and 'total' columns
total_duration_ventilated = vent_modes_ventilated.sum(axis = 0).to_frame(name = 'duration (seconds)')
total_duration_ventilated

Unnamed: 0,duration (seconds)
IPPV,41542.0
O2therapy,704.0
PSV,208.0
SIMV,1034460.0
SIMVPSV,82944.0
SIPPV,530586.0
VG_off,456166.0
VG_on,1234278.0
total,1690444.0


### Only consider those recordings that have more than 15 minutes (900 seconds) of mechanical ventilation

In [38]:
# Trimming can leave a recording with 15 minutes (900 s) or less of data.
# Count the recordings that are still longer than that; the others are
# removed in the next cell.

len(vent_modes_ventilated[vent_modes_ventilated['total'] > 900])

388

In [39]:
# Keep only the recordings whose trimmed duration is strictly above 900 seconds
# and rebuild the sorted list of retained identifiers
_long_enough = vent_modes_ventilated['total'] > 900
vent_modes_ventilated = vent_modes_ventilated.loc[_long_enough]
cases = sorted(vent_modes_ventilated.index)

In [40]:
len(vent_modes_ventilated), len(cases)

(388, 388)

In [41]:
vent_modes_ventilated

Unnamed: 0,IPPV,O2therapy,PSV,SIMV,SIMVPSV,SIPPV,VG_off,VG_on,total
AL000003,0.0,0.0,0.0,5160.0,0.0,0.0,5160.0,,5160
AL000006,0.0,0.0,0.0,0.0,2280.0,0.0,2280.0,,2280
AL000007,0.0,0.0,0.0,6660.0,0.0,0.0,6660.0,,6660
AL000008,0.0,0.0,0.0,6242.0,0.0,0.0,,6242.0,6242
AL000009,0.0,0.0,0.0,0.0,0.0,2250.0,2238.0,12.0,2250
...,...,...,...,...,...,...,...,...,...
AL001032,0.0,0.0,0.0,4852.0,0.0,0.0,,4852.0,4852
AL001037,0.0,0.0,0.0,0.0,4738.0,0.0,,4738.0,4738
AL001040,0.0,0.0,0.0,150.0,0.0,6084.0,,6234.0,6234
AL001055,0.0,0.0,0.0,0.0,0.0,1052.0,,1052.0,1052


In [42]:
# Number of trimmed recordings with a non-zero duration in each column.
# SIMVPSV and PSV are not reported here.
for _mode in ('SIMV', 'SIPPV', 'IPPV', 'VG_on'):
    print(_mode + ':', sum(vent_modes_ventilated[_mode] > 0))
print('total', len(vent_modes_ventilated))

SIMV: 256
SIPPV: 145
IPPV: 17
VG_on: 304
total 388


In [43]:
len(vent_modes_ventilated)

388

In [44]:
len(cases)

388

In [45]:
# Drop the recordings that were filtered out above from the three trimmed dictionaries
_kept = set(cases)

data_pars_measurements_ventilated = {
    rec: frame
    for rec, frame in data_pars_measurements_ventilated.items()
    if rec in _kept
}

data_pars_settings_ventilated = {
    rec: frame
    for rec, frame in data_pars_settings_ventilated.items()
    if rec in _kept
}

data_pars_alarms_ventilated = {
    rec: frame
    for rec, frame in data_pars_alarms_ventilated.items()
    if rec in _kept
}

### Export trimmed DataFrames

In [47]:
# Pickle the three trimmed dictionaries into DATA_DUMP, one archive each
for _archive_name, _archive in (
    ('data_pars_measurements_ventilated_1_1100.pickle', data_pars_measurements_ventilated),
    ('data_pars_settings_ventilated_1_1100.pickle', data_pars_settings_ventilated),
    ('data_pars_alarms_ventilated_1_1100.pickle', data_pars_alarms_ventilated),
):
    with open(os.path.join(DATA_DUMP, _archive_name), 'wb') as handle:
        pickle.dump(_archive, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Export Dataframes containing ventilator modes to Excel files and pickle archives

In [50]:
# Write both tables into one workbook. The context manager saves and closes
# the file even if one of the exports raises. sheet_name is passed by keyword
# because passing it positionally is deprecated in pandas 2.x.
with pd.ExcelWriter(os.path.join(DIR_WRITE, 'ventilation_modes_ventilated_1_1100.xlsx')) as writer:
    vent_modes_ventilated.to_excel(writer, sheet_name = 'vent_modes_ventilated_1_1100')
    total_duration_ventilated.to_excel(writer, sheet_name = 'total_duration_vent_1_1100')

In [51]:
# Pickle the per-recording mode table of the trimmed recordings
_modes_archive = os.path.join(DATA_DUMP, 'vent_modes_ventilated_1_1100.pickle')
with open(_modes_archive, 'wb') as handle:
    pickle.dump(vent_modes_ventilated, handle, protocol=pickle.HIGHEST_PROTOCOL)