![alt text](./Cerny_logo_1.jpg)

# Analysis of Cerny ventilation recordings


**Author: Dr Gusztav Belteki**

### 1. Importing the necessary libraries and setting options

This notebook preprocesses and analyses the ventilator parameter data obtained with **0.5Hz sampling rate** from the Fabian ventilators at the Cerny neonatal transport service. It exports descriptive statistics into Excel files and the preprocessed data as a pickle archive.

In [None]:
import IPython
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib
import matplotlib.pyplot as plt

import os
import sys
import re
import pickle
import datetime

from scipy import stats
from pandas import Series, DataFrame
from datetime import datetime, timedelta

%matplotlib inline

# Classic matplotlib look with a white figure background
plt.style.use('classic')
plt.rcParams['figure.facecolor'] = 'w'

# Show up to 300 rows and 300 columns when a DataFrame is displayed
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 300)

In [None]:
# Record the versions of the interpreter and of the main libraries in use
library_versions = [
    ("Python", sys.version),
    ("pandas", pd.__version__),
    ("matplotlib", matplotlib.__version__),
    ("NumPy", np.__version__),
    ("SciPy", sp.__version__),
    ("IPython", IPython.__version__),
]
for library_name, library_version in library_versions:
    print(f"{library_name} version: {library_version}")

### 2. List and set the working directory and the directory to write out data

In [None]:
# Topic of the notebook; also the name of the subfolder that receives the results
TOPIC = 'accelerometer_ventilated'
# Name of the external hard drive
DRIVE = 'GUSZTI'
# Folder on the external drive that holds the subfolders with the ventilator recordings
DIR_READ = f'/Volumes/{DRIVE}/Fabian'

# Working directory of the project
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
CWD = '/Users/guszti/ventilation_fabian'

# Results of this notebook (Excel files, paper figures)
DIR_WRITE = f'{CWD}/Analyses/{TOPIC}'
# Folder with the pickle archives written by / read from the Fabian notebooks
DATA_DUMP = f'/Volumes/{DRIVE}/data_dump/fabian'

In [None]:
# Make CWD the working directory; the trailing expression displays the
# directory that is current after the change.
os.chdir(CWD)
os.getcwd()

In [None]:
# Display the directory the recordings are read from and the one results are written to
DIR_READ, DIR_WRITE

### 3. Import ventilator slow data and accelerometer data from pickle archives

In [None]:
def _load_pickle(name):
    """Return the object stored in '<DATA_DUMP>/<name>.pickle'."""
    # NOTE: unpickling can execute arbitrary code -- only load archives that
    # were produced by trusted notebooks of this project.
    with open('%s/%s.pickle' % (DATA_DUMP, name), 'rb') as handle:
        return pickle.load(handle)


# Archives named 'combined_ventilated_1min*'
combined_ventilated_1min = _load_pickle('combined_ventilated_1min')
combined_ventilated_1min_all = _load_pickle('combined_ventilated_1min_all')

# Per-recording frames used below to locate the minute with the lowest /
# highest vibration ('vibr') and their 'accel' counterparts
low_1_vibr_frame = _load_pickle('low_1_vibr_frame')
high_1_vibr_frame = _load_pickle('high_1_vibr_frame')
low_1_accel_frame = _load_pickle('low_1_accel_frame')
high_1_accel_frame = _load_pickle('high_1_accel_frame')

In [None]:
# Sizes of the two objects loaded from the 'combined_ventilated_1min*' archives
(len(combined_ventilated_1min), len(combined_ventilated_1min_all))

In [None]:
# Sorted, de-duplicated second elements of the keys of combined_ventilated_1min;
# these are used as the recording identifiers in the rest of the notebook
recs = sorted({key[1] for key in combined_ventilated_1min})

In [None]:
# Sizes of the four low/high vibration and acceleration frames loaded above
(len(low_1_vibr_frame), len(high_1_vibr_frame), len(low_1_accel_frame), len(high_1_accel_frame))

### 4. Import ventilator fast data

In [None]:
%%time

fast_data = {}

# (exclusive lower bound, inclusive upper bound, subfolder of DIR_READ) for the
# number formed by the last three characters of the recording name.
# NOTE(review): numbers 901-999 are sent to the '751_900' folder, exactly as in
# the original if/elif chain -- confirm that this is where those recordings live.
_FAST_DATA_FOLDERS = [
    (150, 300, 'fabian_ventilator_data_151_300'),
    (300, 450, 'fabian_ventilator_data_301_450'),
    (450, 600, 'fabian_ventilator_data_451_600'),
    (600, 750, 'fabian_ventilator_data_601_750'),
    (750, 999, 'fabian_ventilator_data_751_900'),
]


def _fast_data_folder(rec):
    """Return the DIR_READ subfolder that holds recording `rec`.

    Raises ValueError when the recording number falls outside every known
    range. Previously `path` was simply left unset in that case, which either
    raised NameError (first iteration) or silently re-used the previous
    recording's folder, loading the wrong data under this recording's key.
    """
    rec_number = int(rec[-3:])
    for lower, upper, folder in _FAST_DATA_FOLDERS:
        if lower < rec_number <= upper:
            return folder
    raise ValueError('No fast data folder is defined for recording %s' % rec)


for rec in recs:
    path = '%s/%s/%s' % (DIR_READ, _fast_data_folder(rec), rec)

    print(rec)
    # Only import fast data not the annotation files.
    # os.listdir returns names in arbitrary order; sorting makes the order in
    # which the files are concatenated deterministic.
    # NOTE(review): presumably name order is also chronological -- confirm.
    files = sorted(file for file in os.listdir(path)
                   if 'waves' in file and 'predicted' not in file and 'config' not in file)

    frames = []
    for file in files:
        print('Importing %s' % file)
        frames.append(pd.read_csv('%s/%s' % (path, file), index_col = 0, parse_dates = [1],
                                  infer_datetime_format=True))

    # One frame per recording, indexed by the 'Time' column
    fast_data[rec] = pd.concat(frames).set_index('Time')

### 5. Import clinical data

In [None]:
# Clinical data; indexed below by recording name and read for its 'Weight' column.
# NOTE: unpickling can execute arbitrary code -- only load trusted archives.
clin_df_path = '%s/%s.pickle' % (DATA_DUMP, 'clin_df_1_1100')
with open(clin_df_path, 'rb') as pickle_file:
    clin_df = pickle.load(pickle_file)

In [None]:
# Number of entries in the clinical data
len(clin_df)

In [None]:
# Export the list of recordings analysed in this notebook
rec_list = DataFrame(list(recs), columns = ['Recording'])

# Make sure the results folder exists before writing into it
os.makedirs(DIR_WRITE, exist_ok=True)

# The context manager saves and closes the workbook on exit.
# ExcelWriter.save() was removed in pandas 2.0, and sheet_name is passed by
# keyword because passing it positionally is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'accelerometer_vent_final_rec_list.xlsx')) as writer:
    rec_list.to_excel(writer, sheet_name='rec_list')

### 6. Generate composite P-V curves for the minute with the lowest and the highest vibration for each recording and export them as images

In [None]:
frmt = '%Y-%m-%d %H:%M:%S.%f'


def _window_bounds(window_start):
    """Return the (start, end) strings of the window beginning at `window_start`.

    Both bounds are formatted with `frmt`; the end lies 59000000000 ns (59 s)
    after the start.
    """
    window_end = window_start + pd.to_timedelta(59000000000)
    return (datetime.strftime(window_start, format = frmt),
            datetime.strftime(window_end, format = frmt))


# recording -> (start, end) of the 1-minute period taken from low_1_vibr_frame
lowest_1_ind = {
    rec_id: _window_bounds(low_1_vibr_frame.loc[rec_id]['datetime'])
    for rec_id in recs
}

# recording -> (start, end) of the 1-minute period taken from high_1_vibr_frame
highest_1_ind = {
    rec_id: _window_bounds(high_1_vibr_frame.loc[rec_id]['datetime'])
    for rec_id in recs
}


In [None]:
%%time

# Fast data of every recording restricted to its highest- and lowest-vibration
# window (label-based slices, so both window bounds are included)
fast_data_highest_accel = {}
fast_data_lowest_accel = {}

for recording in recs:
    recording_fast = fast_data[recording]

    high_from, high_to = highest_1_ind[recording]
    fast_data_highest_accel[recording] = recording_fast.loc[high_from : high_to]

    low_from, low_to = lowest_1_ind[recording]
    fast_data_lowest_accel[recording] = recording_fast.loc[low_from : low_to]

In [None]:
# Create the output folder for the P-V loop images unless it already exists
loops_dir = os.path.join(DATA_DUMP, 'accelero_ventilated_graphs', 'accelero_fast_data_loops')
os.makedirs(loops_dir, exist_ok=True)

##### Example PV loop

In [None]:
filetype = 'jpg'
dpi = 300
recording = 'AL000800'

# Left panel: the window taken from high_1_vibr_frame (highest vibration);
# right panel: the window taken from low_1_vibr_frame (lowest vibration).
panels = [
    (fast_data_highest_accel[recording], high_1_vibr_frame),
    (fast_data_lowest_accel[recording], low_1_vibr_frame),
]

# Volume is divided by Weight / 1000 to obtain the mL/kg shown on the y axis
# (presumably Weight is stored in grams -- confirm against clin_df).
weight_scale = clin_df.loc[recording]['Weight'] / 1000

fig, axes = plt.subplots(nrows=1, ncols=2, figsize = [10,5])
fig.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, hspace=0.3, wspace=0.1)

for ax, (loop_data, vibr_frame) in zip(axes, panels):
    ax.plot(loop_data['Pressure'], loop_data['Volume'] / weight_scale, color = 'blue')
    # x axis spans three times the mean pressure of the window
    ax.set_xlim(0, loop_data['Pressure'].mean() * 3)
    ax.set_ylim(0, 8)
    ax.set_title('acceleration = %s m/sec$^2$' %
                 round(vibr_frame.loc[recording]['length_median_high_pass'], 2), fontsize = 12)
    ax.grid(True)
    # Axis label previously read 'Pressue'
    ax.set_xlabel('Pressure (cmH$_2$O)')

# The y label is only drawn on the left panel
axes[0].set_ylabel('Volume (mL/kg)')

fig.savefig('%s/%s_PV.%s' % (os.path.join(DATA_DUMP, 'accelero_ventilated_graphs', 'accelero_fast_data_loops'),
    recording, filetype), dpi = dpi, format = filetype, bbox_inches='tight', pad_inches=0.1)

### 7. Study the disorder of loops

The disorder of a loop is expressed as the number of distinct P-V (pressure-volume) data pairs recorded during the minute.

In [None]:
# Inspect the fast data of one recording during its highest-vibration window
fast_data_highest_accel['AL000264']

In [None]:
# Distinct (Pressure, Volume) value pairs of each recording during its
# highest-vibration window, and how many there are.
pv_pairs_high = {}
pv_pairs_high_count = {}

for recording, fast_frame in fast_data_highest_accel.items():
    # Recordings without fast data in this window are left out
    if len(fast_frame) == 0:
        continue
    # Columns are selected by name, as in the plotting cells, instead of by
    # tuple position (item[1], item[3]), so the result does not depend on the
    # column order of the imported CSV files.
    pv_pairs_high[recording] = set(zip(fast_frame['Pressure'], fast_frame['Volume']))
    pv_pairs_high_count[recording] = len(pv_pairs_high[recording])

In [None]:
# Distinct (Pressure, Volume) value pairs of each recording during its
# lowest-vibration window, and how many there are.
pv_pairs_low = {}
pv_pairs_low_count = {}

for recording, fast_frame in fast_data_lowest_accel.items():
    # Recordings without fast data in this window are left out
    if len(fast_frame) == 0:
        continue
    # Columns are selected by name, as in the plotting cells, instead of by
    # tuple position (item[1], item[3]), so the result does not depend on the
    # column order of the imported CSV files.
    pv_pairs_low[recording] = set(zip(fast_frame['Pressure'], fast_frame['Volume']))
    pv_pairs_low_count[recording] = len(pv_pairs_low[recording])

In [None]:
# The trailing semicolon suppresses the cell output, so this cell displays nothing
pv_pairs_high_count;

In [None]:
# One row per recording with the pair counts of the low- and high-vibration
# windows; a recording missing from one of the dictionaries gets NaN there.
pv_pairs_count_frme = pd.DataFrame(
    [pv_pairs_low_count, pv_pairs_high_count],
    index=['low_accel', 'high_accel'],
).transpose()

# Change in the number of pairs from the low- to the high-vibration window
pv_pairs_count_frme['diff'] = pv_pairs_count_frme['high_accel'].sub(pv_pairs_count_frme['low_accel'])

In [None]:
# Display the per-recording pair counts and their difference
pv_pairs_count_frme

In [None]:
# Number of recordings (rows) in the pair-count table
len(pv_pairs_count_frme)

In [None]:
# Export the pair counts. The context manager saves and closes the workbook on
# exit: ExcelWriter.save() was removed in pandas 2.0, and sheet_name is passed
# by keyword because passing it positionally is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'PV_loop_complexity.xlsx')) as writer:
    pv_pairs_count_frme.to_excel(writer, sheet_name='PV_loop_complexity')

In [None]:
# Paired t-test of the pair counts in the low- vs. high-vibration window.
# A recording that was skipped in one of the windows has NaN in that column;
# such incomplete pairs are dropped first, otherwise the NaN propagates into
# the test result.
paired_counts = pv_pairs_count_frme[['low_accel', 'high_accel']].dropna()
stats.ttest_rel(paired_counts['low_accel'], paired_counts['high_accel'])

In [None]:
# Wilcoxon signed-rank test of the pair counts in the low- vs. high-vibration
# window. Recordings with a missing count in either window (NaN) cannot form
# a pair and are dropped before testing.
paired_counts = pv_pairs_count_frme[['low_accel', 'high_accel']].dropna()
stats.wilcoxon(paired_counts['low_accel'], paired_counts['high_accel'])

In [None]:
# Box plot of every column of the pair-count table. Rows containing NaN are
# dropped because NaN values prevent matplotlib from computing the box
# statistics; the boxes are labelled with the column names.
box_data = pv_pairs_count_frme.dropna()

fig, ax = plt.subplots()
ax.boxplot(box_data.values)
ax.set_xticklabels(box_data.columns)
ax.set_ylabel('Number of distinct P-V data pairs');

In [None]:
# Descriptive statistics of the pair counts and of their difference
pv_pairs_count_frme.describe()

## Figures for paper

### Figure 3

#### Figure 3AB

In [None]:
filetype = 'jpg'
dpi = 300
recording = 'AL000673'

# Left panel: lowest-vibration window; right panel: highest-vibration window.
# Each entry: (fast data of the window, frame holding the vibration value,
# column of pv_pairs_count_frme with the matching pair count).
panels = [
    (fast_data_lowest_accel[recording], low_1_vibr_frame, 'low_accel'),
    (fast_data_highest_accel[recording], high_1_vibr_frame, 'high_accel'),
]

# Volume is divided by Weight / 1000 to obtain the mL/kg shown on the y axis
# (presumably Weight is stored in grams -- confirm against clin_df).
weight_scale = clin_df.loc[recording]['Weight'] / 1000

fig, axes = plt.subplots(nrows=1, ncols=2, figsize = [10,5])
fig.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, hspace=0.3, wspace=0.1)

for ax, (loop_data, vibr_frame, count_col) in zip(axes, panels):
    ax.plot(loop_data['Pressure'], loop_data['Volume'] / weight_scale, color = 'blue')
    # x axis spans three times the mean pressure of the window
    ax.set_xlim(0, loop_data['Pressure'].mean() * 3)
    ax.set_ylim(0, 8)
    ax.set_title('acceleration = %s m/sec$^2$' %
                 round(vibr_frame.loc[recording]['length_median_high_pass'], 2), fontsize = 12)
    ax.grid(True)
    ax.set_xlabel('Pressure (cmH$_2$O)')
    pair_count = pv_pairs_count_frme.loc[recording][count_col]
    # ':.0f' keeps the label free of a trailing '.0' when the table is float
    ax.text(2, 7, f'P-V data pairs: {pair_count:.0f}', fontsize = 12)

# The y label is only drawn on the left panel
axes[0].set_ylabel('Volume (mL/kg)')

# The extension follows `filetype` so that it always agrees with `format`
fig.savefig('%s/%s.%s' % (DIR_WRITE, 'Figure_3AB', filetype),
    dpi = dpi, format = filetype, bbox_inches='tight', pad_inches=0.1,);

#### Figure 3CD

In [None]:
filetype = 'jpg'
dpi = 300
recording = 'AL000366'

# Left panel: lowest-vibration window; right panel: highest-vibration window.
# Each entry: (fast data of the window, frame holding the vibration value,
# column of pv_pairs_count_frme with the matching pair count).
panels = [
    (fast_data_lowest_accel[recording], low_1_vibr_frame, 'low_accel'),
    (fast_data_highest_accel[recording], high_1_vibr_frame, 'high_accel'),
]

# Volume is divided by Weight / 1000 to obtain the mL/kg shown on the y axis
# (presumably Weight is stored in grams -- confirm against clin_df).
weight_scale = clin_df.loc[recording]['Weight'] / 1000

fig, axes = plt.subplots(nrows=1, ncols=2, figsize = [10,5])
fig.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, hspace=0.3, wspace=0.1)

for ax, (loop_data, vibr_frame, count_col) in zip(axes, panels):
    ax.plot(loop_data['Pressure'], loop_data['Volume'] / weight_scale, color = 'blue')
    # x axis spans four times the mean pressure of the window
    ax.set_xlim(0, loop_data['Pressure'].mean() * 4)
    ax.set_ylim(0, 8)
    ax.set_title('acceleration = %s m/sec$^2$' %
                 round(vibr_frame.loc[recording]['length_median_high_pass'], 2), fontsize = 12)
    ax.grid(True)
    ax.set_xlabel('Pressure (cmH$_2$O)')
    pair_count = pv_pairs_count_frme.loc[recording][count_col]
    # ':.0f' keeps the label free of a trailing '.0' when the table is float
    ax.text(2, 7, f'P-V data pairs: {pair_count:.0f}', fontsize = 12)

# The y label is only drawn on the left panel
axes[0].set_ylabel('Volume (mL/kg)')

# The extension follows `filetype` so that it always agrees with `format`
fig.savefig('%s/%s.%s' % (DIR_WRITE, 'Figure_3CD', filetype),
    dpi = dpi, format = filetype, bbox_inches='tight', pad_inches=0.1,);

#### Figure 3 combined

In [None]:
filetype = 'pdf'
dpi = 300

# One figure row per recording: (recording, multiple of the mean pressure used
# as the upper x limit). Row 0 gives panels A/B, row 1 gives panels C/D.
panel_rows = [('AL000673', 3), ('AL000366', 4)]

# One figure column per window: (fast data per recording, frame holding the
# vibration value, column of pv_pairs_count_frme with the matching pair count).
# Column 0 is the lowest-vibration window, column 1 the highest.
panel_cols = [
    (fast_data_lowest_accel, low_1_vibr_frame, 'low_accel'),
    (fast_data_highest_accel, high_1_vibr_frame, 'high_accel'),
]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize = [8,8])
fig.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, hspace=0.2, wspace=0.2)

for row, (recording, xlim_factor) in enumerate(panel_rows):
    # Volume is divided by Weight / 1000 to obtain the mL/kg shown on the y axis
    # (presumably Weight is stored in grams -- confirm against clin_df).
    weight_scale = clin_df.loc[recording]['Weight'] / 1000

    for col, (fast_by_recording, vibr_frame, count_col) in enumerate(panel_cols):
        ax = axes[row, col]
        loop_data = fast_by_recording[recording]

        ax.plot(loop_data['Pressure'], loop_data['Volume'] / weight_scale, color = 'gray')
        ax.set_xlim(0, loop_data['Pressure'].mean() * xlim_factor)
        ax.set_ylim(0, 9)
        # Panel B previously carried the misspelled title 'vibrarion'
        ax.set_title('vibration = %s m/sec$^2$' %
                     round(vibr_frame.loc[recording]['length_median_high_pass'], 2), fontsize = 12)

        # The x label is only drawn on the bottom row, the y label on the left column
        ax.set_xlabel('Pressure (cmH$_2$O)' if row == 1 else '')
        if col == 0:
            ax.set_ylabel('Volume (mL/kg)')

        pair_count = pv_pairs_count_frme.loc[recording][count_col]
        # ':.0f' keeps the label free of a trailing '.0' when the table is float
        ax.text(2, 8, f'P-V data pairs: {pair_count:.0f}', fontsize = 12)

# Panel letters
fig.text(0.03, 0.92, 'A', fontsize = 16); fig.text(0.49, 0.92, 'B', fontsize = 16)
fig.text(0.03, 0.47, 'C', fontsize = 16); fig.text(0.49, 0.47, 'D', fontsize = 16)

fig.savefig('%s/%s.%s' % (DIR_WRITE, 'Figure_3', filetype),
    dpi = dpi, format = filetype, bbox_inches='tight', pad_inches=0.1,);