![alt text](./Cerny_logo_1.jpg)

# Analysis of Cerny ventilation recordings

The data processed and analysed in this Notebook were collected by the **Neonatal Emergency and Transport Service of the Peter Cerny Foundation**, Budapest, Hungary

**Author: Dr Gusztav Belteki**


## Processing the blood gases for recordings `AL000001 - AL000300`

This notebook preprocesses the **blood gases** for ventilation recordings `AL000001 - AL000300`.

- Total: **300 cases**
- Clinical and appropriate ventilator data are available only for **242 cases**
- Blood gases are not available in 11 cases; **231 cases remaining**

A dictionary containing the processed blood gas data is exported as a pickle archive: **blood_gases_1_300.pickle**

### Importing the necessary libraries and setting options

In [None]:
import IPython
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as sk

import os
import sys
import re
import pickle

from scipy import stats
from pandas import Series, DataFrame
from datetime import datetime, timedelta

# IPython magic: render matplotlib figures inline in the notebook
%matplotlib inline
# Use the 'classic' matplotlib style with a white figure background
matplotlib.style.use('classic')
matplotlib.rcParams['figure.facecolor'] = 'w'

# Show up to 100 rows and 100 columns when displaying DataFrames
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
# pd.set_option('mode.chained_assignment', None) 

In [None]:
# Report the interpreter and library versions used to run this notebook.
# The lines are collected first and printed in one call.
version_report = [
    "Python version: {}".format(sys.version),
    "pandas version: {}".format(pd.__version__),
    "matplotlib version: {}".format(matplotlib.__version__),
    "NumPy version: {}".format(np.__version__),
    "SciPy version: {}".format(sp.__version__),
    "IPython version: {}".format(IPython.__version__),
    "scikit-learn version: {}".format(sk.__version__),
]
print("\n".join(version_report))

### List and set the working directory and the directory to write out data

In [None]:
# Topic of the Notebook which will also be the name of the subfolder containing results
TOPIC = 'fabian'

# Name of the external hard drive
DRIVE = 'GUSZTI'

# Directory containing clinical and blood gas data
CWD = '/Users/guszti/ventilation_fabian'

# Directory on external drive to read the ventilation data from
DIR_READ = '/Volumes/%s/Fabian/fabian_patient_data' % DRIVE

# 'Analyses' subfolder of CWD
DIR_WRITE = '%s/%s' % (CWD, 'Analyses')

# Images and raw data will be written on an external hard drive.
# The path is built once; exist_ok=True makes makedirs a no-op when the
# folder is already there, so no separate isdir() check is needed.
DATA_DUMP = '/Volumes/%s/data_dump/%s' % (DRIVE, TOPIC)
os.makedirs(DATA_DUMP, exist_ok=True)

In [None]:
# Make CWD the working directory and display it to confirm the change
os.chdir(CWD)
os.getcwd()

In [None]:
DIR_READ

In [None]:
DIR_WRITE

In [None]:
DATA_DUMP

### Import clinical DataFrame from pickle archive

In [None]:
# Load the clinical DataFrame from its pickle archive in DATA_DUMP.
# NOTE: unpickling can execute arbitrary code - only load archives you created yourself.
clin_pickle_path = '%s/%s.pickle' % (DATA_DUMP, 'clin_df_1_300')
with open(clin_pickle_path, 'rb') as pickle_file:
    clin_df = pickle.load(pickle_file)

In [None]:
# Sorted index labels of clin_df; used below as the keys of the recordings
cases = sorted(clin_df.index)

In [None]:
len(cases)

### Import all clinical data containing blood gases

In [None]:
# Import the text files of DIR_READ into a dictionary keyed by the file name
# without its extension.
clin_dict = {}
for fname in os.listdir(DIR_READ):
    if fname.startswith('.'):  # disregard hidden files
        continue
    # The context manager closes the file even when reading or decoding fails
    with open(os.path.join(DIR_READ, fname), 'r', encoding='cp1252') as fhandle:
        # splitext removes the extension whatever its length
        clin_dict[os.path.splitext(fname)[0]] = fhandle.read()

In [None]:
len(clin_dict)

In [None]:
# Keep only the text files whose key is one of the cases.
# The set is built once so that each membership test is O(1) instead of a list scan.
case_set = set(cases)
clin_dict = {key: value for key, value in clin_dict.items() if key in case_set}

In [None]:
len(clin_dict)

In [None]:
# Keep, for every recording, only the text from the first 'Astrup' onwards,
# i.e. remove the clinical details preceding the blood gases.
gas_dict = {}

for rec_name, text in clin_dict.items():
    astrup_start = text.find('Astrup')
    if astrup_start == -1:
        # No 'Astrup' marker in the text: report it and leave the recording out
        print(rec_name, 'has no blood gas')
        continue
    gas_dict[rec_name] = text[astrup_start:]

In [None]:
len(gas_dict)

In [None]:
# Split the text of every recording into its blood gases and store the lines of
# each gas: gas_dict_2[recording][gas number] = list of lines.
# NOTE: the following cell re-initialises gas_dict_2 and parses these lines
# into fields, so this version is only an intermediate view of the data.
gas_dict_2 = {}

for key, value in gas_dict.items():
    gas_dict_2[key] = {}
    
    # Every blood gas starts with 'Astrup'; [1:] drops the text before the first one
    for i, gas in enumerate(value.split('Astrup')[1:]):
        # Drop the first line (rest of the 'Astrup' line) and the last line of the chunk
        gas_dict_2[key][i] = gas.split('\n')[1:-1]     

In [None]:
# Parse the blood gases of every recording into a nested dictionary:
# gas_dict_2[recording][gas number][field name] = field value
gas_dict_2 = {}

for key, text in gas_dict.items():
    gas_dict_2[key] = {}

    # Every blood gas starts with 'Astrup'; [1:] drops the text before the first one
    for i, gas in enumerate(text.split('Astrup')[1:]):
        fields = {}
        gas_dict_2[key][i] = fields
        # Drop the first line (rest of the 'Astrup' line) and the last line of the chunk
        for item in gas.split('\n')[1:-1]:
            # Split on the first colon only, so that a value which itself
            # contains ':' is kept intact instead of raising ValueError.
            # The value gets its own name so the outer loop variable is not overwritten.
            name, field_value = item.split(':', 1)
            field_value = field_value.strip()
            if not field_value:
                # The first field without a value ends the parsing of this blood gas
                break
            fields[name.strip()] = field_value

In [None]:
# Remove the blood gases for which no field was parsed.
for case, gases in gas_dict_2.items():
    # Collect the keys first so the dictionary is not modified while it is iterated
    empty_gas_ids = [gas_id for gas_id in sorted(gases) if not gases[gas_id]]
    for gas_id in empty_gas_ids:
        del gases[gas_id]

In [None]:
# One DataFrame per recording: each blood gas is a column, each field a row.
gas_frames = {case: DataFrame(gases) for case, gases in gas_dict_2.items()}

In [None]:
def time_changer(rec):
    """Replace the 'Time' strings of a recording's blood gases with datetimes.

    Every entry of the 'Time' row of ``gas_frames[rec]`` is parsed with the
    format '%H%M' and combined with the date of ``clin_df``'s
    'Recording start' value for the recording. The frame is modified in place.

    Parameters
    ----------
    rec : key of the recording in both ``clin_df`` and ``gas_frames``

    Raises
    ------
    KeyError : if ``rec`` is missing from ``clin_df`` or ``gas_frames``, or the
        frame has blood gas columns but no 'Time' row.

    NOTE(review): every blood gas gets the date of the recording start; a gas
    taken after midnight would presumably need the following day - confirm.
    """
    start_date = clin_df.loc[rec, 'Recording start'].date()
    frame = gas_frames[rec]
    for column in frame:
        gas_time = datetime.strptime(frame.at['Time', column], '%H%M').time()
        # Assign through .at on the frame itself. Chained indexing
        # (frame[column]['Time'] = ...) may write to a temporary copy and never
        # updates the frame when pandas Copy-on-Write is active.
        frame.at['Time', column] = datetime.combine(start_date, gas_time)

In [None]:
# Convert the blood gas times of every case that has a blood gas frame.
# A case without an entry in gas_frames is skipped; calling time_changer on it
# would raise a KeyError.
for case in cases:
    if case in gas_frames:
        time_changer(case)

In [None]:
# Transpose every frame so that each blood gas becomes a row indexed by its time.
for case in cases:
    try:
        gas_frames[case] = gas_frames[case].T.set_index('Time')
    except KeyError:
        # KeyError is raised when the case has no frame or the frame has no
        # 'Time' field, i.e. there is no usable blood gas. Any other error is
        # no longer silenced.
        print('No blood gas for %s' % case)
        # pop() with a default does not fail when the entry is already missing
        gas_frames.pop(case, None)

In [None]:
len(gas_frames)

### Export blood gases as Excel files

In [None]:
# Save blood gases into a multi-sheet Excel file, one sheet per recording.
# The context manager writes and closes the workbook on exit, replacing
# ExcelWriter.save(), which was removed in pandas 2.0. The sheet name is
# passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'blood_gases_1_300.xlsx')) as writer:
    for case in sorted(gas_frames):
        gas_frames[case].to_excel(writer, sheet_name=case)

### Export processed data as pickle files

In [None]:
# Write the dictionary of blood gas DataFrames to a pickle archive in DATA_DUMP.
blood_gas_pickle_path = '%s/%s.pickle' % (DATA_DUMP, 'blood_gases_1_300')
with open(blood_gas_pickle_path, 'wb') as pickle_file:
    pickle.dump(gas_frames, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)