![alt text](./Cerny_logo_1.jpg)

# Analysis of Cerny ventilation recordings

## Processing blood gases

This notebook imports and processes blood gas data and exports the result to a multi-sheet Excel file and a pickle archive.

### Importing the necessary libraries and setting options

In [1]:
import IPython
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as sk

import os
import sys
import re
import pickle

from scipy import stats
from pandas import Series, DataFrame
from datetime import datetime, timedelta

# Plotting configuration: render figures inline, use matplotlib's 'classic'
# style and give figures a white background.
%matplotlib inline
matplotlib.style.use('classic')
matplotlib.rcParams['figure.facecolor'] = 'w'

# Show up to 100 rows and 100 columns when pandas objects are displayed.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
# Disabled: this option would silence pandas' chained-assignment warning.
# pd.set_option('mode.chained_assignment', None) 

In [2]:
# Report the interpreter and library versions this notebook was run with
print(f"Python version: {sys.version}")
print(f"pandas version: {pd.__version__}")
print(f"matplotlib version: {matplotlib.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"SciPy version: {sp.__version__}")
print(f"IPython version: {IPython.__version__}")
print(f"scikit-learn version: {sk.__version__}")

Python version: 3.7.9 (default, Aug 31 2020, 07:22:35) 
[Clang 10.0.0 ]
pandas version: 1.1.3
matplotlib version: 3.3.2
NumPy version: 1.19.2
SciPy version: 1.5.2
IPython version: 7.19.0
scikit-learn version: 0.23.2


### List and set the working directory and the directory to write out data

In [3]:
# Topic of the Notebook which will also be the name of the subfolder containing results
TOPIC = 'fabian'

# Name of the external hard drive
DRIVE = 'GUSZTI'

# Directory containing clinical and blood gas data
CWD = '/Users/guszti/ventilation_fabian'

# Directory on external drive to read the ventilation data from
DIR_READ = '/Volumes/%s/Fabian/fabian_patient_data_all' % DRIVE

# Subdirectory of CWD that the Excel export is written to
DIR_WRITE = '%s/%s' % (CWD, 'Analyses')

# Images and raw data will be written on an external hard drive.
# The path is built once; exist_ok=True creates the folder only when it is
# missing, without a separate check-then-create step.
DATA_DUMP = '/Volumes/%s/data_dump/%s' % (DRIVE, TOPIC)
os.makedirs(DATA_DUMP, exist_ok=True)

In [4]:
# Make CWD the working directory, then display it to confirm the change
os.chdir(CWD)
os.getcwd()

'/Users/guszti/ventilation_fabian'

In [5]:
# Display the configured input and output directories
DIR_READ, DIR_WRITE, DATA_DUMP

('/Volumes/GUSZTI/Fabian/fabian_patient_data_all',
 '/Users/guszti/ventilation_fabian/Analyses',
 '/Volumes/GUSZTI/data_dump/fabian')

### Import clinical DataFrame from pickle archive

In [6]:
# Load the clinical DataFrame from its pickle archive in DATA_DUMP
clin_pickle = '%s/%s.pickle' % (DATA_DUMP, 'clin_df_1_1100')
with open(clin_pickle, 'rb') as handle:
    clin_df = pickle.load(handle)

In [7]:
# Sorted list of the index labels of the clinical DataFrame (one per case)
cases = list(clin_df.index)
cases.sort()

In [8]:
# Number of cases in the clinical DataFrame
len(cases)

792

### Import all clinical data containing blood gases

In [9]:
# Import the text files in DIR_READ into a dictionary
clin_dict = {}
for fname in os.listdir(DIR_READ):
    if fname.startswith('.'):  # disregard hidden files
        continue
    # The context manager closes the file even if reading or decoding fails
    with open(os.path.join(DIR_READ, fname), 'r', encoding='cp1252') as fhandle:
        # use the filenames without the .txt extension as keys
        clin_dict[fname[:-4]] = fhandle.read()

In [10]:
# Number of text files read from DIR_READ
len(clin_dict)

925

In [11]:
# Keep only the records whose key is one of the cases of the clinical DataFrame.
# Membership is tested against a set, so each lookup is constant-time
# instead of a scan through the list.
case_set = set(cases)
clin_dict = {key: text for key, text in clin_dict.items() if key in case_set}

In [12]:
# Number of records left after restricting them to the known cases
len(clin_dict)

792

In [13]:
# Remove the clinical details preceding the blood gases: keep each record's
# text from the first 'Astrup' marker onwards. Records without the marker
# are reported and left out.
gas_dict = {}
for case_id, text in clin_dict.items():
    start = text.find('Astrup')
    if start == -1:
        print(case_id, 'has no blood gas')
    else:
        gas_dict[case_id] = text[start:]

In [14]:
# Number of records that contain an 'Astrup' section
len(gas_dict)

792

In [15]:
# Parse the text of each record into nested dictionaries:
#   gas_dict_2[case][i][parameter name] = value   (both stripped strings)
# where i numbers the 'Astrup' sections of the record in order of appearance.
gas_dict_2 = {}

for key, text in gas_dict.items():
    gas_dict_2[key] = {}

    # Whatever precedes the first 'Astrup' marker is not a blood gas, hence [1:]
    for i, gas in enumerate(text.split('Astrup')[1:]):
        gas_dict_2[key][i] = {}
        # Drop the remainder of the marker line and the last line of the section
        items = gas.split('\n')[1:-1]
        for item in items:
            # Split at the first colon only, so a value that itself contains
            # ':' is kept whole instead of raising on unpacking
            name, colon, entry = item.partition(':')
            if not colon:
                # Line without a 'name: value' pair - nothing to store
                continue
            if entry.strip() == '':
                # An empty value ends the parsing of this blood gas
                break
            gas_dict_2[key][i][name.strip()] = entry.strip()

In [16]:
# Discard the blood gases for which no value was parsed
for case, gases in gas_dict_2.items():
    # Collect the keys first so the dictionary is not changed while it is iterated
    empty_ids = [gas_id for gas_id, values in gases.items() if values == {}]
    for gas_id in empty_ids:
        del gases[gas_id]

In [17]:
# One DataFrame per case, built from that case's dictionary of blood gases
gas_frames = {case: DataFrame(gases) for case, gases in gas_dict_2.items()}

In [18]:
def time_changer(rec):
    """Replace the 'Time' entry of every blood gas of a recording with a datetime.

    The time of day is parsed from the 'Time' row of ``gas_frames[rec]`` with
    the '%H%M' format and combined with the calendar date of the recording's
    'Recording start' entry in ``clin_df``. ``gas_frames[rec]`` is modified in
    place; nothing is returned.

    Parameters
    ----------
    rec
        Key of the recording; must be present in ``clin_df.index`` and in
        ``gas_frames``.

    Raises
    ------
    ValueError
        If a 'Time' entry does not match the '%H%M' format.

    NOTE(review): every blood gas receives the date of the recording start, so
    a gas taken on a later calendar day would be dated too early - confirm
    whether recordings can run past midnight.
    """
    frame = gas_frames[rec]
    start_date = clin_df.loc[rec, 'Recording start'].date()
    for column in frame:
        raw_time = frame.loc['Time', column]
        # This str() is needed here because AL000665 (and only that) is
        # interpreted as Datetime
        time_of_day = datetime.strptime(str(raw_time), '%H%M').time()
        # Write through .loc: chained indexing (frame[column]['Time'] = ...)
        # may assign to a temporary copy and leave the frame unchanged
        frame.loc['Time', column] = datetime.combine(start_date, time_of_day)

In [19]:
# Convert the blood gas times of every case to datetimes.
# AL000310 is left out: it has an abnormal time stamp for first gas ("70:19").
for case in cases:
    if case != 'AL000310':
        time_changer(case)

In [20]:
# Transpose each frame so that every blood gas becomes a row indexed by 'Time'.
# Cases whose frame has no 'Time' entry are reported and removed from gas_frames.
for case in cases:
    frame = gas_frames.get(case)
    if frame is None or frame.index.name == 'Time':
        # Removed or already reshaped by an earlier run of this cell; skipping
        # keeps a re-run from deleting frames that were processed correctly
        continue
    try:
        gas_frames[case] = frame.T.set_index('Time')
    except KeyError:
        # set_index raises KeyError when there is no 'Time' column; any other
        # error is unexpected and is allowed to surface
        print('No blood gas for %s' % case)
        del gas_frames[case]

No blood gas for AL000042
No blood gas for AL000051
No blood gas for AL000070
No blood gas for AL000133
No blood gas for AL000144
No blood gas for AL000169
No blood gas for AL000172
No blood gas for AL000290
No blood gas for AL000294
No blood gas for AL000308
No blood gas for AL000333
No blood gas for AL000353
No blood gas for AL000360
No blood gas for AL000369
No blood gas for AL000428
No blood gas for AL000436
No blood gas for AL000440
No blood gas for AL000464
No blood gas for AL000471
No blood gas for AL000475
No blood gas for AL000492
No blood gas for AL000504
No blood gas for AL000537
No blood gas for AL000548
No blood gas for AL000554
No blood gas for AL000557
No blood gas for AL000606
No blood gas for AL000627
No blood gas for AL000653
No blood gas for AL000667
No blood gas for AL000668
No blood gas for AL000683
No blood gas for AL000703
No blood gas for AL000748
No blood gas for AL000758
No blood gas for AL000777
No blood gas for AL000783
No blood gas for AL000794
No blood gas

In [21]:
# Number of cases left after removing those reported as having no blood gas
len(gas_frames)

737

### Export blood gases as a multi-sheet Excel file

In [22]:
# Save blood gases into a multi-sheet Excel file, one sheet per case.
# The context manager saves and closes the workbook on exit, also when
# writing one of the sheets raises.
with pd.ExcelWriter('%s/%s' % (DIR_WRITE, 'blood_gases_1_1100.xlsx')) as writer:
    for case in sorted(gas_frames.keys()):
        gas_frames[case].to_excel(writer, sheet_name=case)

### Export processed data as a pickle file

In [23]:
# Store the dictionary of blood gas DataFrames as a pickle archive in DATA_DUMP
gas_pickle = '%s/%s.pickle' % (DATA_DUMP, 'blood_gases_1_1100')
with open(gas_pickle, 'wb') as handle:
    pickle.dump(gas_frames, handle, protocol=pickle.HIGHEST_PROTOCOL)