<img src="./University_Debrecen_logo.jpg" alt="Drawing" style="width: 200px;"/>

# Processing the transcutaneous data obtained from TCM5 monitors

#### Author: Dr Gusztav Belteki

### 1. Import the required modules

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from matplotlib import dates

import pickle
import sys
import copy
import os
import gc
import copy
from collections import defaultdict
from datetime import datetime

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 100)

pd.options.mode.chained_assignment = None

In [None]:
# Record the interpreter and library versions this notebook was run with
versions = {
    'Python': sys.version,
    'pandas': pd.__version__,
    'matplotlib': mpl.__version__,
    'numPy': np.__version__,
}
for library, release in versions.items():
    print(f'{library} version: {release}')

### 2. List and set the working directory and the directory to write out data

In [None]:
# Topic of the notebook; it is also used as the name of the results subfolder
TOPIC = 'tcCO2'

# Base path of the clinical data
PATH = os.path.join(
    os.sep, 'Users', 'guszti', 'Library', 'Mobile Documents', 'com~apple~CloudDocs',
    'Documents', 'Research', 'Ventilation',
)

# Name of the external drive (looked up under the 'Volumes' directory)
DRIVE = 'Guszti'

# Directory containing clinical and blood gas data
DIR_READ_CLIN = os.path.join(PATH, 'ventilation_draeger_debrecen')

# Directory on the external drive from which the processed pickle archives are read
DIR_READ_VENT = os.path.join(os.sep, 'Volumes', DRIVE, 'data_dump', 'draeger_debrecen', 'analysis_all')

# Folder on the external drive that receives the exported results;
# it is created on first use and left untouched if it already exists
DATA_DUMP = os.path.join(os.sep, 'Volumes', DRIVE, 'data_dump', 'draeger_debrecen', TOPIC)
os.makedirs(DATA_DUMP, exist_ok=True)

In [None]:
# Display the resolved input and output directories for a visual check
DIR_READ_CLIN, DIR_READ_VENT, DATA_DUMP

### 3. Import processed blood gases and transcutaneous data

In [None]:
# Load the processed blood gas archive.
# NOTE: pickle can execute arbitrary code while loading - only open trusted files.
blood_gas_archive = os.path.join(DIR_READ_VENT, 'blood_gases_processed.pickle')
with open(blood_gas_archive, 'rb') as handle:
    blood_gases = pickle.load(handle)

In [None]:
# List the keys of the loaded blood gas archive
blood_gases.keys()

In [None]:
# Load the transcutaneous monitoring archive.
# NOTE: pickle can execute arbitrary code while loading - only open trusted files.
transcutaneous_archive = os.path.join(DIR_READ_VENT, 'transcutaneous_data.pickle')
with open(transcutaneous_archive, 'rb') as handle:
    tcCO2 = pickle.load(handle)

In [None]:
# List the keys of the loaded transcutaneous archive
tcCO2.keys()

In [None]:
# Make the 'Date/Time' column the index of every recording; the previous
# index is turned into an ordinary column by reset_index()
for patient, frame in tcCO2.items():
    tcCO2[patient] = frame.reset_index().set_index('Date/Time')

In [None]:
# Preview the first 10 rows of recording LVD016
tcCO2['LVD016'].head(10)

In [None]:
# Column types and non-null counts of recording LVD003
tcCO2['LVD003'].info()

In [None]:
# Summary statistics of recording LVD003
tcCO2['LVD003'].describe()

### 4. Visualise transcutaneous CO2 data

Some recordings contain periods with very low tcCO2 values; these are artifacts.

In [None]:
# Plot the raw tcCO2 trace of every recording and save each figure to DATA_DUMP
filetype = 'jpg'
dpi = 300
majorFmt = dates.DateFormatter('%d/%m\n%H:%M')

for recording in tcCO2:
    fig = plt.figure()
    fig.set_size_inches(12, 4)
    ax = fig.add_subplot(1, 1, 1)
    data = tcCO2[recording]['pCO2 [mmHg]']
    # x_compat = True: plot with plain matplotlib date ticks so that the
    # formatter set below is applied
    data.plot(ax = ax, color = 'red', x_compat = True)
    # Tick format and tick label styling are set once
    # (these two calls used to be issued twice in a row)
    ax.xaxis.set_major_formatter(majorFmt)
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, fontsize = 12, horizontalalignment = 'center')
    ax.set_xlabel('Time (hours)', size = 12, color = 'black')
    ax.set_ylabel('mmHg', size = 12, color = 'black')
    ax.set_title(recording,  size = 12, color = 'black')
    ax.legend(['tcCO$_2$'], ncol = 2, fontsize = 12, loc = 'best')
    # Explicit boolean instead of the string 'on'
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')

    fig.savefig(os.path.join(DATA_DUMP, f'{recording}_tcCO2_raw_data_unedited.{filetype}'),
        dpi = dpi, format = filetype, bbox_inches='tight', pad_inches=0.1,)

    # Every figure except LVD003 is closed after saving to release its memory;
    # LVD003 stays open (presumably so it is shown in the notebook as an example)
    if recording != 'LVD003':
        plt.close(fig)

### 5. Remove artifacts from tcCO2 data

In [None]:
# Average every tcCO2 recording over consecutive 10-minute windows.
# label='right' stamps each window with the time at which it ends.
tcCO2_10_min_mean = {
    patient: recording.resample('10min', label='right').mean()
    for patient, recording in tcCO2.items()
}

In [None]:
# Plot the 10-minute mean tcCO2 of every recording and save each figure to DATA_DUMP
filetype = 'jpg'
dpi = 300
majorFmt = dates.DateFormatter('%d/%m\n%H:%M')

for recording in tcCO2:
    fig = plt.figure()
    fig.set_size_inches(12, 4)
    ax = fig.add_subplot(1, 1, 1)
    data = tcCO2_10_min_mean[recording]['pCO2 [mmHg]']
    # x_compat = True: plot with plain matplotlib date ticks so that the
    # formatter set below is applied
    data.plot(ax = ax, color = 'red', x_compat = True)
    # Tick format and tick label styling are set once
    # (these two calls used to be issued twice in a row)
    ax.xaxis.set_major_formatter(majorFmt)
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, fontsize = 12, horizontalalignment = 'center')
    ax.set_xlabel('Time (hours)', size = 12, color = 'black')
    ax.set_ylabel('mmHg', size = 12, color = 'black')
    ax.set_title(recording,  size = 12, color = 'black')
    ax.legend(['tcCO$_2$'], ncol = 2, fontsize = 12, loc = 'best')
    # Explicit boolean instead of the string 'on'
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')

    # The file name says "mean" because the plotted values are 10-minute means
    # (it previously read "10min_min", which suggested minima)
    fig.savefig(os.path.join(DATA_DUMP, f'{recording}_tcCO2_10min_mean_unedited.{filetype}'),
        dpi = dpi, format = filetype, bbox_inches='tight', pad_inches=0.1,)

    # Every figure except LVD003 is closed after saving to release its memory;
    # LVD003 stays open (presumably so it is shown in the notebook as an example)
    if recording != 'LVD003':
        plt.close(fig)

In [None]:
def data_range(data):
    """Return the spread (maximum minus minimum) of *data*.

    Missing values are dropped before the extremes are taken. If nothing
    is left after dropping them, the result is NaN.
    """
    valid = data.dropna()
    highest, lowest = valid.max(), valid.min()
    return highest - lowest

In [None]:
# Spread (max - min) of the tcCO2 readings within each 10-minute window;
# windows are labelled with their end time, like the 10-minute means
tcCO2_10min_data_range = {
    patient: recording['pCO2 [mmHg]'].resample('10min', label='right').apply(data_range)
    for patient, recording in tcCO2.items()
}

In [None]:
# Plot the 10-minute tcCO2 range of every recording and save each figure to DATA_DUMP
filetype = 'jpg'
dpi = 300
majorFmt = dates.DateFormatter('%d/%m\n%H:%M')

for recording in tcCO2:
    fig = plt.figure()
    fig.set_size_inches(12, 4)
    ax = fig.add_subplot(1, 1, 1)
    data = tcCO2_10min_data_range[recording]
    # x_compat = True: plot with plain matplotlib date ticks so that the
    # formatter set below is applied
    data.plot(ax = ax, color = 'red', x_compat = True)
    # Tick format and tick label styling are set once
    # (these two calls used to be issued twice in a row)
    ax.xaxis.set_major_formatter(majorFmt)
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, fontsize = 12, horizontalalignment = 'center')
    ax.set_xlabel('Time (hours)', size = 12, color = 'black')
    ax.set_ylabel('mmHg', size = 12, color = 'black')
    ax.set_title(recording,  size = 12, color = 'black')
    ax.legend(['tcCO$_2$'], ncol = 2, fontsize = 12, loc = 'best')
    # Explicit boolean instead of the string 'on'
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')

    fig.savefig(os.path.join(DATA_DUMP, f'{recording}_tcCO2_10min_data_range_unedited.{filetype}'),
        dpi = dpi, format = filetype, bbox_inches='tight', pad_inches=0.1,)

    # Every figure except LVD003 is closed after saving to release its memory;
    # LVD003 stays open (presumably so it is shown in the notebook as an example)
    if recording != 'LVD003':
        plt.close(fig)

In [None]:
# Keep only the 10-minute periods during which the range of tcCO2 was <15 mmHg.
# Periods whose range is NaN fail the comparison and are dropped as well.

tcCO2_10min_mean_clean = {}

for patient in tcCO2:
    window_range = tcCO2_10min_data_range[patient]
    stable_periods = window_range[window_range < 15].index
    tcCO2_10min_mean_clean[patient] = tcCO2_10_min_mean[patient].reindex(stable_periods)

In [None]:
# Plot the artifact-filtered 10-minute mean tcCO2 of every recording and save
# each figure to DATA_DUMP
filetype = 'jpg'
dpi = 300
majorFmt = dates.DateFormatter('%d/%m\n%H:%M')

for recording in tcCO2_10min_mean_clean:
    fig = plt.figure()
    fig.set_size_inches(12, 4)
    ax = fig.add_subplot(1, 1, 1)
    data = tcCO2_10min_mean_clean[recording]['pCO2 [mmHg]']
    # x_compat = True: plot with plain matplotlib date ticks so that the
    # formatter set below is applied
    data.plot(ax = ax, color = 'red', x_compat = True)
    # Tick format and tick label styling are set once
    # (these two calls used to be issued twice in a row)
    ax.xaxis.set_major_formatter(majorFmt)
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, fontsize = 12, horizontalalignment = 'center')
    ax.set_xlabel('Time (hours)', size = 12, color = 'black')
    ax.set_ylabel('mmHg', size = 12, color = 'black')
    # Fixed y-axis so that the figures of different recordings are comparable
    ax.set_ylim(0,120)
    ax.set_title(recording,  size = 12, color = 'black')
    ax.legend(['tcCO$_2$'], ncol = 2, fontsize = 12, loc = 'best')
    # Explicit boolean instead of the string 'on'
    ax.grid(True, linestyle='-', linewidth=0.5, color = 'gray')

    fig.savefig(os.path.join(DATA_DUMP, f'{recording}_tcCO2_10min_mean_clean.{filetype}'),
        dpi = dpi, format = filetype, bbox_inches='tight', pad_inches=0.1,)

    # Every figure except LVD003 is closed after saving to release its memory;
    # LVD003 stays open (presumably so it is shown in the notebook as an example)
    if recording != 'LVD003':
        plt.close(fig)

In [None]:
# Preview the first rows of the filtered 10-minute means of LVD001
tcCO2_10min_mean_clean['LVD001'].head()

In [None]:
# Column types and non-null counts of the filtered 10-minute means of LVD001
tcCO2_10min_mean_clean['LVD001'].info()

### 6. Combine tcCO2 with blood gas CO2 and export the result as a pickle archive

In [None]:
# Inspect the index (sampling times) of the blood gases of LVD017
blood_gases['LVD017'].index

In [None]:
# Same index shifted forward by 5 minutes (displayed only, nothing is stored)
blood_gases['LVD017'].index.shift(periods=5, freq='min')

In [None]:
# Number of blood gas rows available for LVD001
len(blood_gases['LVD001'])

In [None]:
# Align every blood gas with the 10-minute tcCO2 windows.
#
# The tcCO2 data are resampled into 10-minute windows labelled with their end
# time, each window including its start and excluding its end. A blood gas is
# therefore stamped with the end of the window that contains its sampling
# time: floor to the start of the window, then add 10 minutes.
#
# This replaces "shift by 5 minutes, then round to 10 minutes": that approach
# left timestamps falling exactly on a 10-minute boundary dependent on how
# round() breaks ties, so such blood gases were not binned consistently.

blood_gases_10min = {}

for patient, gases in blood_gases.items():
    # Patients without any blood gas are left out
    if gases.empty:
        continue
    new_index = gases.index.floor('10min') + pd.Timedelta(minutes=10)
    blood_gases_10min[patient] = gases.set_index(new_index)

In [None]:
# Column types and non-null counts of the re-indexed blood gases of LVD017
blood_gases_10min['LVD017'].info()

In [None]:
# List the recordings for which 10-minute tcCO2 means are available
tcCO2_10_min_mean.keys()

In [None]:
# Pair the filtered 10-minute tcCO2 means with the blood gas pCO2 values.
# The outer join on the time index keeps every tcCO2 period and every blood
# gas; where one of the two is missing the corresponding column holds NaN.
tcCO2_processed = {}

for patient, gases in blood_gases_10min.items():
    # Test membership in the dictionary that is actually read below
    if patient not in tcCO2_10min_mean_clean:
        continue
    paired = pd.merge(
        tcCO2_10min_mean_clean[patient]['pCO2 [mmHg]'],
        gases['PCO2'],
        how='outer', left_index=True, right_index=True,
    )
    # First column: transcutaneous value; second column: blood gas value
    paired.columns = ['tcCO2', 'pCO2']
    tcCO2_processed[patient] = paired

In [None]:
# List the patients that have combined tcCO2 / blood gas data
tcCO2_processed.keys()

In [None]:
# Preview the first rows of the combined data of LVD017
tcCO2_processed['LVD017'].head()

In [None]:
# Column types and non-null counts of the combined data of LVD016
tcCO2_processed['LVD016'].info()

In [None]:
# Write the combined tcCO2 / blood gas data to DATA_DUMP as a pickle archive
processed_archive = os.path.join(DATA_DUMP, 'transcutaneous_data_processed.pickle')
with open(processed_archive, 'wb') as handle:
    pickle.dump(tcCO2_processed, handle, protocol=pickle.HIGHEST_PROTOCOL)