<img src="./University_Debrecen_logo.jpg" alt="Drawing" style="width: 200px;"/>

# Import and process clinical details and export them as Excel and CSV files

#### Author: Dr Gusztav Belteki

### 1. Import the required modules

In [None]:
import os
import sys
from collections import defaultdict
from datetime import datetime

import pandas as pd
from pandas import DataFrame

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Report the interpreter and pandas versions used to run the notebook
for label, version in (('Python', sys.version), ('pandas', pd.__version__)):
    print(f'{label} version: {version}')

### 2. List and set the working directory and the directory to write out data

In [None]:
# Root folder that holds the clinical data
PATH = os.path.join(os.sep, 'Users', 'guszti', 'Library', 'Mobile Documents', 'com~apple~CloudDocs',
                    'Documents', 'Research', 'Ventilation')

# Name of the external hard drive
DRIVE = 'Guszti'

# Directory containing clinical and blood gas data
DIR_READ_CLIN = os.path.join(PATH, 'ventilation_draeger_debrecen')

# Folder on external drive to read the ventilation data from
DIR_READ_VENT = os.path.join(os.sep, 'Volumes', DRIVE, 'Draeger_debrecen')

# Folder to export data to. It lives on the volume named by DRIVE, so the
# constant is reused here instead of repeating the volume name.
DATA_DUMP = os.path.join(os.sep, 'Volumes', DRIVE, 'data_dump', 'draeger_debrecen', 'analysis_individual')
# Create the export folder (and any missing parents) if it does not exist yet
os.makedirs(DATA_DUMP, exist_ok=True)

In [None]:
# Display the three configured directories so they can be checked by eye
DIR_READ_CLIN, DIR_READ_VENT, DATA_DUMP

### 3. Import the list of available recordings
This recording list is produced by the `recording_list.ipynb` notebook

In [None]:
# Read the whitespace-separated list of entries written by `recording_list.ipynb`
with open(os.path.join(DIR_READ_CLIN, 'patient_list_all_debrecen.txt'), 'r') as fhandle:
    # split() without arguments ignores leading/trailing whitespace and a final
    # newline, so nothing has to be sliced off and the last entry is never lost
    # when the file does not end with a separator
    patients_all = fhandle.read().split()

print(patients_all)

In [None]:
# Number of entries read from the list file
len(patients_all)

### 4. Import and process clinical data

In [None]:
def time_changer(strng):
    """Turn a recording name into a 'date HH:MM:SS' string.

    The date is everything before the first underscore; hours, minutes and
    seconds are taken from the fixed character positions 11-12, 13-14 and
    15-16 of the name.
    NOTE(review): this presumably expects names like 'YYYY-MM-DD_HHMMSS...';
    confirm against the actual recording names.
    """
    date, _, _ = strng.partition('_')
    hours, minutes, seconds = strng[11:13], strng[13:15], strng[15:17]
    return f'{date} {hours}:{minutes}:{seconds}'

In [None]:
# Load the per-recording clinical data sheet
clinical_details = pd.read_excel(os.path.join(DIR_READ_CLIN, 'ventilation_debrecen_patient_data.xlsx'))

# Unless the weight and other variables changed, use the value from the previous row
# (DataFrame.ffill() replaces the deprecated fillna(method='ffill'))
clinical_details = clinical_details.ffill()

# The recording name encodes the start of the recording; turn it into a timestamp
clinical_details['Recording start'] = pd.to_datetime(clinical_details['Recording'].apply(time_changer))

for column in ['Gestation at birth', 'Birth weight', 'Current weight', 'ETT size']:
    clinical_details[column] = clinical_details[column].astype('float')
for column in ['Date of birth']:
    # pd.to_datetime works across pandas versions; the unit-less
    # astype('datetime64') is rejected by pandas 2
    clinical_details[column] = pd.to_datetime(clinical_details[column])

# LVD020 was recorded with the clock of computer being 28.5 days behind the actual time.
# Select the rows through the (forward-filled) 'Patient' column so that every
# recording of this patient is corrected and no MultiIndex lookup is needed.
is_lvd020 = clinical_details['Patient'] == 'LVD020'
clinical_details.loc[is_lvd020, 'Recording start'] += pd.Timedelta(days=28.5)

clinical_details['Postnatal age'] = clinical_details['Recording start'] - clinical_details['Date of birth']
# Corrected gestation = gestation at birth + postnatal age expressed in weeks;
# dividing by a one-week Timedelta avoids going through raw nanosecond integers
clinical_details['Corrected gestation'] = (
    clinical_details['Gestation at birth'] + clinical_details['Postnatal age'] / pd.Timedelta(weeks=1)
).round(2)

# Index the table by patient and recording (both columns move into the index)
clinical_details = clinical_details.set_index(['Patient', 'Recording'])

clinical_details.head()

In [None]:
# Summary (columns, dtypes, non-null counts) of the per-recording table
clinical_details.info()

In [None]:
# Labels of the first index level ('Patient') that do not appear in patients_all
set(clinical_details.index.levels[0]) - set(patients_all)

In [None]:
# Columns carried over into the per-patient table
columns_to_keep = [
    'Gestation at birth', 'Date of birth', 'Birth weight', 'Current weight',
    'Recording start', 'Postnatal age', 'Corrected gestation', 'Main diagnoses',
    'Mode of intubation', 'ETT size', 'ETT insertion length', 'Comment',
]

# One row per patient: for every column, the first non-null value found among
# that patient's recordings; then restrict the table to the selected columns
clinical_details_patients = clinical_details.groupby(level='Patient').first().loc[:, columns_to_keep]
clinical_details_patients.head()

In [None]:
# Summary (columns, dtypes, non-null counts) of the per-patient table
clinical_details_patients.info()

In [None]:
# Descriptive statistics of the per-patient table
clinical_details_patients.describe()

### 5. Categorize patients according to their gestation at birth and postnatal age _at the start of the recording_

In [None]:
# Patients with a gestation at birth of 37 weeks or more
term_babies = clinical_details_patients.loc[clinical_details_patients['Gestation at birth'].ge(37)]
len(term_babies)

In [None]:
# Patients with a gestation at birth below 37 weeks
preterm_babies = clinical_details_patients.loc[clinical_details_patients['Gestation at birth'].lt(37)]
len(preterm_babies)

In [None]:
# Patients with a gestation at birth below 34 weeks
less_than_34 = clinical_details_patients.loc[clinical_details_patients['Gestation at birth'].lt(34)]
len(less_than_34)

In [None]:
# Patients with a gestation at birth below 28 weeks
less_than_28 = clinical_details_patients.loc[clinical_details_patients['Gestation at birth'].lt(28)]
len(less_than_28)

In [None]:
# Patients with a gestation at birth below 26 weeks
less_than_26 = clinical_details_patients.loc[clinical_details_patients['Gestation at birth'].lt(26)]
len(less_than_26)

In [None]:
# Patients with a birth weight below 2500 grams
LBW = clinical_details_patients.loc[clinical_details_patients['Birth weight'].lt(2500)]
len(LBW)

In [None]:
# Patients with a birth weight below 1500 grams
VLBW = clinical_details_patients.loc[clinical_details_patients['Birth weight'].lt(1500)]
len(VLBW)

In [None]:
# Patients with a birth weight below 1000 grams
XLBW = clinical_details_patients.loc[clinical_details_patients['Birth weight'].lt(1000)]
len(XLBW)

In [None]:
# Worksheet name -> table to export, in the order the sheets are written
sheets = {
    'recordings': clinical_details,
    'patients': clinical_details_patients,
    'term_babies': term_babies,
    'preterm_babies': preterm_babies,
    'less_than_34': less_than_34,
    'less_than_28': less_than_28,
    'less_than_26': less_than_26,
    'LBW': LBW,
    'VLBW': VLBW,
    'XLBW': XLBW,
}

# The context manager saves and closes the workbook on exit, even if one of the
# exports raises; ExcelWriter.save() no longer exists in pandas 2.
with pd.ExcelWriter(os.path.join(DIR_READ_CLIN, 'clinical_details_debrecen.xlsx')) as writer:
    for sheet_name, table in sheets.items():
        # sheet_name is passed by keyword: positional use is deprecated
        table.to_excel(writer, sheet_name=sheet_name)

In [None]:
# Export the per-recording and the per-patient tables as CSV files into DIR_READ_CLIN
for table, filename in (
    (clinical_details, 'clinical_details_recordings.csv'),
    (clinical_details_patients, 'clinical_details_patients.csv'),
):
    table.to_csv(os.path.join(DIR_READ_CLIN, filename))

In [None]:
# Write a plain-text summary of the clinical details into one folder per entry
# of the per-patient table.
for recording in sorted(clinical_details_patients.index):

    # One sub-folder per index label; created only if it is missing
    folder = os.path.join(DATA_DUMP, recording)
    os.makedirs(folder, exist_ok=True)

    # Look the row up once instead of once per field
    details = clinical_details_patients.loc[recording]

    # `with` guarantees the file is closed even if formatting one of the
    # fields raises (e.g. '%d' applied to a missing weight)
    with open(os.path.join(folder, f'{recording}_clinical_info.txt'), 'w') as fileout:
        fileout.write('Recording:                    %s\n\n' % recording)
        fileout.write('Gestation:                    %0.1f weeks\n' % details['Gestation at birth'])
        fileout.write('Postnatal age at start:       %s\n' % details['Postnatal age'])
        fileout.write('Corrected gestation at start: %0.1f weeks\n' % details['Corrected gestation'])
        fileout.write('Birth weight:                 %d grams\n' % details['Birth weight'])
        fileout.write('Weight at start:              %d grams\n' % details['Current weight'])
        fileout.write('Diagnoses:                    %s\n' % details['Main diagnoses'])
        fileout.write('Comments:                     %s\n' % details['Comment'])