In [183]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta

### Reformat Ghent University Hospital dataset

In [184]:
import os
abs_dir = os.getcwd()
rel_dir = os.path.join(abs_dir, '../../data/raw/hospital/UZGent_full.xlsx')
df = pd.read_excel(rel_dir, sheet_name="Sheet1")

In [185]:
def isnat(your_datetime):
    dtype_string = str(your_datetime.dtype)
    if 'datetime64' in dtype_string or 'timedelta64' in dtype_string:
        return your_datetime.view('i8') == np.datetime64('NAT').view('i8')
    return False  # it can't be a NaT if it's not a dateime

In [186]:
age=[]
sex=[]
outcome=[]
stay_type=[]
dc=[]
dICU=[]
dICUrec=[]
n=[]

# drop empty entries in Outcome status/date from dataframe
df['Outcome status'].replace('', np.nan, inplace=True)
df['Outcome date'].replace('', np.nan, inplace=True)
df.dropna(subset=['Outcome status'], inplace=True)
df.dropna(subset=['Outcome date'], inplace=True)

# Conversion of dates to uniform datatype
df['Admission date IZ'] = df['Admission date IZ'].values.astype('datetime64[D]')
df['Stopdate IZ'] = df['Stopdate IZ'].values.astype('datetime64[D]')
df['Outcome date'] = df['Outcome date'].values.astype('datetime64[D]')

id_group=df.groupby(['UZCOVID_id'])
for g_idx, group in id_group:
    # filter out 'transfer/revalidatie' in column 'Outcome status'
    if (group['Outcome status'].values[0] != 'transfer/revalidatie'):
        # add patient age
        age.append(group['Age'].values[0])        
        # add patient sex
        if group['Sex'].values[0] == 'Male':
            sex.append('M')
        else:
            sex.append('F')
        # check if patient has recovered
        if group['Outcome status'].values[0] != 'overleden':
            outcome.append('R')
        else:
            outcome.append('D')
        # if the admission date to IZ has a value do,
        if not isnat(group['Admission date IZ'].values[0]):
            # patient had an ICU stay
            stay_type.append('ICU')
            # length of stay in ICU
            dICU.append((group['Stopdate IZ'].values[0] - group['Admission date IZ'].values[0]).astype('timedelta64[D]').astype(int))
            # length of stay after ICU
            dICUrec.append((group['Outcome date'].values[0] - group['Stopdate IZ'].values[0]).astype('timedelta64[D]').astype(int))
            # length of stay before ICU
            if (group['Admission date IZ'].values[0] - group['Admission date UZ Gent'].values[0]).astype('timedelta64[D]').astype(int) >= 0:
                dc.append((group['Admission date IZ'].values[0] - group['Admission date UZ Gent'].values[0]).astype('timedelta64[D]').astype(int))
            else:
                dc.append(0)
        else:
            # patient had a Cohort only stay
            stay_type.append('Cohort only')
            dc.append((group['Outcome date'].values[0] - group['Admission date UZ Gent'].values[0]).astype('timedelta64[D]').astype(int))
            dICU.append(0)
            dICUrec.append(0)
            
UZG_dict={
    'age': age,
    'sex': sex,
    'stay_type': stay_type,
    'outcome': outcome,
    'dC': dc,
    'dICU': dICU,
    'dICUrec': dICUrec
}

UZG_df = pd.DataFrame(UZG_dict)
UZG_df.tail()

Unnamed: 0,age,sex,stay_type,outcome,dC,dICU,dICUrec
172,52,F,Cohort only,R,16,0,0
173,51,M,Cohort only,R,3,0,0
174,51,M,ICU,R,5,2,6
175,51,M,Cohort only,R,11,0,0
176,51,M,Cohort only,R,1,0,0


In [208]:
d_C_to_ICU = UZG_df[(UZG_df.stay_type == 'ICU')].dC.mean()
pr

### Reformat AZMM dataset

In [187]:
import os
abs_dir = os.getcwd()
rel_dir = os.path.join(abs_dir, '../../data/raw/hospital/AZmariaMiddelares.xlsx')
df = pd.read_excel(rel_dir, sheet_name="Sheet1")

In [188]:
patients_dict={}
id_group=df.groupby(['Opnamenummer'])
for g_idx, group in id_group:
    # filter out 'ZZ-Niet ingevuld' in column 'Bestemming'
    if group['Bestemming na ontslag'].values[0] != 'ZZ-Niet ingevuld':
        # add patient ID
        patients_dict.update({g_idx: {}})
        # add patient age
        patients_dict[g_idx].update({'Age':group['Leeftijd'].values[0]})
        # add patient sex
        if group['Geslacht'].values[0] == 'Man':
            patients_dict[g_idx].update({'Sex': 'M'})
        else:
            patients_dict[g_idx].update({'Sex': 'F'})
        # add patient recovered or dead
        if group['Bestemming na ontslag'].values[0] != 'Overleden':
            patients_dict[g_idx].update({'Status': 'R'})
        else:
            patients_dict[g_idx].update({'Status': 'D'})
        # add patients wards as list, neglect emergency room and buffer
        wards=[]
        lengths=[]
        for r_idx, row in group.iterrows(): 
            if (group['Afdeling'][r_idx] != 'Spoed' and group['Afdeling'][r_idx] != 'Bufferafdeling'):
                wards.append(group['Afdeling'][r_idx])
                lengths.append(group['Ligduur'][r_idx])
        patients_dict[g_idx].update({'Wards': wards[::-1]})
        patients_dict[g_idx].update({'Lengths': lengths[::-1]})

### Convert to a better format

In [189]:
# age=[]
# sex=[]
# stay_type=[]
# dc=[]
# dICU=[]
# dICUrec=[]
# outcome=[]

for ID in patients_dict:
    # First append the age and sex of the patient
    age.append(patients_dict[ID]['Age'])
    sex.append(patients_dict[ID]['Sex'])
    # Then check if the patient recovered or not
    if patients_dict[ID]['Status'] == 'R':
        outcome.append('R')
    else:
        outcome.append('D')
    # if any of the patients entries was in an ICU ward do,
    if any(patients_dict[ID]['Wards'][idx] == 'IZ' for idx,ward in enumerate(patients_dict[ID]['Wards'])):
        stay_type.append('ICU')
        # loop over wards and indices
        for idx,ward in enumerate(patients_dict[ID]['Wards']):
            # get index of ICU in wards
            if ward == 'IZ':
                # if recovered
                if patients_dict[ID]['Status'] == 'R':
                    # check if patient had a recovery stay in cohort or not and extract total length
                    if len(patients_dict[ID]['Wards'])-1 > idx:
                        dICUrec.append(sum(patients_dict[ID]['Lengths'][idx+1:]))
                    else:
                        dICUrec.append(0)
                    # add patient data to lists
                    dc.append(0)
                    dICU.append(patients_dict[ID]['Lengths'][idx])
                    
                # if dead
                else:
                    # check if patient had a recovery stay in cohort or not and extract total length
                    if len(patients_dict[ID]['Wards'])-1 > idx:
                         dICUrec.append(sum(patients_dict[ID]['Lengths'][idx+1:]))
                    else:
                        dICUrec.append(0)
                    # add patient data to lists
                    dc.append(0)
                    dICU.append(patients_dict[ID]['Lengths'][idx])
    # if the patient didn't stay in ICU but did stay in Cohort
    elif any( (patients_dict[ID]['Wards'][idx] == 'Cohort-afdeling D501') or (patients_dict[ID]['Wards'][idx] == 'Cohort-afdeling D601') for idx,ward in enumerate(patients_dict[ID]['Wards'])):
        stay_type.append('Cohort only')
        dc.append(sum(patients_dict[ID]['Lengths']))
        dICU.append(0)
        dICUrec.append(0)
    else:
        stay_type.append('ER only')
        dc.append(0)
        dICU.append(0)
        dICUrec.append(0)

AZMM_dict={
    'age': age,
    'sex': sex,
    'stay_type': stay_type,
    'outcome': outcome,
    'dC': dc,
    'dICU': dICU,
    'dICUrec': dICUrec
}

AZMM_df = pd.DataFrame(AZMM_dict)
AZMM_df.tail()

Unnamed: 0,age,sex,stay_type,outcome,dC,dICU,dICUrec
365,82,F,Cohort only,R,5.083331,0.0,0.0
366,73,F,ICU,R,0.0,0.89375,9.095832
367,90,M,Cohort only,R,1.856249,0.0,0.0
368,55,F,Cohort only,R,3.697916,0.0,0.0
369,75,F,ER only,R,0.0,0.0,0.0


### Make an age-stratified parameter dataframe

In [190]:
age_classes = pd.IntervalIndex.from_tuples([(0,10),(10,20),(20,30),(30,40),(40,50),(50,60),(60,70),(70,80),(80,120)], 
                                           closed='left')
AZMM_df['age_class'] = pd.cut(AZMM_df.age, bins=age_classes)

In [191]:
hospital_parameters_age = pd.DataFrame(index=age_classes, 
                                   columns=['sample_size','c','m0','m0_{ICU}','m0_{C}','dC','dC_R','dC_D','dICU','dICU_R','dICU_D','dICUrec'])

In [192]:
hospital_parameters_age['sample_size']=AZMM_df.groupby(by='age_class').apply(lambda x: x.age.count())

In [207]:
# Fraction of patients that doesn't need ICU
hospital_parameters_age['c'] = AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[x.stay_type=='Cohort only'].age.count()/
                                          x[x.stay_type.isin(['ICU', 'Cohort only'])].age.count())
hospital_parameters_age['c']=hospital_parameters_age['c'].fillna(1)

# Mortality in ICU
hospital_parameters_age['m0']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[( (x.outcome=='D'))].age.count()/
                                          x[x.stay_type.isin(['ICU','Cohort only'])].age.count())

# Mortality in ICU
hospital_parameters_age['m0_{ICU}']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='ICU') & (x.outcome=='D'))].age.count()/
                                          x[x.stay_type.isin(['ICU'])].age.count())

# Mortality in Cohort
hospital_parameters_age['m0_{C}']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='Cohort only') & (x.outcome=='D'))].age.count()/
                                          x[x.stay_type.isin(['Cohort only'])].age.count())
hospital_parameters_age[['m0','m0_{ICU}','m0_{C}']]=hospital_parameters_age[['m0','m0_{ICU}','m0_{C}']].fillna(0)

# Hospitalisation length in Cohort
hospital_parameters_age['dC']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='Cohort only'))].dC.mean())

# Hospitalisation length in Cohort if recovered
hospital_parameters_age['dC_R']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='Cohort only') & (x.outcome=='R'))].dC.mean())

# Hospitalisation length in Cohort when deceased
hospital_parameters_age['dC_D']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='Cohort only') & (x.outcome=='D'))].dC.mean())

# Hospitalisation length in ICU
hospital_parameters_age['dICU']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='ICU'))].dICU.mean())

# Hospitalisation length in ICU if recovered
hospital_parameters_age['dICU_R']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='ICU') & (x.outcome=='R'))].dICU.mean())

# Hospitalisation length in ICU when deceased
hospital_parameters_age['dICU_D']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='ICU') & (x.outcome=='D'))].dICU.mean())

# Hospitalisation length in ICU when deceased
hospital_parameters_age['dICU_D']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='ICU') & (x.outcome=='D'))].dICU.mean())

# Hospitalisation length in ICU when deceased
hospital_parameters_age['dICUrec']=AZMM_df.groupby(by='age_class').apply(
                                lambda x: x[((x.stay_type=='ICU') )].dICUrec.mean())

hospital_parameters_age.to_html('age.html')

  lambda x: x[x.stay_type=='Cohort only'].age.count()/
  lambda x: x[( (x.outcome=='D'))].age.count()/
  lambda x: x[((x.stay_type=='ICU') & (x.outcome=='D'))].age.count()/
  lambda x: x[((x.stay_type=='Cohort only') & (x.outcome=='D'))].age.count()/


### Calculate the total averages of dC, dICU and dICUrec

In [205]:
dC = AZMM_df[AZMM_df.stay_type == 'Cohort only'].dC.mean()
dC_R = AZMM_df[((AZMM_df.stay_type == 'Cohort only')&(AZMM_df.outcome == 'R'))].dC.mean()
dC_D = AZMM_df[((AZMM_df.stay_type == 'Cohort only')&(AZMM_df.outcome == 'D'))].dC.mean()
dICU = AZMM_df[AZMM_df.stay_type == 'ICU'].dICU.mean()
dICU_R = AZMM_df[((AZMM_df.stay_type == 'ICU')&(AZMM_df.outcome == 'R'))].dICU.mean()
dICU_D = AZMM_df[((AZMM_df.stay_type == 'ICU')&(AZMM_df.outcome == 'D'))].dICU.mean()
dICUrec = AZMM_df[AZMM_df.stay_type == 'ICU'].dICUrec.mean()
print(dC,dC_R,dC_D)
print(dICU,dICU_R,dICU_D,dICUrec)

7.691700568702289 8.01029195412844 6.1132250681818165
12.51015754651163 9.866722145161289 19.339032333333336 6.4723349883720935


### Write results to Excel

In [206]:
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('twallema_AZMM_UZG.xlsx')

# Write each dataframe to a different worksheet.
AZMM_df.to_excel(writer,index=False)
# Close the Pandas Excel writer and output the Excel file.
writer.save()