In [1]:
import pandas as pd 
import glob 
import os
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

In [2]:
# Glob template for the raw CSV export of one scenario; "{}" receives the scenario folder name.
sce_paths = "../data/raw/{}/csv/*.csv"

# Placeholders for the per-scenario table mappings; they are reassigned once the files are read.
scenario_1 = {}
scenario_2 = {}

def read_files_to_dataframe(path):
    """Read every CSV file matching a glob pattern into a dict of DataFrames.

    Parameters
    ----------
    path : str
        Glob pattern passed to ``glob.glob`` (for example ``"dir/*.csv"``).

    Returns
    -------
    dict
        Maps each file's base name without its final extension to the
        DataFrame returned by ``pd.read_csv`` for that file. The dict is
        empty when nothing matches the pattern.
    """
    frames = {}
    # Sort the matches so the dict's insertion order does not depend on the
    # order in which the filesystem happens to list the files.
    for file in sorted(glob.glob(path)):
        # Strip only the last extension: "claims.2020.csv" -> "claims.2020".
        # Cutting at the first dot would give two such files the same key and
        # silently overwrite one of them.
        table_name = os.path.splitext(os.path.basename(file))[0]
        frames[table_name] = pd.read_csv(file)
    return frames

# Resolve the glob pattern for each scenario folder, then load every CSV it matches.
scenario_1_glob = sce_paths.format("scenario01")
scenario_2_glob = sce_paths.format("scenario02")

scenario_1 = read_files_to_dataframe(scenario_1_glob)
scenario_2 = read_files_to_dataframe(scenario_2_glob)

### Last Encounters 

In [3]:
# Rank each patient's encounters from newest to oldest START (rank 1 = newest).
# NOTE(review): assumes START sorts chronologically as loaded (e.g. ISO-8601 text) — confirm.
encounters_1 = scenario_1['encounters']
encounter_recency_1 = (
    encounters_1
    .sort_values(['START'], ascending=[False])
    .groupby(['PATIENT'])
    .cumcount()
)
# Index-aligned assignment; encounters_1 is the same object as scenario_1['encounters'],
# so the new column appears there as well.
encounters_1['row_number'] = encounter_recency_1 + 1

# Keep only the top-ranked encounter of every patient.
is_latest_encounter_1 = encounters_1['row_number'] == 1
last_encounters_1 = encounters_1[is_latest_encounter_1]

In [4]:
# Rank each patient's encounters from newest to oldest START (rank 1 = newest).
# NOTE(review): assumes START sorts chronologically as loaded (e.g. ISO-8601 text) — confirm.
encounters_2 = scenario_2['encounters']
encounter_recency_2 = (
    encounters_2
    .sort_values(['START'], ascending=[False])
    .groupby(['PATIENT'])
    .cumcount()
)
# Index-aligned assignment; encounters_2 is the same object as scenario_2['encounters'],
# so the new column appears there as well.
encounters_2['row_number'] = encounter_recency_2 + 1

# Keep only the top-ranked encounter of every patient.
is_latest_encounter_2 = encounters_2['row_number'] == 1
last_encounters_2 = encounters_2[is_latest_encounter_2]

### Patients' last encounters

In [13]:
# Window, in days, between the last encounter's START and DEATHDATE (author's note: 7 or 15).
threshold_death = 15 # 7 or 15
patients_1 = scenario_1['patients']

# Attach each patient's most recent encounter; columns present in both frames are
# disambiguated with the '_patient' / '_encounter' suffixes.
patients_1_last_encounter = patients_1.merge(
    last_encounters_1,
    left_on='Id',
    right_on='PATIENT',
    suffixes=('_patient', '_encounter'),
)

# Drop any timezone from both timestamps so they can be subtracted from each other.
death_date_1 = pd.to_datetime(patients_1_last_encounter['DEATHDATE']).dt.tz_localize(None)
last_start_1 = pd.to_datetime(patients_1_last_encounter['START']).dt.tz_localize(None)

# True when DEATHDATE is at most threshold_death days after the last encounter's START.
# A row whose difference is NaT (e.g. unparseable or missing DEATHDATE) compares as False.
patients_1_last_encounter['death_threshold'] = (
    (death_date_1 - last_start_1) <= datetime.timedelta(days=threshold_death)
)

# Per-patient encounter counts, one column per ENCOUNTERCLASS value (0 where a class is absent).
patients_1_encouterclasses = (
    encounters_1
    .groupby(['PATIENT', 'ENCOUNTERCLASS'])
    .size()
    .unstack('ENCOUNTERCLASS', fill_value=0)
    .reset_index()
)
patients_1_last_encounter = patients_1_last_encounter.merge(
    patients_1_encouterclasses,
    left_on='Id_patient',
    right_on='PATIENT',
    suffixes=('_patient', '_encounter_classes'),
)

In [14]:
patients_2 = scenario_2['patients']

# Attach each patient's most recent encounter; columns present in both frames are
# disambiguated with the '_patient' / '_encounter' suffixes.
patients_2_last_encounter = patients_2.merge(
    last_encounters_2,
    left_on='Id',
    right_on='PATIENT',
    suffixes=('_patient', '_encounter'),
)

# Drop any timezone from both timestamps so they can be subtracted from each other.
death_date_2 = pd.to_datetime(patients_2_last_encounter['DEATHDATE']).dt.tz_localize(None)
last_start_2 = pd.to_datetime(patients_2_last_encounter['START']).dt.tz_localize(None)

# True when DEATHDATE is at most threshold_death days after the last encounter's START.
# threshold_death must already be defined by an earlier cell.
# A row whose difference is NaT (e.g. unparseable or missing DEATHDATE) compares as False.
patients_2_last_encounter['death_threshold'] = (
    (death_date_2 - last_start_2) <= datetime.timedelta(days=threshold_death)
)

# Per-patient encounter counts, one column per ENCOUNTERCLASS value (0 where a class is absent).
patients_2_encouterclasses = (
    encounters_2
    .groupby(['PATIENT', 'ENCOUNTERCLASS'])
    .size()
    .unstack('ENCOUNTERCLASS', fill_value=0)
    .reset_index()
)
patients_2_last_encounter = patients_2_last_encounter.merge(
    patients_2_encouterclasses,
    left_on='Id_patient',
    right_on='PATIENT',
    suffixes=('_patient', '_encounter_classes'),
)

### Patients' last condition and encounter

In [15]:
# Rank each patient's conditions by STOP, then START, both descending (rank 1 = first after sorting).
# NOTE(review): sort_values puts missing values last by default, even when descending, so a
# condition without a STOP ranks behind every condition that has one — confirm this is intended.
conditions_1 = scenario_1['conditions']
condition_recency_1 = (
    conditions_1
    .sort_values(['STOP', 'START'], ascending=[False, False])
    .groupby(['PATIENT'])
    .cumcount()
)
# conditions_1 is the same object as scenario_1['conditions'], so the column is added there too.
conditions_1['row_number'] = condition_recency_1 + 1
last_conditions_1 = conditions_1[conditions_1['row_number'] == 1]

# Disabled alternative, kept for reference:
#patient_last_conditions_1 = last_conditions_1.groupby(['PATIENT', 'CODE']).size(). \
#unstack('CODE', fill_value=0).reset_index()

# Add the top-ranked condition CODE to each patient row. merge defaults to an inner join,
# so patients with no row in last_conditions_1 are left out of the result.
last_condition_codes_1 = last_conditions_1[['PATIENT', 'CODE']]
patients_last_encounter_conditions_1 = patients_1_last_encounter.merge(
    last_condition_codes_1,
    left_on='Id_patient',
    right_on='PATIENT',
    suffixes=('_patient_encounter_classes', '_conditions'),
)

In [16]:
# Rank each patient's conditions by STOP, then START, both descending (rank 1 = first after sorting).
# NOTE(review): sort_values puts missing values last by default, even when descending, so a
# condition without a STOP ranks behind every condition that has one — confirm this is intended.
conditions_2 = scenario_2['conditions']
condition_recency_2 = (
    conditions_2
    .sort_values(['STOP', 'START'], ascending=[False, False])
    .groupby(['PATIENT'])
    .cumcount()
)
# conditions_2 is the same object as scenario_2['conditions'], so the column is added there too.
conditions_2['row_number'] = condition_recency_2 + 1
last_conditions_2 = conditions_2[conditions_2['row_number'] == 1]

# Disabled alternative, kept for reference:
#patient_last_conditions_2 = last_conditions_2.groupby(['PATIENT', 'CODE']).size(). \
#unstack('CODE', fill_value=0).reset_index()

# Add the top-ranked condition CODE to each patient row. merge defaults to an inner join,
# so patients with no row in last_conditions_2 are left out of the result.
last_condition_codes_2 = last_conditions_2[['PATIENT', 'CODE']]
patients_last_encounter_conditions_2 = patients_2_last_encounter.merge(
    last_condition_codes_2,
    left_on='Id_patient',
    right_on='PATIENT',
    suffixes=('_patient_encounter_classes', '_conditions'),
)

In [17]:
# How many scenario 1 rows fall inside / outside the death threshold window.
patients_last_encounter_conditions_1.loc[:, 'death_threshold'].value_counts()

False    966
True     174
Name: death_threshold, dtype: int64

In [18]:
# How many scenario 2 rows fall inside / outside the death threshold window.
patients_last_encounter_conditions_2.loc[:, 'death_threshold'].value_counts()

False    987
True     121
Name: death_threshold, dtype: int64

In [19]:
# Export the scenario 1 table; the file name embeds the threshold_death value in use.
output_path_1 = "../data/processed/patients_last_encounter_conditions_{}days_scenario1.csv".format(threshold_death)
# Create the target folder first so the export also succeeds where it does not exist yet
# (to_csv does not create missing parent directories).
os.makedirs(os.path.dirname(output_path_1), exist_ok=True)
patients_last_encounter_conditions_1.to_csv(output_path_1, index = False)

In [20]:
# Export the scenario 2 table; the file name embeds the threshold_death value in use.
output_path_2 = "../data/processed/patients_last_encounter_conditions_{}days_scenario2.csv".format(threshold_death)
# Create the target folder first so the export also succeeds where it does not exist yet
# (to_csv does not create missing parent directories).
os.makedirs(os.path.dirname(output_path_2), exist_ok=True)
patients_last_encounter_conditions_2.to_csv(output_path_2, index = False)