# Setup Connection with DB

In [None]:
# import the required libraries
import numpy as np
from psycopg2 import connect
import pandas as pd
import pm4py
import numpy as np
import pandasql as ps
from pm4py.objects.conversion.log import converter as log_converter
import datetime
import os
import time
import math

# Raise the pandas display limits to 200 rows and 50 columns
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 50)

# Load all MIMIC-IV data tables from CSV

In [None]:
# import the MIMIC-IV data tables
# TODO: After getting access to MIMIC-IV, insert the tables as csv files in the 'datatables' folder
# or load the data tables in any other way.
# Directory (relative to the working directory) that holds the exported CSV tables
folder = "datatables/"
# Tables used to select the admissions (DRG codes, admissions, patients, diagnoses)
# and tables that provide the events (ICU stays, transfers, services, procedures, POE).
# NOTE(review): generate_event_log reads `poe_results_301`, which is not loaded here — confirm where it is defined.
drg_results = pd.read_csv(folder + 'drg_results.csv')
admission_results = pd.read_csv(folder + 'admission_results.csv')
icu_stay_results = pd.read_csv(folder + 'icu_stay_results.csv')
transfer_results = pd.read_csv(folder + 'transfer_results.csv')
patient_results = pd.read_csv(folder + 'patient_results.csv')
service_results = pd.read_csv(folder + 'service_results.csv')
procedures = pd.read_csv(folder + 'procedure_results.csv')
d_procedures = pd.read_csv(folder + 'd_procedure_results.csv')
poe_results = pd.read_csv(folder + 'poe_results.csv')
poe_detail_results = pd.read_csv(folder + 'poe_detail_results.csv')
icu_d_results = pd.read_csv(folder + 'icu_d_results.csv')
icd_procedureevents_results = pd.read_csv(folder + 'icd_procedureevents_results.csv')
d_diagnoses_icd_results = pd.read_csv(folder + 'd_diagnoses_icd_results.csv')
diagnoses_icd_results = pd.read_csv(folder + 'diagnoses_icd_results.csv')

# Event Extraction Functions

### Add admission events

In [None]:
# add the event type "admit", the starting point of the admission
def getAdmissionAdmits(event_log, _admissions):
    """Append one "admit" event per admission to the event log.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _admissions: DataFrame providing the columns hadm_id and admittime.

    Returns:
        A new DataFrame with the rows of event_log followed by the admit
        events; the index is renumbered.
    """
    admission_admits = _admissions[["hadm_id", "admittime"]].rename(
        columns={"hadm_id": "caseID", "admittime": "time_stamp"}
    )
    admission_admits["description"] = "admit"
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_admits], ignore_index=True)

# add the event type "discharge", the end of the admission
def getAdmissionDischarges(event_log, _admissions):
    """Append one "discharge" event per admission to the event log.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _admissions: DataFrame providing the columns hadm_id and dischtime.

    Returns:
        A new DataFrame with the rows of event_log followed by the discharge
        events; the index is renumbered.
    """
    admission_discharges = _admissions[["hadm_id", "dischtime"]].rename(
        columns={"hadm_id": "caseID", "dischtime": "time_stamp"}
    )
    admission_discharges["description"] = "discharge"
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_discharges], ignore_index=True)

# add the event type "patient passed", the point of passing of a patient during the admission
def getAdmissionDeaths(event_log, _admissions):
    """Append a "patient passed" event for every admission with a death time.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _admissions: DataFrame providing the columns hadm_id and deathtime.

    Returns:
        A new DataFrame with the rows of event_log followed by the death
        events; admissions without a deathtime contribute no event.
    """
    admission_deaths = (
        _admissions[["hadm_id", "deathtime"]]
        .rename(columns={"hadm_id": "caseID", "deathtime": "time_stamp"})
        .dropna(subset=["time_stamp"])
        .assign(description="patient passed")
    )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_deaths], ignore_index=True)

# add the event type "register in emergency department", the time of entering the emergency department
def getAdmissionEnterEDs(event_log, _admissions):
    """Append a "register in ED" event for every admission with an ED registration time.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _admissions: DataFrame providing the columns hadm_id and edregtime.

    Returns:
        A new DataFrame with the rows of event_log followed by the
        registration events; admissions without an edregtime are skipped.
    """
    admission_enter_ED = (
        _admissions[["hadm_id", "edregtime"]]
        .rename(columns={"hadm_id": "caseID", "edregtime": "time_stamp"})
        .dropna(subset=["time_stamp"])
        .assign(description="register in ED")
    )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_enter_ED], ignore_index=True)

# add the event type "leave emergency department", the time of leaving the emergency department
def getAdmissionLeaveEDs(event_log, _admissions):
    """Append a "leave ED" event for every admission with an ED exit time.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _admissions: DataFrame providing the columns hadm_id and edouttime.

    Returns:
        A new DataFrame with the rows of event_log followed by the exit
        events; admissions without an edouttime are skipped.
    """
    admission_leave_ED = (
        _admissions[["hadm_id", "edouttime"]]
        .rename(columns={"hadm_id": "caseID", "edouttime": "time_stamp"})
        .dropna(subset=["time_stamp"])
        .assign(description="leave ED")
    )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_leave_ED], ignore_index=True)

### Add icu_stay events

In [None]:
# add the event type "enter intensive care unit", the time of entering an intensive care unit
def getIcuEnters(event_log, _icu_stays):
    """Append an "enter icu: <first_careunit>" event for every ICU stay.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _icu_stays: DataFrame providing hadm_id, intime and first_careunit.

    Returns:
        A new DataFrame with the rows of event_log followed by the ICU entry
        events; stays without an intime are skipped.
    """
    admission_enter_icu = (
        _icu_stays[["hadm_id", "intime", "first_careunit"]]
        .rename(columns={"hadm_id": "caseID", "intime": "time_stamp"})
        .dropna(subset=["time_stamp"])
    )
    admission_enter_icu = admission_enter_icu.assign(
        description="enter icu: " + admission_enter_icu["first_careunit"]
    )[["caseID", "time_stamp", "description"]]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_enter_icu], ignore_index=True)

# add the event type "leave intensive care unit", the time of leaving an intensive care unit
def getIcuLeaves(event_log, _icu_stays):
    """Append a "leave icu: <last_careunit>" event for every ICU stay.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _icu_stays: DataFrame providing hadm_id, outtime and last_careunit.

    Returns:
        A new DataFrame with the rows of event_log followed by the ICU exit
        events; stays without an outtime are skipped.
    """
    admission_leave_icu = (
        _icu_stays[["hadm_id", "outtime", "last_careunit"]]
        .rename(columns={"hadm_id": "caseID", "outtime": "time_stamp"})
        .dropna(subset=["time_stamp"])
    )
    admission_leave_icu = admission_leave_icu.assign(
        description="leave icu: " + admission_leave_icu["last_careunit"]
    )[["caseID", "time_stamp", "description"]]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_leave_icu], ignore_index=True)

# add the event type "intensive care unit procedure", procedures conducted during intensive care with the description
def getIcuProcedures(event_log, _drg_icu_procedureevents):
    """Append an "icu procedure: <label>" event for every ICU procedure.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _drg_icu_procedureevents: DataFrame providing hadm_id, starttime and label.

    Returns:
        A new DataFrame with the rows of event_log followed by the ICU
        procedure events; procedures without a starttime are skipped.
    """
    admission_icu_procedures = (
        _drg_icu_procedureevents[["hadm_id", "starttime", "label"]]
        .rename(columns={"hadm_id": "caseID", "starttime": "time_stamp", "label": "description"})
        .dropna(subset=["time_stamp"])
    )
    admission_icu_procedures = admission_icu_procedures.assign(
        description="icu procedure: " + admission_icu_procedures["description"]
    )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_icu_procedures], ignore_index=True)

### Add procedures

In [None]:
# returns the description of a specific procedure
def getProcedureDescription(long_title, icd_version, icd_code):
    """Map a procedure's long title onto a coarse activity label.

    Args:
        long_title: Title text of the procedure, or a missing value.
        icd_version: Not used by the mapping.
        icd_code: Not used by the mapping.

    Returns:
        One of the 'Procedure: ...' labels; 'Procedure: Other' when the title
        is missing or matches none of the keywords.
    """
    fallback = 'Procedure: Other'
    if pd.isna(long_title):
        return fallback

    title = long_title.lower()
    if 'hip' in title:
        # the first matching keyword wins, in this order
        hip_labels = (
            ('replacement', 'Procedure: Hip replacement'),
            ('drainage', 'Procedure: Hip drainage'),
            ('removal of spacer', 'Procedure: Hip spacer removal'),
            ('revision', 'Procedure: Hip revision'),
        )
        for keyword, label in hip_labels:
            if keyword in title:
                return label
    # this keyword is matched case-sensitively against the untouched title
    if 'Insertion of Infusion' in long_title:
        return 'Procedure: Infusion insertion'
    if 'regional anesthetic' in title:
        return 'Procedure: regional anesthesia'
    return fallback

# add the event type "procedure", procedures conducted during the admission
def getProcedures(event_log, _drg_procedures):
    """Append one procedure event per row of the procedures table.

    The description comes from getProcedureDescription; the time stamp is the
    chart date rendered as text with " 00:00:00" appended.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _drg_procedures: DataFrame providing hadm_id, chartdate, icd_code,
            icd_version and long_title.

    Returns:
        A new DataFrame with the rows of event_log followed by the procedure
        events; the index is renumbered.
    """
    procedures_performed = _drg_procedures[
        ["hadm_id", "chartdate", "icd_code", "icd_version", "long_title"]
    ].rename(columns={"hadm_id": "caseID", "chartdate": "time_stamp"})

    # derive all labels in one pass instead of writing cell by cell via iterrows/.at
    descriptions = [
        getProcedureDescription(long_title, icd_version, icd_code)
        for long_title, icd_version, icd_code in zip(
            procedures_performed["long_title"],
            procedures_performed["icd_version"],
            procedures_performed["icd_code"],
        )
    ]
    procedure_events = pd.DataFrame({
        "caseID": procedures_performed["caseID"],
        "time_stamp": procedures_performed["time_stamp"].astype(str) + " 00:00:00",
        "description": descriptions,
    })
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, procedure_events], ignore_index=True)

### Add service events

In [None]:
# add the event type "service", services conducted during the admission
def getServices(event_log, _services):
    """Append a "Service: <curr_service>" event for every service record.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _services: DataFrame providing hadm_id, transfertime and curr_service.

    Returns:
        A new DataFrame with the rows of event_log followed by the service
        events; the index is renumbered.
    """
    admission_services = _services[["hadm_id", "transfertime", "curr_service"]].rename(
        columns={"hadm_id": "caseID", "transfertime": "time_stamp", "curr_service": "description"}
    )
    admission_services["description"] = "Service: " + admission_services["description"].astype(str)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_services], ignore_index=True)

### Add transfer events

In [None]:
# returns the description of a specific transfer
def getTransferDescription(eventType, careunit):
    """Build the activity label of a transfer from its event type and care unit.

    Args:
        eventType: Event type text; only used when careunit is missing.
        careunit: Name of the care unit, or a missing value.

    Returns:
        "Transfer: <eventType>" when careunit is missing, 'ICU' for any unit
        containing 'ICU', a grouped "Transfer: ..." label for the known unit
        families, and "Transfer: <careunit>" otherwise.
    """
    if pd.isnull(careunit):
        return "Transfer: " + eventType
    # ICU units are the only ones labelled without the "Transfer: " prefix
    if 'ICU' in careunit:
        return 'ICU'

    # keyword -> label; checked in order, the first hit wins
    # NOTE(review): "Hermatology" looks like a misspelling of "Hematology"; the label is kept as-is
    grouped_units = (
        ('Med/Surg', 'Transfer: Surgery'),
        ('Surgery', 'Transfer: Surgery'),
        ('Medical/Surgical', 'Transfer: Surgery'),
        ('Hematology/Oncology', 'Transfer: Hermatology/Oncology'),
        ('Emergency Department', 'Transfer: Emergency Department'),
    )
    for keyword, label in grouped_units:
        if keyword in careunit:
            return label
    return "Transfer: " + careunit

# add the event type "transfer", transfers of patients between departments
def getTransfers(event_log, _transfers):
    """Append one transfer event per row of the transfers table.

    Labels come from getTransferDescription. Rows whose label contains
    'Transfer: transfer' are left out.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _transfers: DataFrame providing hadm_id, intime, eventtype and careunit.

    Returns:
        A new DataFrame with the rows of event_log followed by the transfer
        events; the index is renumbered.
    """
    transfer_rows = []
    for case_id, time_stamp, event_type, careunit in zip(
        _transfers["hadm_id"], _transfers["intime"], _transfers["eventtype"], _transfers["careunit"]
    ):
        description = getTransferDescription(event_type, careunit)
        if 'Transfer: transfer' in description:
            continue
        transfer_rows.append((case_id, time_stamp, description))

    # building the frame once also works for empty input, where the previous
    # row-wise .at assignment never created the 'description' column
    admission_transfer_patient = pd.DataFrame(
        transfer_rows, columns=['caseID', 'time_stamp', 'description']
    )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_transfer_patient], ignore_index=True)

### Add poe events

In [None]:
# returns the description of a specific provider order entry (POE)
def getPoeDescription(order_type, order_subtype, field_name, field_value):
    """Derive the activity label of an order from its type, subtype and detail field.

    The rules are evaluated top to bottom and the first match wins.

    Args:
        order_type: Order type text.
        order_subtype: Order subtype text, or a missing value.
        field_name: Name of the detail field, or a missing value.
        field_value: Value of the detail field, or a missing value.

    Returns:
        The label string for the order.

    NOTE(review): a Radiology order with a missing subtype, or a row where
    only one of field_name / field_value is missing, raises TypeError here —
    confirm such rows cannot occur in the input.
    """
    subtype_text = str(order_subtype)
    type_lowered = order_type.lower()

    if 'Radiology' in order_type:
        if 'Interventional' in subtype_text:
            return 'POE: Interventional Radiology'
        return 'Imaging: ' + order_subtype
    if 'Product Order' in order_type:
        return 'POE: Transfusion'
    if any(keyword in type_lowered for keyword in ('bipap', 'ventilation', 'extubate')):
        return 'POE: Ventilation & Airway Support'
    if 'IV fluids' in subtype_text:
        return 'POE: Infusion'

    # orders without a detail row are labelled from type and subtype alone
    if pd.isna(field_name) and pd.isna(field_value):
        if pd.isna(order_subtype):
            return 'POE: ' + order_type
        return order_type + ': ' + order_subtype

    for unit in ('MICU', 'CMED'):
        if unit in field_value:
            return 'POE: ' + field_name + ': ' + unit
    if 'Admit to inpatient' in field_value:
        return 'POE: ' + order_subtype + ': ' + field_value
    if 'Consult Status Time' in field_name:
        return 'POE: Consult'
    return 'POE: ' + field_name + ': ' + field_value

# return whether an event should be considered a parallel event, i.e. an event that occurs all the time and interferes with a comprehensible process
def isParallelEvent(eventDescription):
    """Tell whether a label is one of the two labels treated as parallel events.

    Args:
        eventDescription: Activity label to check.

    Returns:
        True for exactly 'POE: Medications' or 'POE: Lab', False otherwise.
    """
    parallel_labels = ('POE: Medications', 'POE: Lab')
    return eventDescription in parallel_labels

# add the event type "provider order entry" (POE), the care events that are ordered by physicians
def getPoes(event_log, _drg_poes, filterParallelEvents):
    """Append one order event per row of the order table.

    Labels come from getPoeDescription. When filterParallelEvents is truthy,
    rows whose label isParallelEvent accepts are left out.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.
        _drg_poes: DataFrame providing hadm_id, ordertime, field_name,
            field_value, order_type and order_subtype.
        filterParallelEvents: Whether to drop the parallel events.

    Returns:
        A new DataFrame with the rows of event_log followed by the order
        events; the index is renumbered.
    """
    poe_rows = []
    for case_id, time_stamp, field_name, field_value, order_type, order_subtype in zip(
        _drg_poes["hadm_id"],
        _drg_poes["ordertime"],
        _drg_poes["field_name"],
        _drg_poes["field_value"],
        _drg_poes["order_type"],
        _drg_poes["order_subtype"],
    ):
        description = getPoeDescription(order_type, order_subtype, field_name, field_value)
        if filterParallelEvents and isParallelEvent(description):
            continue
        poe_rows.append((case_id, time_stamp, description))

    # one frame built at the end replaces the per-row .at write and the
    # full-frame drop that ran inside the loop
    admission_poes = pd.DataFrame(poe_rows, columns=['caseID', 'time_stamp', 'description'])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
    return pd.concat([event_log, admission_poes], ignore_index=True)

# Event Abstraction Functions

In [None]:
# sort all events and aggregate events that belong to discharges
def aggregateDischargesAndServices(event_log):
    """Sort the log and collapse redundant admit/discharge and service events.

    Within one case, comparing each event with the previously kept one:
      * an 'Admit to inpatient' event that shares its time stamp with a
        'Discharge Planning' / 'Discharge When' event is removed;
      * of two 'Service:' events less than 300 seconds apart, the earlier one
        is removed.

    The comparison state is reset at every case boundary. Previously the last
    event of one case was compared with the first event of the next; the time
    difference across cases can be negative, which always satisfied the
    "< 300 seconds" test and deleted the earlier case's last service event.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.

    Returns:
        The log sorted by caseID and time_stamp with the redundant rows
        removed; index labels of the sorted log are kept, so they have gaps.
    """
    # sort the log by the caseID and the timestamp
    event_log = event_log.sort_values(by=['caseID', 'time_stamp']).reset_index(drop=True)

    discharge_markers = ('Discharge Planning', 'Discharge When')
    to_drop = set()  # collected and dropped once; a set also tolerates repeated hits
    current_case = object()  # sentinel that equals no real caseID
    previous_description = ''
    previous_timestamp = ''
    previous_index = None

    for index, row in event_log.iterrows():
        if row['caseID'] != current_case:
            # new case: forget the previous event so nothing is merged across cases
            current_case = row['caseID']
            previous_description = ''
            previous_timestamp = ''
            previous_index = None

        description = row['description']
        time_stamp = row['time_stamp']
        is_discharge = any(marker in description for marker in discharge_markers)
        previous_is_discharge = any(marker in previous_description for marker in discharge_markers)

        if is_discharge and 'Admit to inpatient' in previous_description and previous_timestamp == time_stamp:
            to_drop.add(previous_index)
        if previous_is_discharge and 'Admit to inpatient' in description and previous_timestamp == time_stamp:
            # the dropped admit event must not become the "previous" event
            to_drop.add(index)
            continue
        if (
            'Service:' in description
            and 'Service:' in previous_description
            and (pd.to_datetime(time_stamp) - pd.to_datetime(previous_timestamp)).total_seconds() < 300
        ):
            to_drop.add(previous_index)

        previous_description = description
        previous_timestamp = time_stamp
        previous_index = index

    return event_log.drop(index=sorted(to_drop))

# sort all events and normalize or delete certain events
def normalizeAndDeleteEvents(event_log):
    """Sort the log, normalize selected labels and delete unwanted events.

    Per event, in this order:
      * 'Consult' / 'Level of Urgency' events are relabelled 'POE: Consult';
      * 'General Care' / 'Tubes & Drains' events are relabelled 'General Care',
        and removed when they directly follow a 'General Care' event of the
        same case;
      * 'Discharge' events are relabelled 'POE: Discharge';
      * an event is removed when it and the previously kept event of the same
        case both contain 'POE: Consult' (or both 'POE: Discharge');
      * events containing one of the drop markers below are removed.

    All checks use the original description, not the relabelled one.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.

    Returns:
        The log sorted by caseID and time_stamp with labels normalized and the
        unwanted rows removed; index labels of the sorted log are kept.
    """
    # sort the log by the caseID and the timestamp
    event_log = event_log.sort_values(by=['caseID', 'time_stamp']).reset_index(drop=True)

    care_markers = ('General Care', 'Tubes & Drains')
    repeat_markers = ('POE: Consult', 'POE: Discharge')
    # 'Cardiology' and 'Blood' cover the former 'Cardiology' / 'Blood Bank'
    # relabelling: those events were always removed or relabelled afterwards
    drop_markers = (
        'IV therapy', 'Nutrition:', 'Respiratory:', 'DNAR', 'Cardiology',
        'Procedure: Other', 'POE: Admit', 'POE: Postop', 'Blood', 'Lab',
        'Infusion', 'Medications',
    )

    dropped = []  # collected and dropped once after the loop
    previous_description = ''
    # initialised up front: it used to be read before its first assignment
    # when a log started with two General Care events
    previous_caseID = None

    for index, row in event_log.iterrows():
        description = row['description']
        caseID = row['caseID']
        same_case = caseID == previous_caseID

        if 'Consult' in description or 'Level of Urgency' in description:
            event_log.at[index, 'description'] = 'POE: Consult'

        if any(marker in description for marker in care_markers):
            if 'General Care' in previous_description and same_case:
                dropped.append(index)
                continue
            event_log.at[index, 'description'] = 'General Care'
            previous_description = 'General Care'
            # keep the case in step with the description; it used to go stale here
            previous_caseID = caseID
            continue

        if 'Discharge' in description:
            event_log.at[index, 'description'] = 'POE: Discharge'

        repeats_previous = same_case and any(
            marker in previous_description and marker in description for marker in repeat_markers
        )
        if repeats_previous or any(marker in description for marker in drop_markers):
            dropped.append(index)
            continue

        previous_description = description
        previous_caseID = caseID

    return event_log.drop(index=dropped)

# sort all events and aggregate events that belong to consults
def aggregateConsults(event_log):
    """Sort the log and collapse events that belong to one consult.

    Removes every 'POE: Medications' event, relabels '(IUC) - Foley' events
    as 'POE: Urinary Catheter', and removes an event that contains
    'POE: Consult' or 'within 24 hours' when the previously kept event of the
    same case contains 'POE: Consult'.

    Args:
        event_log: DataFrame with the columns caseID, time_stamp, description.

    Returns:
        The log sorted by caseID and time_stamp with the rows above removed;
        index labels of the sorted log are kept.
    """
    ordered = event_log.sort_values(by=['caseID', 'time_stamp'])
    ordered = ordered.reset_index(drop=True)

    discarded = []
    last_description = ''
    last_case = None
    for position, event in ordered.iterrows():
        text = event['description']
        case = event['caseID']

        if 'POE: Medications' in text:
            discarded.append(position)
            continue
        if '(IUC) - Foley' in text:
            ordered.at[position, 'description'] = 'POE: Urinary Catheter'

        # the comparison uses the original text of the previously kept event
        follows_consult = 'POE: Consult' in last_description and case == last_case
        if follows_consult and ('POE: Consult' in text or 'within 24 hours' in text):
            discarded.append(position)
            continue

        last_description = text
        last_case = case

    return ordered.drop(index=discarded)

# Export the Event-Log

In [None]:
# saves the given event log as a csv to local storage
def saveEventLog(_event_log, drgcode, severity, configuration, _folder_name, postfix):
    """Write an event log as CSV below the local 'eventlogs/' directory.

    The base name is "drg_<drgcode>_<severity>_age<lower>to<upper>", followed
    by the gender when configuration.gender is set. Without a postfix the file
    is "eventlogs/<folder>/<base name>.csv". With a postfix the base name
    becomes a sub-folder and the file is named after the postfix, with '/'
    replaced by '_'.

    Args:
        _event_log: DataFrame to write; the index is not written.
        drgcode: DRG code used in the file name.
        severity: DRG severity used in the file name.
        configuration: Object with age_lower_bound, age_upper_bound and gender.
        _folder_name: Target folder below 'eventlogs/'; today's date
            (YYYYMMDD) is used when it is falsy.
        postfix: Name of a partial log, or a falsy value for the full log.
    """
    folder = _folder_name if _folder_name else time.strftime('%Y%m%d')

    base_name = (
        "drg_" + str(drgcode) + "_" + str(severity)
        + "_age" + str(configuration.age_lower_bound)
        + "to" + str(configuration.age_upper_bound)
    )
    if configuration.gender:
        base_name = base_name + configuration.gender

    target_dir = "eventlogs/" + folder
    file_name = base_name
    if postfix:
        target_dir = target_dir + "/" + base_name
        file_name = postfix.replace('/', '_')

    # makedirs also creates a missing 'eventlogs' parent, where os.mkdir failed
    os.makedirs(target_dir, exist_ok=True)
    _event_log.to_csv(target_dir + "/" + file_name + ".csv", index=False)

# A class for event logs
class EventLog:
    """A named event log that starts out with no rows."""

    def __init__(self, name):
        """Store the name and create an empty frame with the three log columns."""
        log_columns = ["caseID", "time_stamp", "description"]
        self.event_log = pd.DataFrame(columns=log_columns)
        self.name = name

# partitions the given event log by the surgery date.
# A log is created each for the events on the day of surgery, all previous and all following events.
def saveLogPerSurgeryDay(event_log, drgcode, severity, configuration, _folder_name):
    """Split the log into pre-surgery, surgery-day and post-surgery logs and save them.

    Events are read in the given row order. Every case starts in 'preSurg'. A
    'Procedure: ' event switches the case to 'surg'; the procedure event is
    only recorded in the index log. The first later event on a different
    calendar date switches the case to 'postSurg'. The index log receives one
    row per phase change. Each log is written with saveEventLog, using the
    phase name (or 'index') as postfix.

    Calendar dates are compared here. The previous day-of-year comparison
    treated the same day of different years as equal.

    Args:
        event_log: DataFrame with caseID, time_stamp and description, ordered
            by case and time.
        drgcode, severity, configuration, _folder_name: Passed to saveEventLog.
    """
    columns = ['caseID', 'time_stamp', 'description']
    index_rows = []
    phase_rows = {}  # phase name -> rows; insertion order decides the save order
    current = 'preSurg'
    last_id = None
    last_procedure = None

    for _, row in event_log.iterrows():
        case_id = row['caseID']
        time_stamp = row['time_stamp']
        description = row['description']

        if case_id != last_id:
            current = 'preSurg'
            last_procedure = None
        last_id = case_id

        if 'Procedure: ' in description:
            current = 'surg'
            last_procedure = time_stamp
            index_rows.append((case_id, time_stamp, current))
            continue

        if (
            last_procedure is not None
            and current == 'surg'
            and pd.to_datetime(last_procedure).date() != pd.to_datetime(time_stamp).date()
        ):
            current = 'postSurg'
            index_rows.append((case_id, time_stamp, current))

        # rows are collected in lists and turned into frames once, replacing
        # DataFrame.append, which was removed in pandas 2.0
        phase_rows.setdefault(current, []).append((case_id, time_stamp, description))

    saveEventLog(pd.DataFrame(index_rows, columns=columns), drgcode, severity, configuration, _folder_name, 'index')
    for phase, rows in phase_rows.items():
        saveEventLog(pd.DataFrame(rows, columns=columns), drgcode, severity, configuration, _folder_name, phase)


# partitions the given event log by the departments, where the events occurred, and saves a separate log for each
def saveLogPerDepartment(event_log, drgcode, severity, configuration, _folder_name):
    """Split the log by department and save one log per department.

    Events are read in the given row order. Every case starts in department
    'none'. A 'Transfer: <department>' event switches the case to that
    department, or to 'none' when the label does not split into exactly two
    parts at ': '. The transfer event is recorded in the index log and is also
    the first event of the new department. Each log is written with
    saveEventLog, using the department name (or 'index') as postfix.

    Args:
        event_log: DataFrame with caseID, time_stamp and description, ordered
            by case and time.
        drgcode, severity, configuration, _folder_name: Passed to saveEventLog.
    """
    columns = ['caseID', 'time_stamp', 'description']
    index_rows = []
    department_rows = {}  # department -> rows; insertion order decides the save order
    last_department = 'none'
    last_id = None

    for _, row in event_log.iterrows():
        case_id = row['caseID']
        time_stamp = row['time_stamp']
        description = row['description']

        if case_id != last_id:
            last_department = 'none'
        last_id = case_id

        if 'Transfer: ' in description:
            parts = description.split(': ')
            last_department = parts[1] if len(parts) == 2 else 'none'
            index_rows.append((case_id, time_stamp, last_department))

        # rows are collected in lists and turned into frames once, replacing
        # DataFrame.append, which was removed in pandas 2.0
        department_rows.setdefault(last_department, []).append((case_id, time_stamp, description))

    saveEventLog(pd.DataFrame(index_rows, columns=columns), drgcode, severity, configuration, _folder_name, 'index')
    for department, rows in department_rows.items():
        saveEventLog(pd.DataFrame(rows, columns=columns), drgcode, severity, configuration, _folder_name, department)

# generate the event log 
def generate_event_log(drgcode, severity, configuration, _folder_name):
    """Build and save the event log for one DRG code / severity / configuration.

    NOTE(review): a second function with the same name is defined further down
    in this file and replaces this one once that cell has run.

    Steps: select the admissions matching the DRG code, severity, age range
    and optional gender / diagnosis filters; extract the order and procedure
    events of these admissions; abstract them; save the log as configured.

    Args:
        drgcode: DRG code to select.
        severity: DRG severity to select.
        configuration: Object with age_lower_bound, age_upper_bound, gender,
            diagnosis, filterParallelEvents, generatePerDepartment and
            generatePerSurgeryDay.
        _folder_name: Target folder passed on to the save functions.
    """
    #get the hadm_id
    drgresult = drg_results.loc[(drg_results["drg_code"] == drgcode) & (drg_results["drg_severity"] == severity)]

    admissions = admission_results.merge(drgresult, on=["hadm_id","subject_id"], how="inner")
    admissions = patient_results.merge(admissions, on=["subject_id"], how="inner")
    admissions = admissions[(configuration.age_lower_bound <= admissions["anchor_age"]) & (admissions["anchor_age"] <= configuration.age_upper_bound)]
    if configuration.gender:
        admissions = admissions[admissions["gender"] == configuration.gender]
    if configuration.diagnosis:
        admissions = admissions.merge(diagnoses_icd_results, on=["hadm_id", "subject_id"], how="left")
        # the left merge leaves NaN codes for admissions without diagnoses;
        # na=False keeps the mask boolean and excludes those rows
        admissions = admissions[admissions["icd_code"].str.contains(configuration.diagnosis, na=False)]

    #save the hadm_ids
    hadm_ids = list(admissions["hadm_id"])

    #filters the datatables
    icu_stays = icu_stay_results[icu_stay_results["hadm_id"].isin(hadm_ids)]
    transfers = transfer_results[transfer_results["hadm_id"].isin(hadm_ids)]
    services = service_results[service_results["hadm_id"].isin(hadm_ids)]
    # NOTE(review): poe_results_301 is not defined in the visible part of this file — confirm where it comes from
    drg_poes = poe_results_301[poe_results_301["hadm_id"].isin(hadm_ids)]
    drg_procedures = procedures[procedures["hadm_id"].isin(hadm_ids)].merge(d_procedures, how="left", on=["icd_code","icd_version"])
    drg_icu_procedureevents = icd_procedureevents_results[icd_procedureevents_results["hadm_id"].isin(hadm_ids)].merge(icu_d_results, how="left", on=["itemid"])

    #initialize empty log
    column_names = ["caseID", "time_stamp", "description"]
    new_event_log = pd.DataFrame(columns = column_names)

    #add desired events to the new log
    #new_event_log = getAdmissionAdmits(new_event_log, admissions)
    #new_event_log = getAdmissionDischarges(new_event_log, admissions)
    #new_event_log = getAdmissionDeaths(new_event_log, admissions)
    #new_event_log = getAdmissionEnterEDs(new_event_log, admissions)
    #new_event_log = getAdmissionLeaveEDs(new_event_log, admissions)
    #new_event_log = getIcuEnters(new_event_log, icu_stays)
    #new_event_log = getIcuLeaves(new_event_log, icu_stays)
    #new_event_log = getIcuProcedures(new_event_log, drg_icu_procedureevents)
    #new_event_log = getTransfers(new_event_log, transfers)
    # take the flag from the configuration object instead of the notebook-level global
    new_event_log = getPoes(new_event_log, drg_poes, configuration.filterParallelEvents)
    new_event_log = getProcedures(new_event_log, drg_procedures)
    #new_event_log = getServices(new_event_log, services)
    new_event_log = aggregateDischargesAndServices(new_event_log)
    #new_event_log = normalizeAndDeleteEvents(new_event_log)
    # NOTE(review): customFixes2 is not defined in the visible part of this file — confirm where it comes from
    new_event_log = customFixes2(new_event_log)

    if configuration.generatePerDepartment:
        saveLogPerDepartment(new_event_log, drgcode, severity, configuration, _folder_name)
    elif configuration.generatePerSurgeryDay:
        saveLogPerSurgeryDay(new_event_log, drgcode, severity, configuration, _folder_name)
    else:
        saveEventLog(new_event_log, drgcode, severity, configuration, _folder_name, None)

# Generate the Event-Log

In [None]:
# generate the event log
# extract events according to the provided config
# choose the needed event extraction functions
# correlate the events by the admission ID
# choose the needed event abstraction functions
# export the resulting event log
def generate_event_log(drgcode, severity, configuration, _folder_name):
    """Build and save the event log for one DRG code / severity / configuration.

    Steps: select the admissions matching the DRG code, severity, age range
    and optional gender / diagnosis filters; extract the order and procedure
    events of these admissions; abstract them; save the log as configured.

    Args:
        drgcode: DRG code to select.
        severity: DRG severity to select.
        configuration: Object with age_lower_bound, age_upper_bound, gender,
            diagnosis, filterParallelEvents, generatePerDepartment and
            generatePerSurgeryDay.
        _folder_name: Target folder passed on to the save functions.
    """
    #get the hadm_id
    drgresult = drg_results.loc[(drg_results["drg_code"] == drgcode) & (drg_results["drg_severity"] == severity)]

    admissions = admission_results.merge(drgresult, on=["hadm_id","subject_id"], how="inner")
    admissions = patient_results.merge(admissions, on=["subject_id"], how="inner")
    admissions = admissions[(configuration.age_lower_bound <= admissions["anchor_age"]) & (admissions["anchor_age"] <= configuration.age_upper_bound)]
    if configuration.gender:
        admissions = admissions[admissions["gender"] == configuration.gender]
    if configuration.diagnosis:
        admissions = admissions.merge(diagnoses_icd_results, on=["hadm_id", "subject_id"], how="left")
        # the left merge leaves NaN codes for admissions without diagnoses;
        # na=False keeps the mask boolean and excludes those rows
        admissions = admissions[admissions["icd_code"].str.contains(configuration.diagnosis, na=False)]

    #save the hadm_ids
    hadm_ids = list(admissions["hadm_id"])

    #filters the datatables
    icu_stays = icu_stay_results[icu_stay_results["hadm_id"].isin(hadm_ids)]
    transfers = transfer_results[transfer_results["hadm_id"].isin(hadm_ids)]
    services = service_results[service_results["hadm_id"].isin(hadm_ids)]
    # NOTE(review): poe_results_301 is not defined in the visible part of this file — confirm where it comes from
    drg_poes = poe_results_301[poe_results_301["hadm_id"].isin(hadm_ids)]
    drg_procedures = procedures[procedures["hadm_id"].isin(hadm_ids)].merge(d_procedures, how="left", on=["icd_code","icd_version"])
    drg_icu_procedureevents = icd_procedureevents_results[icd_procedureevents_results["hadm_id"].isin(hadm_ids)].merge(icu_d_results, how="left", on=["itemid"])

    #initialize empty log
    column_names = ["caseID", "time_stamp", "description"]
    new_event_log = pd.DataFrame(columns = column_names)

    #add desired events to the new log
    #new_event_log = getAdmissionAdmits(new_event_log, admissions)
    #new_event_log = getAdmissionDischarges(new_event_log, admissions)
    #new_event_log = getAdmissionDeaths(new_event_log, admissions)
    #new_event_log = getAdmissionEnterEDs(new_event_log, admissions)
    #new_event_log = getAdmissionLeaveEDs(new_event_log, admissions)
    #new_event_log = getIcuEnters(new_event_log, icu_stays)
    #new_event_log = getIcuLeaves(new_event_log, icu_stays)
    #new_event_log = getIcuProcedures(new_event_log, drg_icu_procedureevents)
    #new_event_log = getTransfers(new_event_log, transfers)
    # take the flag from the configuration object instead of the notebook-level global
    new_event_log = getPoes(new_event_log, drg_poes, configuration.filterParallelEvents)
    new_event_log = getProcedures(new_event_log, drg_procedures)
    #new_event_log = getServices(new_event_log, services)

    # abstract events
    new_event_log = aggregateDischargesAndServices(new_event_log)
    #new_event_log = normalizeAndDeleteEvents(new_event_log)
    # NOTE(review): customFixes2 is not defined in the visible part of this file — confirm where it comes from
    new_event_log = customFixes2(new_event_log)

    # export event log
    if configuration.generatePerDepartment:
        saveLogPerDepartment(new_event_log, drgcode, severity, configuration, _folder_name)
    elif configuration.generatePerSurgeryDay:
        saveLogPerSurgeryDay(new_event_log, drgcode, severity, configuration, _folder_name)
    else:
        saveEventLog(new_event_log, drgcode, severity, configuration, _folder_name, None)

In [None]:
# Define the config
# generate an event log for each drg and each severity group

### FILTER
# DRG codes to generate event logs for; codes used so far:
# urinary 690
# hip 301
# appendectomy 225
# heart failure 190
# heart failure 194
drgcodes = [301]

# DRG severities; one event log is generated per severity
severities = [1.0, 2.0, 3.0, 4.0]

# gender value to keep; a falsy value disables the gender filter
gender = False

# folder name below 'eventlogs/'; with None, saveEventLog uses today's date
folder_name = None

# diagnosis filter: primary coxarthrosis; the value is matched against icd_code with str.contains
diagnosis = "M16"
#diagnosis = False

# export mode: per department takes precedence over per surgery day; with both False one full log is saved
generatePerDepartment = False
generatePerSurgeryDay = False
# when True, getPoes leaves out the 'POE: Medications' and 'POE: Lab' events
filterParallelEvents = False

class Config:
    """Settings for one event-log generation run."""

    def __init__(self, age_lower_bound, age_upper_bound, gender, generatePerDepartment, generatePerSurgeryDay, diagnosis, filterParallelEvents ):
        """Store every argument unchanged on the attribute of the same name."""
        # admission selection
        self.age_lower_bound, self.age_upper_bound = age_lower_bound, age_upper_bound
        self.gender = gender
        self.diagnosis = diagnosis
        # event extraction
        self.filterParallelEvents = filterParallelEvents
        # export mode
        self.generatePerDepartment = generatePerDepartment
        self.generatePerSurgeryDay = generatePerSurgeryDay

# The configurations to run; here a single one covering ages 0 to 99 with the settings above
configurations = [
    Config(0, 99, gender, generatePerDepartment, generatePerSurgeryDay, diagnosis, filterParallelEvents),
]

# Generate one event log for every combination of DRG code, severity and configuration
for _drgcode in drgcodes:
    for _severity in severities:  
        for configuration in configurations:
            generate_event_log(_drgcode, _severity, configuration, folder_name)