# Notebook 1: predicting missing values for activity

## Section 1: Loading event log

In [None]:
import pandas as pd

# Load the event log, which uses HADM_ID (hospital admission ID) as the subject.
# The path uses a forward slash: a backslash here forms the invalid escape "\H"
# (a warning on recent Python versions) and only resolves to the intended file
# on Windows, whereas "/" works on every platform.
event_log = pd.read_csv("event_logs/HADM_ID_v3.csv", parse_dates=["timestamp"])

# read_csv leaves the column as plain objects when it cannot parse every value,
# so convert explicitly to guarantee a datetime column (or fail loudly here).
event_log["timestamp"] = pd.to_datetime(event_log["timestamp"])

# sort_values returns a new frame, so the result must be assigned to take effect.
# A stable sort keeps the file's original event order within each patient.
event_log = event_log.sort_values(by=["patient"], kind="stable")

# Note: some timestamps are already missing in the raw log.
# Uncomment either line below to inspect the data:
# event_log
# event_log.info()

## Section 2: process mining

In [None]:
import pm4py as pm

# Tell pm4py which columns hold the case id, the activity and the timestamp.
event_log = pm.format_dataframe(
    event_log,
    case_id="patient",
    activity_key="activity",
    timestamp_key="timestamp",
)

# Summary statistics: which activities open and close the cases,
# and how many events and distinct cases the log contains.
start_events = pm.get_start_activities(event_log)
end_events = pm.get_end_activities(event_log)
n_events = event_log.shape[0]
n_cases = len(event_log["patient"].unique())

print(f"Number of cases: {n_cases}\nNumber of events: {n_events}")
print(f"Start events: {start_events}\nEnd events: {end_events}")


In [None]:
# Most cases start with a person being admitted to the hospital.
# Interestingly, there are two cases where someone was discharged first;
# these two cases might be anomalous, so isolate them for inspection.
# Their event traces are as follows:
# discharged --> admitted --> transfer --> entered ICU --> Left ICU --> Dead
anomalous_start_activities = ["Discharged"]
filter_by_discharged = pm.filter_start_activities(event_log, anomalous_start_activities)
filter_by_discharged


In [None]:
# Discover a Petri net from the event log with pm4py's Alpha-miner entry point;
# the call yields the net together with its initial and final markings.
alpha_result = pm.discover_petri_net_alpha(event_log)
net, initial_marking, final_marking = alpha_result

In [None]:
# NOTE(review): graphviz is not referenced directly; presumably imported to
# confirm it is installed before pm4py renders the net. Confirm before removing.
import graphviz

# Pass the markings returned by discovery so the rendering also shows the
# initial and final markings instead of the bare net structure only.
pm.view_petri_net(
    petri_net=net,
    initial_marking=initial_marking,
    final_marking=final_marking,
)