In [40]:

import pandas as pd
import numpy as np 


In [41]:
# Location of the raw triage-room event log.
# NOTE(review): this is an absolute path on one user's machine; the notebook
# will not run elsewhere until it is pointed at a local copy of the CSV.
EVENT_LOG_CSV = r'C:\Users\eelha081\OneDrive - University of Ottawa\Jupyter_Notebooks\SLR_Process_Pre\Triage_Room_Process-eventlog.csv'

# Read the event log and preview its first ten rows.
df = pd.read_csv(EVENT_LOG_CSV)
print(df.head(10))

     Case ID Activity                             Activity Label  \
0  202501011        A                         Initial Assessment   
1  202501011        D                         Doctors Assessment   
2  202501011        E            Further Diagnostics and Imaging   
3  202501011        G           Treatment and Monitor Conditions   
4  202501011        H                        Transfer to the ICU   
5  202501012        A                         Initial Assessment   
6  202501012        B                              Immediate CPR   
7  202501012        C               Stabilization and Monitoring   
8  202501012        E            Further Diagnostics and Imaging   
9  202501012        F  Transfer to Surgery or Advanced Treatment   

        time:timestamp  Heart_Rate  Oxygen_Saturation Blood_Pressure  Age  \
0  2025-01-01T11:35:00         108               90.2          99/61   19   
1  2025-01-01T11:42:00         108               90.2          99/61   19   
2  2025-01-01T12:40:

In [42]:
# List the column names of the loaded event log.
print(df.columns)

Index(['Case ID', 'Activity', 'Activity Label', 'time:timestamp', 'Heart_Rate',
       'Oxygen_Saturation', 'Blood_Pressure', 'Age', 'Consciousness_Level',
       'PHI', 'Imaging_Results'],
      dtype='object')


In [47]:
# Step 1: Parse the timestamp column into datetimes, then order the events
# chronologically within each case. The row index labels are kept as they are,
# because the later DP2 labelling maps labels back onto rows by index.
df = (
    df
    .assign(**{'time:timestamp': pd.to_datetime(df['time:timestamp'])})
    .sort_values(by=['Case ID', 'time:timestamp'])
)

# Step 2: Define helper function for getting the first activity after a target
def get_next_after(activity_list, target):
    """Return the activity that directly follows the first `target`.

    Parameters
    ----------
    activity_list : list
        Activities of a single case, in order.
    target : object
        Activity to look for.

    Returns
    -------
    object or None
        The element right after the first occurrence of `target`, or None
        when `target` is absent or only occurs as the very last element.
    """
    # Pair each element with its successor; the final element has no
    # successor, so it can never be reported as a match.
    for current, following in zip(activity_list, activity_list[1:]):
        if current == target:
            return following
    return None

# Step 3: Initialize label dictionaries
dp1_labels = {}       # Case ID -> first activity after 'A'
dp2_label_dict = {}   # row index -> DP2 label ('' means "no label on this row")
dp3_labels = {}       # Case ID -> first activity after 'E'
dp4_labels = {}       # Case ID -> first activity after 'G'


def _dp2_labels_for_case(activities):
    """Compute the row-level DP2 labels for one case.

    Only the activities that occur before the first 'E' drive the labelling.

    Parameters
    ----------
    activities : list
        Activities of a single case, in chronological order.

    Returns
    -------
    list of str or None
        One label per activity ('' where no label is assigned), or None when
        the case contains no 'E' and therefore gets no DP2 labels at all.
    """
    try:
        e_index = activities.index('E')
    except ValueError:
        # No E in this case -> nothing to label
        return None

    before_e = activities[:e_index]
    total_cd = before_e.count('C') + before_e.count('D')

    if total_cd == 1:
        # Exactly one C or D before E -> every row of the case is labelled 'E'
        return ['E'] * len(activities)

    # Zero or several C/D before E -> label individual rows.
    labels = [''] * len(activities)
    last_cd_index = -1
    for i, act in enumerate(before_e):
        if act == 'C':
            # A C is labelled with the activity that follows it, but only when
            # that follower is still before E and is one of B, C or D.
            next_act = before_e[i + 1] if i + 1 < len(before_e) else None
            if next_act in ('B', 'C', 'D'):
                labels[i] = next_act
            last_cd_index = i
        elif act == 'D':
            labels[i] = 'E'
            last_cd_index = i

    # The row right after the last C/D is labelled 'E'. That position is at
    # most e_index, so it is always a valid index into `labels`.
    # NOTE(review): a C directly followed by E therefore carries no label of
    # its own; the 'E' lands on the next row. Confirm this is intended.
    if last_cd_index >= 0:
        labels[last_cd_index + 1] = 'E'

    return labels


# Step 4: Group and process each case
grouped = df.groupby('Case ID')

for case_id, group in grouped:
    activities = list(group['Activity'].values)
    indices = group.index.tolist()

    # DP1 / DP3 / DP4: first activity after A / E / G (one label per case)
    dp1_labels[case_id] = get_next_after(activities, 'A')
    dp3_labels[case_id] = get_next_after(activities, 'E')
    dp4_labels[case_id] = get_next_after(activities, 'G')

    # DP2: row-level labels derived from the behaviour before the first E
    case_labels = _dp2_labels_for_case(activities)
    if case_labels is None:
        # Case without E: its rows get no DP2 entry
        continue

    # Store the DP2 values for this case, keyed by row index
    dp2_label_dict.update(zip(indices, case_labels))

# Step 5: Attach every DP label column to the dataframe.
# DP1/DP3/DP4 are looked up by Case ID; DP2 is looked up by row index.
# The tuple order fixes the order in which new columns are appended.
label_sources = (
    ('DP1_label', df['Case ID'], dp1_labels),
    ('DP2_label', df.index, dp2_label_dict),
    ('DP3_label', df['Case ID'], dp3_labels),
    ('DP4_label', df['Case ID'], dp4_labels),
)
for column, keys, mapping in label_sources:
    df[column] = keys.map(mapping)

# Step 6: Treat empty DP2 labels as missing, then carry the last known label
# forward within each case (rows before the first label stay missing).
dp2_sparse = df['DP2_label'].replace('', np.nan)
df['DP2_label'] = dp2_sparse.groupby(df['Case ID']).ffill()

# Step 7: Preview the first 10 rows
print(df.head(10))

     Case ID Activity                             Activity Label  \
0  202501011        A                         Initial Assessment   
1  202501011        D                         Doctors Assessment   
2  202501011        E            Further Diagnostics and Imaging   
3  202501011        G           Treatment and Monitor Conditions   
4  202501011        H                        Transfer to the ICU   
5  202501012        A                         Initial Assessment   
6  202501012        B                              Immediate CPR   
7  202501012        C               Stabilization and Monitoring   
8  202501012        E            Further Diagnostics and Imaging   
9  202501012        F  Transfer to Surgery or Advanced Treatment   

       time:timestamp  Heart_Rate  Oxygen_Saturation Blood_Pressure  Age  \
0 2025-01-01 11:35:00         108               90.2          99/61   19   
1 2025-01-01 11:42:00         108               90.2          99/61   19   
2 2025-01-01 12:40:00  

In [48]:
from IPython.display import FileLink

# Write the labelled event log next to the notebook (row index omitted),
# then show a clickable link to the written file as the cell result.
output_csv = "processed_event_log.csv"
df.to_csv(output_csv, index=False)

FileLink(output_csv)