In [22]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
from IPython.display import FileLink

In [23]:
# Define process events/activity Labels
activities = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']

# Define human-readable labels for each activity
activity_labels = {
    'A': 'Initial Assessment',
    'B': 'Immediate CPR',
    'C': 'Stabilization and Monitoring',
    'D': 'Doctors Assessment',
    'E': 'Further Diagnostics and Imaging',
    'F': 'Transfer to Surgery or Advanced Treatment',
    'G': 'Treatment and Monitor Conditions',
    'H': 'Transfer to the ICU',
    'I': 'Discharge'
}

In [24]:
# ------------------------------
# Generate simulated patient data attributes
# ------------------------------

def generate_patient_attributes():
    # Simulate vitals for accident victims (more likely to be abnormal)

    heart_rate = int(np.random.normal(loc=110, scale=20))  # normal ~60–100 bpm
    oxygen_saturation = round(np.random.normal(loc=88, scale=5), 1)  # normal 95–100%

    # Generate systolic/diastolic blood pressure
    systolic = int(np.random.normal(loc=100, scale=15))  # normal ~120
    diastolic = int(np.random.normal(loc=65, scale=10))  # normal ~80
    blood_pressure = f"{systolic}/{diastolic}"

    # Age range between 18–90, centered around 45
    age = int(np.clip(np.random.normal(loc=45, scale=18), 18, 90))

    # Determine consciousness level based on critical vital signs
    if oxygen_saturation < 85 or heart_rate > 140 or systolic < 80:
        consciousness = 'Unconscious'
    elif oxygen_saturation < 90 or heart_rate > 120 or systolic < 90:
        consciousness = 'Drowsy'
    elif oxygen_saturation < 93 or heart_rate > 110 or systolic < 100:
        consciousness = 'Lethargic'
    else:
        consciousness = 'Alert'

    # Random imaging result
    imaging_result = random.choice([
        "No Critical Injury Found",
        "Confirmed Critical Injury"
    ])

    return {
        'Heart_Rate': heart_rate,
        'Oxygen_Saturation': oxygen_saturation,
        'Blood_Pressure': blood_pressure,
        'Age': age,
        'Consciousness_Level': consciousness,
        'Imaging_Results': imaging_result
    }


# ------------------------------
# Calculate Patient Health Indicator (PHI)
# ------------------------------

def calculate_phi(attributes):
    # Extract key vitals
    hr = attributes['Heart_Rate']
    ox = attributes['Oxygen_Saturation']
    bp = attributes['Blood_Pressure']
    cons = attributes['Consciousness_Level']
    systolic = int(bp.split('/')[0])

    # classification of patient status
    if cons == 'Unconscious' or ox < 85 or systolic < 80:
        return 'Life-Threatening'
    elif cons == 'Drowsy' or ox < 90 or hr > 130 or systolic < 90:
        return 'Critical'
    elif cons == 'Lethargic' or hr > 110 or ox < 94:
        return 'Deteriorating'
    else:
        return 'Stable'


In [25]:
# ------------------------------
# # Define the routing logic based on initial PHI and imaging results
# ------------------------------

def generate_routing_path(phi_initial, imaging_result):
    # Initialize the path starting from activity A
    path = [('A', phi_initial)]
    phi = phi_initial   # Current PHI status (may change during process)
    loop_count = 0
    reached_E = False

    # --- DP1: Decision after A ---
    if phi in ['Stable', 'Deteriorating']:
        # If PHI is relatively mild, follow path A → D → E
        path.append(('D', phi))
        path.append(('E', phi))
        
    elif phi in ['Critical', 'Life-Threatening']:
        # If PHI is more severe, go to CPR and stabilization path: A → B → C
        path.append(('B', phi))
        path.append(('C', phi))
        
     # --- DP2: Decision Before E ---
        if phi == 'Critical':
            # After B → C, directly move to E
            path.append(('E', phi))
            reached_E = True

        elif phi == 'Life-Threatening':
            # For Life-Threatening patients, repeat B → C loop 1 to 4 times
            max_loops = random.randint(1, 4)
            for _ in range(max_loops):
                path.append(('B', phi))
                path.append(('C', phi))
                
            # After looping, assume the patient stabilizes slightly
            phi = 'Critical'

            # Continue through a final C → D → E path
            path.append(('C', phi))  # Final C to separate from loop, and change phi
            path.append(('D', phi))
            path.append(('E', phi))
            reached_E = True

    # --- DP3: Decision after E ---
    if phi in ['Critical', 'Deteriorating'] and imaging_result == "Confirmed Critical Injury":
    # If serious condition + critical imaging → transfer to advanced treatment
        path.append(('F', phi))
    else:
        # Otherwise → go to treatment and monitoring (G)
        path.append(('G', phi))

    # --- DP4: Decision after G ---
    # Only apply this logic if the last activity was G
    if path[-1][0] == 'G':
        # If condition is still concerning, but no critical injury found → go to ICU (H)
        if phi in ['Critical', 'Deteriorating'] and imaging_result == "No Critical Injury Found":
            path.append(('H', phi))
        else:
            # Otherwise → patient is discharged (I)
            path.append(('I', phi))

    return path


In [26]:
# ------------------------------
# Generate event log
# ------------------------------

# Initialize the event log
event_log = []
start_time = datetime(2025, 1, 1, 8, 0)  #  Define the starting timestamp for the simulation

for case_num in range(1, 301):  #  the number of cases in the simulation
    #  Randomly assign a case start time within a 4-hour window
    case_start = start_time + timedelta(minutes=random.randint(0, 240)) 

    # Generate a unique Case ID based on date + case number
    case_id = f"{case_start.strftime('%Y%m%d')}{case_num}"

    # Step 1: Generate patient attributes
    attrs = generate_patient_attributes()

    # Step 2: Calculate PHI status from attributes
    phi = calculate_phi(attrs)

    # Step 3: Randomly assign imaging result for the case
    imaging_result = random.choice([
        "No Critical Injury Found",
        "Confirmed Critical Injury"
    ])

    # Step 4: Generate the full process path (sequence of activities)
    path = generate_routing_path(phi, imaging_result)

    # Step 5: Simulate timestamps for each activity in the path
    current_time = case_start
    imaging_seen = False      # Flag to know when activity E (imaging) occurs

    for activity, phi_event in path:
        if activity == 'E':
            imaging_seen = True    #  Imaging result is revealed after E

         # Create event dictionary with all attributes and current activity
        event = {
            'Case ID': case_id,
            'Activity': activity,
            'Activity Label': activity_labels[activity],
            'time:timestamp': current_time.isoformat(),
            'Heart_Rate': attrs['Heart_Rate'],
            'Oxygen_Saturation': attrs['Oxygen_Saturation'],
            'Blood_Pressure': attrs['Blood_Pressure'],
            'Age': attrs['Age'],
            'Consciousness_Level': attrs['Consciousness_Level'],
            'PHI': phi_event,
            'Imaging_Results': imaging_result if imaging_seen else ""   # Hidden before E
        }

        # Add event to the log
        event_log.append(event)

        # Move to the next activity timestamp
        current_time += timedelta(minutes=random.randint(5, 60))

# Convert the event log to a  DataFrame
df_event_log = pd.DataFrame(event_log)

# Export the DataFrame to CSV
csv_path = "synthetic_triage_eventlog.csv"
df_event_log.to_csv(csv_path, index=False)

# Show download link
FileLink(csv_path)

In [27]:
# Group event log by case ID and print activity sequences using → separator
# ------------------------------

for case_id, group in df_event_log.groupby("Case ID"):
    sequence = " → ".join(group["Activity"].tolist())
    
    print(f"{case_id}: {sequence}")

202501011: A → D → E → G → H
2025010110: A → B → C → E → F
20250101100: A → B → C → B → C → C → D → E → F
20250101101: A → B → C → E → G → H
20250101102: A → B → C → E → G → H
20250101103: A → B → C → E → G → H
20250101104: A → B → C → B → C → B → C → B → C → C → D → E → F
20250101105: A → B → C → E → F
20250101106: A → B → C → E → G → H
20250101107: A → B → C → E → F
20250101108: A → B → C → B → C → B → C → B → C → C → D → E → G → H
20250101109: A → B → C → B → C → B → C → B → C → C → D → E → F
2025010111: A → B → C → E → F
20250101110: A → B → C → E → F
20250101111: A → B → C → E → F
20250101112: A → D → E → G → H
20250101113: A → B → C → B → C → B → C → B → C → C → D → E → F
20250101114: A → D → E → F
20250101115: A → B → C → E → G → H
20250101116: A → B → C → B → C → C → D → E → F
20250101117: A → B → C → E → G → H
20250101118: A → B → C → B → C → C → D → E → F
20250101119: A → B → C → B → C → B → C → B → C → C → D → E → F
2025010112: A → B → C → B → C → C → D → E → G → H
202501011

In [28]:
# count unique activity sequences (process Variants)
# ------------------------------

from collections import Counter

# Step 1: Extract activity sequences per case
variants = []

for case_id, group in df_event_log.groupby("Case ID"):
    trace = " → ".join(group["Activity"].tolist())
    variants.append(trace)

# Step 2: Count unique variants
variant_counts = Counter(variants)

# Step 3: Print the variants and their frequencies
print("Unique Variants and Their Frequencies:\n")
for i, (variant, count) in enumerate(variant_counts.items(), 1):
    
    print(f"Variant {i} (Cases: {count}): {variant}")


Unique Variants and Their Frequencies:

Variant 1 (Cases: 26): A → D → E → G → H
Variant 2 (Cases: 64): A → B → C → E → F
Variant 3 (Cases: 14): A → B → C → B → C → C → D → E → F
Variant 4 (Cases: 79): A → B → C → E → G → H
Variant 5 (Cases: 18): A → B → C → B → C → B → C → B → C → C → D → E → F
Variant 6 (Cases: 12): A → B → C → B → C → B → C → B → C → C → D → E → G → H
Variant 7 (Cases: 16): A → D → E → F
Variant 8 (Cases: 15): A → B → C → B → C → C → D → E → G → H
Variant 9 (Cases: 11): A → B → C → B → C → B → C → B → C → B → C → C → D → E → F
Variant 10 (Cases: 7): A → D → E → G → I
Variant 11 (Cases: 12): A → B → C → B → C → B → C → B → C → B → C → C → D → E → G → H
Variant 12 (Cases: 13): A → B → C → B → C → B → C → C → D → E → G → H
Variant 13 (Cases: 13): A → B → C → B → C → B → C → C → D → E → F
