In [305]:
from collections import defaultdict
import pandas as pd
import math
import scipy.stats as st
import pickle


In [306]:
import sys
import os

# Add project root to sys.path
sys.path.append(os.path.abspath(os.path.join("../..")))  # if you're in optimizer/

Load

In [330]:
df = pd.read_csv('../../simulated_log.csv')
raw = pd.read_csv('../../raw_data/LoanApp.csv.gz', compression='gzip')


with open("../parameters/simulation_parameters.pkl", "rb") as f:
    sim_params = pickle.load(f)

In [334]:
raw[raw['case_id']==890]

Unnamed: 0,case_id,resource,activity,start_time,end_time
890,890,AML Investigator-000001,AML check,2023-05-01T12:44:43.436,2023-05-01T12:48:54.088


In [308]:
transition_dict = sim_params["transition_probabilities"]
durations = sim_params["activity_durations_dict"]
calendars = sim_params["res_calendars"]  # {agent_id: RCalendar}

In [332]:
# sample
raw = raw[raw['case_id']>799]
raw = raw.reset_index(drop=True).reset_index()
raw['case_id'] = raw['index']
del raw['index']

In [333]:
raw[raw['case_id']==91]

Unnamed: 0,case_id,resource,activity,start_time,end_time
91,91,Applicant-000001,Applicant completes form,2023-04-20T14:12:34.230,2023-04-21T07:12:34.230


# Validation for traces

Functions

In [310]:
def sample_from_distribution(distribution):
    """
    Copy of the AgentSimulator utils method
    """
    if distribution.type.value == "expon":
        scale = distribution.mean - distribution.min
        if scale < 0.0:
            print("Warning! Trying to generate EXPON sample with 'mean' < 'min', using 'mean' as scale value.")
            scale = distribution.mean
        sample = st.expon.rvs(loc=distribution.min, scale=scale, size=1)
    elif distribution.type.value == "gamma":
        # If the distribution corresponds to a 'gamma' with loc!=0, the estimation is done wrong
        # dunno how to take that into account
        sample = st.gamma.rvs(
            pow(distribution.mean, 2) / distribution.var,
            loc=0,
            scale=distribution.var / distribution.mean,
            size=1,
        )
    elif distribution.type.value == "norm":
        sample = st.norm.rvs(loc=distribution.mean, scale=distribution.std, size=1)
    elif distribution.type.value == "uniform":
        sample = st.uniform.rvs(loc=distribution.min, scale=distribution.max - distribution.min, size=1)
    elif distribution.type.value == "lognorm":
        # If the distribution corresponds to a 'lognorm' with loc!=0, the estimation is done wrong
        # dunno how to take that into account
        pow_mean = pow(distribution.mean, 2)
        phi = math.sqrt(distribution.var + pow_mean)
        mu = math.log(pow_mean / phi)
        sigma = math.sqrt(math.log(phi ** 2 / pow_mean))
        sample = st.lognorm.rvs(sigma, loc=0, scale=math.exp(mu), size=1)
    elif distribution.type.value == "fix":
        sample = [distribution.mean] * 1

    return sample[0]

def remove_transitive_response_constraints(response_constraints):
    cleaned = {}

    for act, responses in response_constraints.items():
        direct = set(responses)

        # Remove any activity that is indirectly reachable through other responses
        for mid in responses:
            indirects = set(response_constraints.get(mid, []))
            direct -= indirects

        cleaned[act] = list(direct)

    return cleaned


def discover_post_conditions(df, activity_col='activity', case_col='case_id', order_by='end_time'):
    response_counts = defaultdict(lambda: defaultdict(int))
    activity_counts = defaultdict(int)

    # Group by case
    for case_id, group in df.groupby(case_col):
        sorted_activities = group.sort_values(by=order_by)[activity_col].tolist()

        for i, act in enumerate(sorted_activities):
            activity_counts[act] += 1

            # All activities that come after this one
            for after_act in sorted_activities[i+1:]:
                response_counts[act][after_act] += 1

    # Build final response constraint map
    post_conditions = {}
    for act, after_acts in response_counts.items():
        constraints = []
        for after_act, count in after_acts.items():
            # Threshold: e.g., B happens after A in 80%+ of A occurrences
            if count / activity_counts[act] >= 0.8:
                constraints.append(after_act)
        if constraints:
            post_conditions[act] = constraints

    post_conditions = remove_transitive_response_constraints(post_conditions)

    return post_conditions


def extract_all_successors(transition_dict):
    """
    Converts a nested transition dictionary to a flat mapping:
    prefix_activity → list of all possible successor activities (non-zero prob)

    Parameters:
    - transition_dict: dict of {prefix: {agent_id: {activity: prob}}} ! Careful, only one activity prefixes are valid

    Returns:
    - dict of {activity: [possible next activities]}
    """
    flat_successors = {}
    seen_anchors = set()

    for prefix, agent_dict in transition_dict.items():
        if not prefix:
            continue  # skip empty prefixes
        if prefix[-1] in seen_anchors:
            continue
        anchor = prefix[-1]  # last activity in the prefix
        seen_anchors.add(anchor)

        successor_set = set()
        for agent_transitions in agent_dict.values():
            for act, prob in agent_transitions.items():
                if prob > 0:
                    successor_set.add(act)

        flat_successors[anchor] = sorted(successor_set)

    return flat_successors


def mine_concurrent_activities(df, case_col='case_id', activity_col='activity',
                                start_col='start_time', end_col='end_time'):
    """
    For each activity, detect other activities that can run concurrently
    by overlapping time windows in the same case.

    Parameters:
    - df: Event log with case_id, activity, start_time, end_time

    Returns:
    - co_occurrence: dict {activity: [other activities that overlapped with it]}
    """
    df = df.copy()
    df[start_col] = pd.to_datetime(df[start_col], format='mixed', utc=True)
    df[end_col] = pd.to_datetime(df[end_col], format='mixed', utc=True)


    co_occurrence = defaultdict(set)

    for case_id, group in df.groupby(case_col):
        group = group.sort_values(by=start_col)
        for i, row_i in group.iterrows():
            act_i, start_i, end_i = row_i[activity_col], row_i[start_col], row_i[end_col]
            for j, row_j in group.iterrows():
                if i == j:
                    continue
                act_j, start_j, end_j = row_j[activity_col], row_j[start_col], row_j[end_col]
                # Check for overlap
                if start_i < end_j and start_j < end_i:
                    co_occurrence[act_i].add(act_j)

    # Convert sets to sorted lists
    return {act: sorted(list(others)) for act, others in co_occurrence.items()}

def extract_xor_groups_from_cooccurrence(successor_map, co_occurrence_map):
    """
    Builds XOR groups from a simplified co-occurrence map.
    An activity is excluded from XOR groups if it can co-occur with the anchor.
    Returns:
    - xor_groups: {anchor: list of mutually exclusive groups (each group is a list of activities)}
    """
    xor_groups = defaultdict(list)

    for anchor, successors in successor_map.items():
        if not successors:
            continue

        # ⚠️ Filter out successors that can co-occur with the anchor
        filtered_successors = [
            act for act in successors
            if act not in co_occurrence_map.get(anchor, []) and anchor not in co_occurrence_map.get(act, [])
        ]

        remaining = set(filtered_successors)
        groups = []

        while remaining:
            act = remaining.pop()
            group = {act}

            for other in list(remaining):
                if (
                    act in co_occurrence_map.get(other, []) or
                    other in co_occurrence_map.get(act, [])
                ):
                    group.add(other)
                    remaining.remove(other)

            groups.append(sorted(group))

        if len(groups) > 1:
            xor_groups[anchor] = groups

    return xor_groups

# Note to self. This does not cover cases like: Reject application cannot happen after Approve application

def discover_prerequisites_from_log(df, activity_col='activity', case_col='case_id', order_by='end_time'):
    # Step 1: Collect all activities that appear before each activity in each case
    activity_to_preceding_sets = defaultdict(list)

    for case_id, group in df.groupby(case_col):
        sorted_activities = group.sort_values(by=order_by)[activity_col].tolist()
        seen = set()
        for i, act in enumerate(sorted_activities):
            activity_to_preceding_sets[act].append(seen.copy())
            seen.add(act)

    # Step 2: Intersect the "seen-before" sets across all cases
    raw_prerequisites = {}
    for act, preceding_sets in activity_to_preceding_sets.items():
        if preceding_sets:
            raw_prerequisites[act] = set.intersection(*preceding_sets)
        else:
            raw_prerequisites[act] = set()

    # Step 3: Remove transitive dependencies
    # If A → B and B → C, remove A from prerequisites of C
    def remove_transitive(prereq_dict):
        cleaned = {}
        for act in prereq_dict:
            direct_prereqs = prereq_dict[act].copy()
            # Remove any indirect dependencies
            for p in direct_prereqs.copy():
                indirects = prereq_dict.get(p, set())
                direct_prereqs -= indirects
            cleaned[act] = list(direct_prereqs)
        return cleaned

    strict_prerequisites = remove_transitive(raw_prerequisites)
    return strict_prerequisites



def validate_simulated_log(df, prerequisites, post_conditions, valid_end_activities, 
                            xor_rules=None, case_col='case_id', activity_col='activity', order_by='start'):
    issues = []

    for case_id, group in df.groupby(case_col):
        sorted_activities = group.sort_values(by=order_by)[activity_col].tolist()

        if not sorted_activities:
            issues.append((case_id, "Empty trace"))
            continue

        activities_no_end = [a for a in sorted_activities if a != "zzz_end"]

        # 🚨 1. Prerequisites check
        performed = set()
        for act in activities_no_end:
            required = prerequisites.get(act, [])
            if not all(pre in performed for pre in required):
                missing = [pre for pre in required if pre not in performed]
                issues.append((case_id, f"Activity '{act}' missing prerequisites {missing}"))
            performed.add(act)

        # 🚨 2. Post-conditions check
        for i, act in enumerate(activities_no_end):
            required_posts = post_conditions.get(act, [])
            future_acts = set(activities_no_end[i+1:])
            for post in required_posts:
                if post not in future_acts:
                    issues.append((case_id, f"Activity '{act}' missing required post-condition '{post}'"))

        # 🚨 3. End correctness check
        if activities_no_end:
            last_real_activity = activities_no_end[-1]
            if last_real_activity not in valid_end_activities:
                issues.append((case_id, f"Case ends incorrectly on '{last_real_activity}'"))

        # 🚨 4. XOR violation check
        if xor_rules:
            for anchor, groups in xor_rules.items():
                current_window = set()
                windows = []

                for act in activities_no_end:
                    if act == anchor:
                        # Anchor re-executed → start new window
                        if current_window:
                            windows.append(current_window)
                        current_window = set()
                    else:
                        for idx, group in enumerate(groups):
                            if act in group:
                                current_window.add(idx)

                # Add final window
                if current_window:
                    windows.append(current_window)

                # Now validate all XOR windows
                for win_idx, window in enumerate(windows):
                    if len(window) > 1:
                        issues.append((case_id, f"XOR violation for anchor '{anchor}' in window {win_idx}: multiple groups executed {list(window)}"))

    return issues





In [311]:
def validate_agents(agents, durations, calendars=None):
    issues = []

    for agent in agents:
        agent_id = agent.agent_id

        # 1. Check capabilities
        if not agent.capable_activities:
            issues.append((agent_id, "Agent has no capable activities"))

        for act in agent.capable_activities:
            dur = durations.get(agent_id, {}).get(act)
            if not dur or isinstance(dur, list):
                issues.append((agent_id, f"Activity '{act}' has no valid duration distribution"))

        # 2. Check calendar coverage
        if calendars:
            calendar = calendars.get(agent_id)
            if calendar is None:
                issues.append((agent_id, "Missing calendar"))
            elif not calendar.to_dict().get('time_periods', None):
                issues.append((agent_id, "Calendar has no time periods"))


    return issues


In [312]:
class AgentStub: 
    def __init__(self, agent_id, capable_activities, calendar):
        self.agent_id = agent_id
        self.capable_activities = capable_activities
        self.calendar = calendar


In [313]:
def print_agent_calendars(calendars, agent_ids=None, max_agents=10):
    """
    Pretty print the calendar time periods for each agent.
    
    Parameters:
    - calendars: dict of {agent_id: RCalendar}
    - agent_ids: optional list of agent IDs to filter (default: all)
    - max_agents: limit number of agents printed (default: 10)
    """
    if agent_ids is None:
        agent_ids = list(calendars.keys())

    count = 0

    for agent_id in agent_ids:
        calendar = calendars.get(agent_id)
        if calendar is None:
            print(f"Agent {agent_id}: ❌ No calendar assigned")
            continue

        # Try accessing time_periods safely
        time_periods = calendar.to_dict().get('time_periods', None)
        if not time_periods:
            print(f"Agent {agent_id}: ⚠️ Calendar exists but has no time periods")
        else:
            print(f"\nAgent {agent_id} ✅ Calendar time periods:")
            for period in time_periods:
                print(f"  - {period['from']} → {period['to']}, {period['beginTime']}–{period['endTime']}")

        count += 1
        if count >= max_agents:
            print(f"\n...and {len(agent_ids) - max_agents} more. Use `max_agents=None` to show all.")
            break


## Validation of my simulation

In [314]:
# Step 1: Discover prerequisites and post-conditions
prerequisites = discover_prerequisites_from_log(raw, activity_col='activity', case_col='case_id', order_by='end_time')
post_conditions = discover_post_conditions(raw, activity_col='activity', case_col='case_id', order_by='end_time')

# Step 2: Determine valid end activities (from transition dict if you have it)
# If you don't have a transition dict handy, you can approximate:
valid_end_activities = raw.groupby('case_id')['activity'].last().value_counts().index.tolist()

# Step 3: Extract successor and concurrency maps
successor_map = extract_all_successors(transition_dict)  # <- you must define/provide transition_dict
co_occurrence_map = mine_concurrent_activities(raw, case_col='case_id', activity_col='activity',
                                               start_col='start_time', end_col='end_time')

# Step 4: Infer XOR rules
xor_rules = extract_xor_groups_from_cooccurrence(successor_map, co_occurrence_map)

# Step 5: Validate log
issues = validate_simulated_log(df,
                                prerequisites=prerequisites,
                                post_conditions=post_conditions,
                                valid_end_activities=valid_end_activities,
                                xor_rules=xor_rules,
                                case_col='case_id',
                                activity_col='activity',
                                order_by='start')

# Step 6: Review results
if issues:
    print(f"⚠️ Validation found {len(issues)} issues:")
    for case_id, desc in issues:
        print(f"  - Case {case_id}: {desc}")
else:
    print("✅ Validation passed with no issues!")


⚠️ Validation found 884 issues:
  - Case 0: XOR violation for anchor 'Check application form completeness' in window 0: multiple groups executed [0, 2, 3]
  - Case 0: XOR violation for anchor 'Check credit history' in window 0: multiple groups executed [0, 1, 2]
  - Case 0: XOR violation for anchor 'AML check' in window 1: multiple groups executed [0, 1, 2]
  - Case 0: XOR violation for anchor 'Appraise property' in window 0: multiple groups executed [1, 2]
  - Case 0: XOR violation for anchor 'Appraise property' in window 1: multiple groups executed [0, 2]
  - Case 1: XOR violation for anchor 'Check application form completeness' in window 0: multiple groups executed [0, 2, 3]
  - Case 1: XOR violation for anchor 'Check application form completeness' in window 1: multiple groups executed [0, 2, 3]
  - Case 1: XOR violation for anchor 'Check credit history' in window 0: multiple groups executed [0, 1, 2]
  - Case 1: XOR violation for anchor 'Check credit history' in window 2: multiple 

In [315]:
agents = []

for agent_id, acts in durations.items():
    # Only include activities with a valid duration distribution
    capable_activities = {
        act for act, dist in acts.items()
        if dist and not isinstance(dist, list)
    }
    # print(f"Agent{agent_id} can do {capable_activities}")

    calendar = calendars.get(agent_id)

    agents.append(AgentStub(agent_id, capable_activities, calendar))

agent_issues = validate_agents(agents, durations, calendars)

if agent_issues:
    print(f"⚠️ Found {len(agent_issues)} agent issues:")
    for agent_id, msg in agent_issues:
        print(f"  - Agent {agent_id}: {msg}")
else:
    print("✅ All agents are valid!")


✅ All agents are valid!


## Validation of AgentSimulator Simulation

In [316]:
def load_all_simulated_logs(base_simulation_dir):
    """
    Load all simulated log CSVs from a given base directory into a single DataFrame.
    
    Parameters:
        base_simulation_dir (str): Path to the directory containing simulated_log_*.csv files.
    
    Returns:
        pd.DataFrame: A combined DataFrame of all simulated logs, with a 'simulation_run' column.
    """
    simulated_logs = []
    for root, dirs, files in os.walk(base_simulation_dir):
        for file in files:
            if file.startswith("simulated_log_") and file.endswith(".csv"):
                simulated_logs.append(os.path.join(root, file))

    simulated_log_dfs = []
    for log_path in simulated_logs:
        df = pd.read_csv(log_path)
        simulation_index = int(os.path.basename(log_path).split('_')[-1].split('.')[0])
        df["simulation_run"] = simulation_index
        simulated_log_dfs.append(df)

    if simulated_log_dfs:
        return pd.concat(simulated_log_dfs, ignore_index=True)
    else:
        raise FileNotFoundError(f"No simulated logs found in {base_simulation_dir}")



In [317]:
simulated_logs_df = load_all_simulated_logs("../../simulated_data/LoanApp.csv/main_results/")
simulated_logs_df = simulated_logs_df.rename(columns={'activity_name': 'activity'})

In [318]:
for run_id, run_df in simulated_logs_df.groupby('simulation_run'):
    issues = validate_simulated_log(run_df,
                                prerequisites=prerequisites,
                                post_conditions=post_conditions,
                                valid_end_activities=valid_end_activities,
                                xor_rules=xor_rules,
                                case_col='case_id',
                                activity_col='activity',
                                order_by='start_timestamp')
    print(f"=============================================")
    print(f"SIMULATION {run_id}")
    if issues:
        print(f"⚠️ Validation found {len(issues)} issues:")
        for case_id, desc in issues:
            print(f"  - Case {case_id}: {desc}")
    else:
        print("✅ Validation passed with no issues!")


SIMULATION 0
⚠️ Validation found 800 issues:
  - Case 0: XOR violation for anchor 'Check application form completeness' in window 0: multiple groups executed [0, 2, 3]
  - Case 0: XOR violation for anchor 'Check credit history' in window 1: multiple groups executed [0, 2]
  - Case 0: XOR violation for anchor 'AML check' in window 0: multiple groups executed [0, 1, 2]
  - Case 0: XOR violation for anchor 'Appraise property' in window 0: multiple groups executed [1, 2]
  - Case 1: XOR violation for anchor 'Check application form completeness' in window 0: multiple groups executed [0, 2, 3]
  - Case 1: XOR violation for anchor 'Check credit history' in window 0: multiple groups executed [0, 1, 2]
  - Case 1: XOR violation for anchor 'AML check' in window 1: multiple groups executed [0, 1]
  - Case 1: XOR violation for anchor 'Appraise property' in window 0: multiple groups executed [1, 2]
  - Case 2: XOR violation for anchor 'Check application form completeness' in window 0: multiple grou

Conclusion: The non-deterministic approach is sometimes not respecting pre-requisites and post-conditions.

# Comparison of CT and Activities per Trace

Comparison on CT per case

Calculation of AS CT

In [319]:

simulated_logs_df['start'] = pd.to_datetime(simulated_logs_df['start_timestamp'], utc=True, format='mixed')
simulated_logs_df['end'] = pd.to_datetime(simulated_logs_df['end_timestamp'], utc=True, format='mixed')


agent_sim_ct = simulated_logs_df.groupby(['simulation_run', 'case_id']).agg({
    'start': 'min',
    'end': 'max'
}).reset_index()
agent_sim_ct['cycle_time'] = (agent_sim_ct['end'] - agent_sim_ct['start']).dt.total_seconds()/60
agent_sim_ct_stats = agent_sim_ct.groupby('case_id')['cycle_time'].agg(['mean', 'median', 'min', 'max']).reset_index()


Calculation of optimizer CT

In [320]:
df['start'] = pd.to_datetime(df['start'], utc=True, format='mixed')
df['end'] = pd.to_datetime(df['end'], utc=True, format='mixed')

my_ct = df.groupby('case_id').agg({
    'start': 'min',
    'end': 'max'
}).reset_index()
my_ct['cycle_time'] = (my_ct['end'] - my_ct['start']).dt.total_seconds()/60


Calculation or raw CT

In [321]:
raw['start'] = pd.to_datetime(raw['start_time'], utc=True, format='mixed')
raw['end'] = pd.to_datetime(raw['end_time'], utc=True, format='mixed')

my_raw_ct = raw.groupby('case_id').agg({
    'start': 'min',
    'end': 'max'
}).reset_index()
my_raw_ct['cycle_time'] = (my_raw_ct['end'] - my_raw_ct['start']).dt.total_seconds()/60

In [322]:
comparison_df = pd.merge(my_ct[['case_id', 'cycle_time']], agent_sim_ct_stats, on='case_id', suffixes=('_mine', '_agent'))
comparison_df = comparison_df.rename({'cycle_time':'optimizer_CT'})

In [323]:
comparison_df = pd.merge(comparison_df, my_raw_ct[['case_id', 'cycle_time']], on='case_id', suffixes=('_mine', '_raw'))

In [325]:
comparison_df["better_than_simulator_by"] = comparison_df['median'] - comparison_df['cycle_time_mine']

In [326]:
comparison_df[['case_id', 'cycle_time_mine', 'median', 'cycle_time_raw', 'better_than_simulator_by']].sample(50)

Unnamed: 0,case_id,cycle_time_mine,median,cycle_time_raw,better_than_simulator_by
34,34,332.871731,144.078166,5.0,-188.793565
78,78,7437.325632,1105.069914,20.0,-6332.255718
23,23,171.747111,2121.084204,34.693683,1949.337093
35,35,1354.322558,143.593425,23.072983,-1210.729133
94,94,415.052009,1046.342841,976.74445,631.290832
98,98,155.122847,141.790395,20.0,-13.332452
81,81,388.917744,181.561746,24.85305,-207.355998
70,70,1455.126057,140.020958,39.939417,-1315.105099
47,47,1636.748785,118.830492,23.59755,-1517.918293
87,87,204.896071,143.227221,984.7771,-61.66885


In [301]:
my_ct[my_ct['case_id']==99]

Unnamed: 0,case_id,start,end,cycle_time
99,99,2023-04-28 09:30:00+00:00,2023-04-28 14:01:38.598698604+00:00,271.643312


In [302]:
df[df['case_id']==99]

Unnamed: 0,case_id,agent,activity,start,end
1637,99,10,Check application form completeness,2023-04-28 09:30:00+00:00,2023-04-28 10:01:51.136014463+00:00
1638,99,6,Appraise property,2023-04-28 10:01:51.136014463+00:00,2023-04-28 10:03:14.226846041+00:00
1639,99,5,Check credit history,2023-04-28 10:03:14.226846041+00:00,2023-04-28 10:09:41.475139252+00:00
1640,99,2,AML check,2023-04-28 10:09:41.475139252+00:00,2023-04-28 11:39:45.046765405+00:00
1641,99,13,Assess loan risk,2023-04-28 11:39:45.046765405+00:00,2023-04-28 11:59:45.046765405+00:00
1642,99,8,Check application form completeness,2023-04-28 11:59:45.046765405+00:00,2023-04-28 12:29:52.270285731+00:00
1643,99,7,AML check,2023-04-28 12:29:52.270285731+00:00,2023-04-28 12:29:54.845427369+00:00
1644,99,5,Check credit history,2023-04-28 12:29:54.845427369+00:00,2023-04-28 13:13:13.858954773+00:00
1645,99,6,Appraise property,2023-04-28 13:13:13.858954773+00:00,2023-04-28 13:31:38.598698604+00:00
1646,99,16,Assess loan risk,2023-04-28 13:31:38.598698604+00:00,2023-04-28 13:51:38.598698604+00:00


In [336]:
sim_params['activity_durations_dict']

{0: {'Check application form completeness': <source.arrival_distribution.DurationDistribution at 0x284dd7250>,
  'Assess loan risk': [],
  'Return application back to applicant': <source.arrival_distribution.DurationDistribution at 0x284dc6310>,
  'Approve application': <source.arrival_distribution.DurationDistribution at 0x28708dc90>,
  'Check credit history': <source.arrival_distribution.DurationDistribution at 0x28708cc50>,
  'Design loan offer': [],
  'Approve loan offer': [],
  'Applicant completes form': [],
  'Appraise property': [],
  'Reject application': <source.arrival_distribution.DurationDistribution at 0x28708e890>,
  'Cancel application': <source.arrival_distribution.DurationDistribution at 0x28a425710>,
  'AML check': [],
  'zzz_end': <source.arrival_distribution.DurationDistribution at 0x284e203d0>},
 1: {'Check application form completeness': <source.arrival_distribution.DurationDistribution at 0x28a5c28d0>,
  'Assess loan risk': [],
  'Return application back to appl

In [329]:
raw[raw['case_id']==91]

Unnamed: 0,case_id,resource,activity,start_time,end_time,start,end
91,91,Applicant-000001,Applicant completes form,2023-04-20T14:12:34.230,2023-04-21T07:12:34.230,2023-04-20 14:12:34.230000+00:00,2023-04-21 07:12:34.230000+00:00


In [327]:
comparison_df[['cycle_time_mine', 'cycle_time_raw']]

Unnamed: 0,cycle_time_mine,cycle_time_raw
0,230.983545,54.570900
1,1746.609201,1.067650
2,302.167501,30.950833
3,229.687555,13.114550
4,1401.003294,33.570033
...,...,...
95,4525.094029,0.943367
96,127.298088,11.354067
97,4350.240462,12.390917
98,155.122847,20.000000


In [337]:
sim_params['case_arrival_times']

[Timestamp('2023-04-20 08:00:00+0000', tz='UTC'),
 Timestamp('2023-04-20 08:30:00+0000', tz='UTC'),
 Timestamp('2023-04-20 09:00:00+0000', tz='UTC'),
 Timestamp('2023-04-20 09:30:00+0000', tz='UTC'),
 Timestamp('2023-04-20 10:00:00+0000', tz='UTC'),
 Timestamp('2023-04-20 10:30:00+0000', tz='UTC'),
 Timestamp('2023-04-20 11:00:00+0000', tz='UTC'),
 Timestamp('2023-04-20 11:30:00+0000', tz='UTC'),
 Timestamp('2023-04-20 12:00:00+0000', tz='UTC'),
 Timestamp('2023-04-20 12:30:00+0000', tz='UTC'),
 Timestamp('2023-04-20 13:00:00+0000', tz='UTC'),
 Timestamp('2023-04-20 13:30:00+0000', tz='UTC'),
 Timestamp('2023-04-20 14:00:00+0000', tz='UTC'),
 Timestamp('2023-04-20 14:30:00+0000', tz='UTC'),
 Timestamp('2023-04-20 15:00:00+0000', tz='UTC'),
 Timestamp('2023-04-21 07:30:00+0000', tz='UTC'),
 Timestamp('2023-04-21 08:00:00+0000', tz='UTC'),
 Timestamp('2023-04-21 08:30:00+0000', tz='UTC'),
 Timestamp('2023-04-21 09:00:00+0000', tz='UTC'),
 Timestamp('2023-04-21 09:30:00+0000', tz='UTC'),
