In [147]:
import sys
import os

# Add project root to sys.path
sys.path.append(os.path.abspath(os.path.join("../..")))  # if you're in optimizer/

import pandas as pd
from source.utils import sample_from_distribution
import random
import numpy as np
import datetime
import pytz
import itertools
from collections import defaultdict


Load Simulation Parameters

In [148]:
import pickle

with open("../parameters/simulation_parameters.pkl", "rb") as f:
    sim_params = pickle.load(f)

In [141]:
# sim_params.keys()

### Simulate Test Cases

Get Strict Pre-requisites from logs

In [142]:
from collections import defaultdict

In [143]:
# Note to self. This does not cover cases like: Reject application cannot happen after Approve application

def discover_strict_prerequisites_from_log(df, activity_col='activity', case_col='case_id', order_by='end_time'):
    # Step 1: Collect all activities that appear before each activity in each case
    activity_to_preceding_sets = defaultdict(list)

    for case_id, group in df.groupby(case_col):
        sorted_activities = group.sort_values(by=order_by)[activity_col].tolist()
        seen = set()
        for i, act in enumerate(sorted_activities):
            activity_to_preceding_sets[act].append(seen.copy())
            seen.add(act)

    # Step 2: Intersect the "seen-before" sets across all cases
    raw_prerequisites = {}
    for act, preceding_sets in activity_to_preceding_sets.items():
        if preceding_sets:
            raw_prerequisites[act] = set.intersection(*preceding_sets)
        else:
            raw_prerequisites[act] = set()

    # Step 3: Remove transitive dependencies
    # If A → B and B → C, remove A from prerequisites of C
    def remove_transitive(prereq_dict):
        cleaned = {}
        for act in prereq_dict:
            direct_prereqs = prereq_dict[act].copy()
            # Remove any indirect dependencies
            for p in direct_prereqs.copy():
                indirects = prereq_dict.get(p, set())
                direct_prereqs -= indirects
            cleaned[act] = list(direct_prereqs)
        return cleaned

    strict_prerequisites = remove_transitive(raw_prerequisites)
    return strict_prerequisites


In [144]:
df = pd.read_csv("../../raw_data/LoanApp.csv.gz", compression='gzip')
# df = pd.read_csv("../raw_data/Hospital.csv")

Identify valid end activities

In [145]:
def find_valid_end_activities(transition_dict):
    valid_end_activities = set()

    for prefix, agent_dicts in transition_dict.items():
        for agent_transitions in agent_dicts.values():
            # print(agent_transitions)
            if 'zzz_end' in agent_transitions:
                if prefix:  # make sure prefix is not empty
                    valid_end_activities.add(prefix[-1])  # last activity in prefix

    return valid_end_activities



valid_end_activities = find_valid_end_activities(sim_params["transition_probabilities"])

In [146]:
df_simulated[df_simulated['case_id']==0]

NameError: name 'df_simulated' is not defined

In [None]:
valid_end_activities

{'Approve application', 'Cancel application', 'Reject application'}

Accounting for post-conditions

In [None]:
def remove_transitive_response_constraints(response_constraints):
    cleaned = {}

    for act, responses in response_constraints.items():
        direct = set(responses)

        # Remove any activity that is indirectly reachable through other responses
        for mid in responses:
            indirects = set(response_constraints.get(mid, []))
            direct -= indirects

        cleaned[act] = list(direct)

    return cleaned


def discover_post_conditions(df, activity_col='activity', case_col='case_id', order_by='end_time'):
    response_counts = defaultdict(lambda: defaultdict(int))
    activity_counts = defaultdict(int)

    # Group by case
    for case_id, group in df.groupby(case_col):
        sorted_activities = group.sort_values(by=order_by)[activity_col].tolist()
        seen = set()

        for i, act in enumerate(sorted_activities):
            activity_counts[act] += 1

            # All activities that come after this one
            for after_act in sorted_activities[i+1:]:
                response_counts[act][after_act] += 1

    # Build final response constraint map
    post_conditions = {}
    for act, after_acts in response_counts.items():
        constraints = []
        for after_act, count in after_acts.items():
            # Threshold: e.g., B happens after A in 80%+ of A occurrences
            if count / activity_counts[act] >= 0.8:
                constraints.append(after_act)
        if constraints:
            post_conditions[act] = constraints

    post_conditions = remove_transitive_response_constraints(post_conditions)

    return post_conditions


XOR Rules

In [None]:
def extract_all_successors(transition_dict):
    """
    Converts a nested transition dictionary to a flat mapping:
    prefix_activity → list of all possible successor activities (non-zero prob)

    Parameters:
    - transition_dict: dict of {prefix: {agent_id: {activity: prob}}} ! Careful, only one activity prefixes are valid

    Returns:
    - dict of {activity: [possible next activities]}
    """
    flat_successors = {}
    seen_anchors = set()

    for prefix, agent_dict in transition_dict.items():
        if not prefix:
            continue  # skip empty prefixes
        if prefix[-1] in seen_anchors:
            continue
        anchor = prefix[-1]  # last activity in the prefix
        seen_anchors.add(anchor)
        # print(anchor, ":")
        successor_set = set()
        for agent_transitions in agent_dict.values():
            for act, prob in agent_transitions.items(): 
                if (act not in successor_set):
                    # print(act, prob)
                    if prob > 0:
                        # print(act)
                        successor_set.add(act)
        # print(successor_set)

        flat_successors[anchor] = sorted(successor_set)
        # print(flat_successors[anchor])
    # print("END", flat_successors['Check application form completeness'])
    return flat_successors


In [None]:
def mine_concurrent_activities(df, case_col='case_id', activity_col='activity',
                                start_col='start_time', end_col='end_time'):
    """
    For each activity, detect other activities that can run concurrently
    by overlapping time windows in the same case.

    Parameters:
    - df: Event log with case_id, activity, start_time, end_time

    Returns:
    - co_occurrence: dict {activity: [other activities that overlapped with it]}
    """
    df = df.copy()
    df[start_col] = pd.to_datetime(df[start_col])
    df[end_col] = pd.to_datetime(df[end_col])

    co_occurrence = defaultdict(set)

    for case_id, group in df.groupby(case_col):
        group = group.sort_values(by=start_col)
        for i, row_i in group.iterrows():
            act_i, start_i, end_i = row_i[activity_col], row_i[start_col], row_i[end_col]
            for j, row_j in group.iterrows():
                if i == j:
                    continue
                act_j, start_j, end_j = row_j[activity_col], row_j[start_col], row_j[end_col]
                # Check for overlap
                if start_i < end_j and start_j < end_i:
                    co_occurrence[act_i].add(act_j)

    # Convert sets to sorted lists
    return {act: sorted(list(others)) for act, others in co_occurrence.items()}

In [None]:
# def extract_xor_groups_from_cooccurrence(successor_map, co_occurrence_map):
#     """
#     Builds XOR groups from a simplified co-occurrence map.
#     Each group is a list of activities that co-occur with each other.
#     XOR is inferred between groups that do not co-occur with each other.

#     Returns:
#     - xor_groups: {anchor: list of mutually exclusive groups (each group is a list of activities)}
#     """
#     xor_groups = defaultdict(list)

#     for anchor, successors in successor_map.items():
#         if not successors:
#             continue

#         remaining = set(successors)
#         groups = []

#         while remaining:
#             act = remaining.pop()
#             group = {act}

#             # Collect all activities that co-occur with 'act'
#             for other in list(remaining):
#                 if (
#                     act in co_occurrence_map.get(other, []) or
#                     other in co_occurrence_map.get(act, [])
#                 ):
#                     group.add(other)
#                     remaining.remove(other)

#             groups.append(sorted(group))

#         if len(groups) > 1:
#             xor_groups[anchor] = groups

#     return xor_groups

from collections import defaultdict

def extract_xor_groups_from_cooccurrence(successor_map, co_occurrence_map):
    """
    Builds XOR groups from a simplified co-occurrence map.
    An activity is excluded from XOR groups if it can co-occur with the anchor.
    Returns:
    - xor_groups: {anchor: list of mutually exclusive groups (each group is a list of activities)}
    """
    xor_groups = defaultdict(list)

    for anchor, successors in successor_map.items():
        if not successors:
            continue

        # ⚠️ Filter out successors that can co-occur with the anchor
        filtered_successors = [
            act for act in successors
            if act not in co_occurrence_map.get(anchor, []) and anchor not in co_occurrence_map.get(act, [])
        ]

        remaining = set(filtered_successors)
        groups = []

        while remaining:
            act = remaining.pop()
            group = {act}

            for other in list(remaining):
                if (
                    act in co_occurrence_map.get(other, []) or
                    other in co_occurrence_map.get(act, [])
                ):
                    group.add(other)
                    remaining.remove(other)

            groups.append(sorted(group))

        if len(groups) > 1:
            xor_groups[anchor] = groups

    return xor_groups



In [None]:
xor

defaultdict(list,
            {'Check application form completeness': [['AML check',
               'Appraise property',
               'Check credit history'],
              ['Return application back to applicant']],
             'Assess loan risk': [['Reject application'],
              ['Design loan offer']],
             'Approve loan offer': [['Cancel application'],
              ['Approve application']]})

In [None]:
valid_end_activities

{'Approve application', 'Cancel application', 'Reject application'}

In [None]:
transition_dict = sim_params['transition_probabilities']
concurrent_map = mine_concurrent_activities(df)
flat_map = extract_all_successors({p: a for p, a in transition_dict.items() if len(p) == 1})
xor = extract_xor_groups_from_cooccurrence(flat_map, concurrent_map)


In [None]:
xor

defaultdict(list,
            {'Check application form completeness': [['AML check',
               'Appraise property',
               'Check credit history'],
              ['Return application back to applicant']],
             'Assess loan risk': [['Reject application'],
              ['Design loan offer']],
             'Approve loan offer': [['Cancel application'],
              ['Approve application']]})

Naive Simulator

In [None]:
def sample_activity(post_condition_queue, available_activities, performed, prerequisites):
    if post_condition_queue:
        # 🚨 Priority: Insert required post-conditions immediately after their triggering activities
        return post_condition_queue.pop(0)
    else:
        available_acts = [act for act in available_activities if all(pre in performed for pre in prerequisites.get(act, []))]
        if not available_acts:
            return None  # no more activities possible
        return random.choice(available_acts)

def assign_agent(sim_params, act):
    eligible_agents = [a for a, acts in sim_params["agent_activity_mapping"].items() if act in acts]
    return eligible_agents[0] if eligible_agents else None

def perform_activity(log, case_id, agent, act, current_time, sim_params):
    duration = sample_from_distribution(sim_params["activity_durations_dict"][agent][act])
    end_time = current_time + pd.Timedelta(seconds=duration)

    log.append({
        "case_id": case_id,
        "activity": act,
        "agent": agent,
        "start": current_time,
        "end": end_time
    })

    return end_time

def handle_post_conditions(act, post_conditions, post_condition_queue, outstanding_obligations):
    for required_post in post_conditions.get(act, []):
        if (not outstanding_obligations) or (required_post != outstanding_obligations[-1]):
            outstanding_obligations.append(required_post)
            post_condition_queue.append(required_post)
        """ 🚨 Note: Later we could mine whether this must be immediate or not"""

def fulfill_obligations(act, outstanding_obligations):
    if act in outstanding_obligations:
        outstanding_obligations.remove(act)

def finalize_case(log, case_id):
    if log:
        agent = log[-1]["agent"]
        log_end_time = log[-1]["end"]
        log.append({
            "case_id": case_id,
            "activity": "zzz_end",
            "agent": agent,
            "start": log_end_time,
            "end": log_end_time
        })

def simulate_case_dynamic(sim_params, case_id, prerequisites, valid_end_activities, all_possible_activities, post_conditions):
    current_time = sim_params["case_arrival_times"][case_id]
    performed = []
    log = []

    available_activities = set(all_possible_activities) - {"zzz_end"}

    post_condition_queue = []
    outstanding_obligations = []  # NEW: each obligation must be fulfilled independently

    while True:
        act = sample_activity(post_condition_queue, available_activities, performed, prerequisites)
        if act is None:
            break

        agent = assign_agent(sim_params, act)

        """This should not be happening, it should wait"""
        if agent is None:
            print(agent)
            if act in available_activities:
                available_activities.remove(act)
            continue  # no agent, skip

        current_time = perform_activity(log, case_id, agent, act, current_time, sim_params)
        performed.append(act)

        handle_post_conditions(act, post_conditions, post_condition_queue, outstanding_obligations)
        fulfill_obligations(act, outstanding_obligations)

        # 🚨 If we just performed a valid end activity and no post-conditions pending, terminate
        if act in valid_end_activities and not post_condition_queue and not outstanding_obligations:
            break

    finalize_case(log, case_id)

    return log


In [138]:
all_activities = set(act for agents in sim_params["agent_activity_mapping"].values() for act in agents)
valid_end_activities = find_valid_end_activities(sim_params["transition_probabilities"])
post_conditions = discover_post_conditions(df, order_by='end_time')
strict_prereqs = discover_strict_prerequisites_from_log(df, activity_col='activity', case_col='case_id', order_by='end_time')


df_simulated = []
for i in range(200):
    case_log = simulate_case_dynamic(sim_params, i, strict_prereqs, valid_end_activities, all_activities, post_conditions)
    df_simulated.extend(case_log)

df_simulated = pd.DataFrame(df_simulated)


KeyError: 'end_time'

In [None]:
# df_simulated[df_simulated['case_id']==0]

In [None]:
# df_simulated.to_csv("naive_simulator_logs.csv")

Validation 

In [None]:
def validate_simulated_log(df, prerequisites, post_conditions, valid_end_activities, case_col='case_id', activity_col='activity', order_by='start'):
    issues = []

    for case_id, group in df.groupby(case_col):
        sorted_activities = group.sort_values(by=order_by)[activity_col].tolist()

        if not sorted_activities:
            issues.append((case_id, "Empty trace"))
            continue

        # Remove zzz_end for logic checks
        activities_no_end = [a for a in sorted_activities if a != "zzz_end"]

        # 🚨 1. Prerequisites check
        performed = set()
        for act in activities_no_end:
            required = prerequisites.get(act, [])
            if not all(pre in performed for pre in required):
                missing = [pre for pre in required if pre not in performed]
                issues.append((case_id, f"Activity '{act}' missing prerequisites {missing}"))
            performed.add(act)

        # 🚨 2. Post-conditions check
        for i, act in enumerate(activities_no_end):
            required_posts = post_conditions.get(act, [])
            future_acts = set(activities_no_end[i+1:])  # only activities after current one
            for post in required_posts:
                if post not in future_acts:
                    issues.append((case_id, f"Activity '{act}' missing required post-condition '{post}'"))

        # 🚨 3. End correctness check
        if activities_no_end:
            last_real_activity = activities_no_end[-1]
            if last_real_activity not in valid_end_activities:
                issues.append((case_id, f"Case ends incorrectly on '{last_real_activity}'"))

    return issues


In [None]:
issues = validate_simulated_log(df_simulated, strict_prereqs, post_conditions, valid_end_activities)
if not issues:
    print("✅ All simulated cases passed validation!")
else:
    print(f"❌ Found {len(issues)} problems:")
    for case_id, problem in issues:
        print(f"Case {case_id}: {problem}")


❌ Found 27 problems:
Case 5: Case ends incorrectly on 'Assess loan risk'
Case 13: Case ends incorrectly on 'Assess loan risk'
Case 15: Case ends incorrectly on 'Assess loan risk'
Case 18: Case ends incorrectly on 'Assess loan risk'
Case 21: Case ends incorrectly on 'Assess loan risk'
Case 23: Case ends incorrectly on 'Assess loan risk'
Case 28: Case ends incorrectly on 'Assess loan risk'
Case 41: Case ends incorrectly on 'Assess loan risk'
Case 52: Case ends incorrectly on 'Assess loan risk'
Case 55: Case ends incorrectly on 'Assess loan risk'
Case 57: Case ends incorrectly on 'Assess loan risk'
Case 63: Case ends incorrectly on 'Assess loan risk'
Case 74: Case ends incorrectly on 'Assess loan risk'
Case 75: Case ends incorrectly on 'Assess loan risk'
Case 88: Case ends incorrectly on 'Assess loan risk'
Case 104: Case ends incorrectly on 'Assess loan risk'
Case 111: Case ends incorrectly on 'Assess loan risk'
Case 119: Case ends incorrectly on 'Assess loan risk'
Case 125: Case ends inc

Global Scheduler Simulation Approach

Integrating Global Clock

Build a simple framework that:

Handles multiple cases arriving over time.

Tracks agent availability globally.

Lets agents propose valid activities for cases.

Picks one valid agent-action-case assignment and executes it.

Logs the result.

In [None]:
strict_prerequisites

In [None]:
prerequisites

{'Check application form completeness': [],
 'Check credit history': ['Check application form completeness'],
 'AML check': ['Check application form completeness'],
 'Appraise property': ['Check application form completeness'],
 'Assess loan risk': ['Appraise property',
  'AML check',
  'Check credit history'],
 'Design loan offer': ['Assess loan risk'],
 'Approve loan offer': ['Design loan offer'],
 'Cancel application': ['Approve loan offer'],
 'Approve application': ['Approve loan offer'],
 'Reject application': ['Assess loan risk'],
 'Return application back to applicant': ['Check application form completeness'],
 'Applicant completes form': ['Return application back to applicant']}

In [None]:
xor

defaultdict(list,
            {'Check application form completeness': [['AML check',
               'Appraise property',
               'Check credit history'],
              ['Return application back to applicant']],
             'Check credit history': [['AML check', 'Appraise property'],
              ['Assess loan risk']],
             'AML check': [['Appraise property', 'Check credit history'],
              ['Assess loan risk']],
             'Appraise property': [['Assess loan risk'],
              ['AML check', 'Check credit history']],
             'Assess loan risk': [['Reject application'],
              ['Design loan offer']],
             'Approve loan offer': [['Cancel application'],
              ['Approve application']]})

In [None]:
class ActivityRules:
    """
    Holds the "arena" rules about actions, including prerequisites, post-conditions,
    valid end activities, and XOR branching logic.
    """

    def __init__(self, prerequisites, post_conditions, transition_dict, xor_rules=None):
        self.prerequisites = prerequisites
        self.post_conditions = post_conditions
        self.valid_end_activities = self._find_valid_end_activities(transition_dict)
        self.xor_rules = xor_rules or {}  # default to empty dict if none provided

    def _find_valid_end_activities(self, transition_dict):
        valid_end_activities = set()
        for prefix, agent_dicts in transition_dict.items():
            for agent_transitions in agent_dicts.values():
                if 'zzz_end' in agent_transitions:
                    
                    if prefix:  # make sure prefix is not empty
                        valid_end_activities.add(prefix[-1])  # last activity in prefix
        
        return valid_end_activities


    def is_case_end(self, activity, outstanding_obligations):
        print(f"Is activity ending: {activity in self.valid_end_activities}")
        print(f"Obligations {outstanding_obligations}")
        return activity in self.valid_end_activities and not outstanding_obligations

    def get_available_activities(self, performed):
        return [
            a for a in self.prerequisites
            if all(p in performed for p in self.prerequisites.get(a, []))
        ]

    def is_xor_valid(self, activity: str, case) -> bool:
        """
        Checks if the activity can be executed based on XOR group logic.
        An activity is allowed if:
        - It’s not part of any XOR group, or
        - It’s the first activity executed from its group for the given anchor, or
        - It belongs to the same group as an already chosen activity for the anchor.

        XOR decisions are stored in case.xor_decisions as:
            { anchor_activity: group_id (int) }
        """
        for anchor, groups in self.xor_rules.items():
            # flatten the xor groups into (index, group) pairs
            for group_index, group in enumerate(groups):
                if activity in group:
                    chosen = case.xor_decisions.get(anchor)
                    print(f"{activity} - Chosen: {chosen}")
                    if chosen is None:
                        return True  # no group taken yet
                    if chosen == group_index:
                        return True  # same group already taken
                    return False  # different group already taken
        return True  # activity not part of any XOR group
    
    def is_activity_allowed(self, activity: str, case) -> bool:
        """
        Determines whether the given activity is executable at this point in the case.
        Enforces that:
        - Valid end activities are only allowed if there are no outstanding obligations.
        - XOR rules are respected.
        """

        # 🚫 Block premature end activities
        if activity in self.valid_end_activities and case.outstanding_obligations:
            print(f"⛔ Cannot execute end activity '{activity}' due to obligations: {case.outstanding_obligations}")
            return False

        # 🚫 Block XOR-invalid paths
        if not self.is_xor_valid(activity, case):
            return False

        return True



In [None]:
"""
State Classes
"""

class Case:
    """
    A Case object represents a case of a simulated or raw log. 
    It knows its:
        *  performed activities,
        * current_timestamp,
        * post_condition_queue,
        * outstanding_obligations.
    """
    
    def __init__(self, case_id, arrival_time, xor_decisions):
        self.case_id = case_id
        self.current_time = arrival_time
        self.performed = []
        self.outstanding_obligations = []
        self.done = False
        self.xor_decisions = xor_decisions

    def get_available_activities(self, rules: ActivityRules):
        """
        Gets available activities based on pre-conditions and prioritizes them based on outstanding 
        obligations list.
        Logic found in ActivityRules class
        """

        all_available = rules.get_available_activities(self.performed)

        # Sort so obligations come first, preserving order
        prioritized = [a for a in self.outstanding_obligations if a in all_available]
        rest = [a for a in all_available if a not in prioritized]

        return prioritized + rest
    

    # post-condition related functions
    
    def add_obligation(self, activity):
        """
        Adds a post-condition activity to the obligations list
        """
        if activity not in self.outstanding_obligations:
            self.outstanding_obligations.append(activity)

    def remove_obligation(self, activity):
        """
        Removes a post-condition activity from the obligations list
        """
        if activity in self.outstanding_obligations:
            self.outstanding_obligations.remove(activity)


class Agent:
    """
    An Agent object represents an agent mined from a simulated or raw log. 
    It knows:
        * What activities it can perform,
        * When it's available,
        * Proposes actions it could do for a case.
    """
    def __init__(self, agent_id, capable_activities, calendar):
        self.agent_id = agent_id
        self.capable_activities = set(capable_activities)
        self.available_at = pd.Timestamp.min.replace(tzinfo=pytz.UTC)  # updated as tasks are performed
        self.calendar = calendar


In [None]:
class Proposal:
    """
    Agents make a Proposal object for each activity they offer to do
    """
    def __init__(self, case, agent, activity, start_time, duration):
        self.case = case
        self.agent = agent
        self.activity = activity
        self.start_time = start_time
        self.end_time = start_time + pd.Timedelta(seconds=duration)

class LogEntry:
    def __init__(self, case_id, agent_id, activity, start, end):
        self.case_id = case_id
        self.agent_id = agent_id
        self.activity = activity
        self.start = start
        self.end = end

    def to_dict(self):
        return {
            "case_id": self.case_id,
            "agent": self.agent_id,
            "activity": self.activity,
            "start": self.start,
            "end": self.end
        }



In [None]:
class Simulation:
    def __init__(self, agents, cases, rules, durations, case_arrivals):
        self.agents = agents
        self.cases = cases
        self.rules = rules
        self.durations = durations
        self.case_arrivals = case_arrivals
        self.valid_end_activities=valid_end_activities
        self.log = []

    def make_proposals(self, agent, case, rules: ActivityRules, durations: dict, calendars: dict, available_activities: list):
        """
        Generate a list of proposals for a given agent to perform eligible activities
        for a given case at this point in the simulation, considering:
        - prerequisites,
        - post-condition obligations,
        - agent availability,
        - activity durations,
        - and the agent's working calendar.

        Parameters:
        - agent: Agent object making the proposal.
        - case: Case object requesting work.
        - rules: ActivityRules object containing prerequisites and post-conditions.
        - durations: Dict[agent_id][activity] → DurationDistribution object.
        - calendars: Dict[agent_id] → RCalendar object for validating working hours.

        Returns:
        - List of Proposal objects, one for each valid agent-case-activity match.

        Notes:
        - Activities with missing or empty duration distributions are skipped.
        - Proposals are only created if the agent is available *and* the full duration
        fits within their working hours.
        - Duration is sampled upfront (may be deferred in the future).
        - No cost or optimization logic is applied yet.
        
        Limitations / TODOs:
        - Doesn’t handle fallback logic if activity doesn't fit in calendar (e.g., shifting start time).
        - Doesn’t yet consider multitasking or interruptions.
        """

        proposals = []        

        for act in available_activities:
            if act in agent.capable_activities:
                if not rules.is_activity_allowed(act, case):
                    continue
                try:
                    dist = durations[agent.agent_id][act]
                    if not dist:  # Skip empty distributions
                        continue
                    duration = sample_from_distribution(dist)
                except (KeyError, AttributeError):
                    continue  # Duration missing or malformed

                start_time = max(agent.available_at, case.current_time)

                # ✅ Check calendar constraints before proposing
                calendar = calendars.get(agent.agent_id, None)
                if calendar is not None:
                    calendar_json = calendar.intervals_to_json()
                    if not self.is_within_calendar(start_time, duration, calendar_json):
                        continue  # Skip proposals that violate calendar
                else:
                    print(f"Warning: No calendar found for agent {agent.agent_id}, assuming always available.")

                proposals.append(Proposal(case, agent, act, start_time, duration))

        return proposals


    def is_within_calendar(self, start_time: pd.Timestamp, duration: float, calendar_json: list) -> bool:
        """
        Checks if the given start_time and duration fall entirely within
        the agent's working hours based on calendar_json (from intervals_to_json()).

        Parameters:
        - start_time: When the activity would begin.
        - duration: Duration in seconds.
        - calendar_json: List of availability windows from RCalendar.intervals_to_json().

        Returns:
        - True if the entire activity fits within any working window of that day.
        - False otherwise.

        Limitations:
        - Does not check cross-day durations.
        """

        from datetime import datetime

        end_time = start_time + pd.Timedelta(seconds=duration)
        day = start_time.strftime('%A').upper()

        for entry in calendar_json:
            if entry['from'] == day:
                try:
                    begin = datetime.strptime(entry['beginTime'], '%H:%M:%S').time()
                    end = datetime.strptime(entry['endTime'], '%H:%M:%S').time()
                except ValueError:
                    continue  # Skip malformed entries

                activity_start = start_time.time()
                activity_end = end_time.time()

                if begin <= activity_start and activity_end <= end:
                    return True

        return False


    def handle_post_conditions(self, activity, case):
        # print(f"Handling postconditions of action  {activity}")
        for post_act in self.rules.post_conditions.get(activity, []):
            case.add_obligation(post_act)

    
    # place holder
    def select_proposal(self, proposals: list) -> Proposal:
        """
        Select one proposal with weighted random logic:
        - Boosts proposals that fulfill obligations
        - Penalizes proposals that repeat the case's last activity

        Returns:
        - A single Proposal object
        """

        if not proposals:
            return None

        weights = []
        for p in proposals:
            weight = 1.0

            # 📌 Boost if it's an obligation
            if p.activity in p.case.outstanding_obligations:
                weight *= 10  # Total = 5

            # 🚫 Penalize if it's an immediate repeat
            elif len(p.case.performed) > 0 and p.activity == p.case.performed[-1]:
                weight *= 0.01  # 90% penalty, Total = 0.1
            
            else:
                weight *= 0.05

            weights.append(weight)

        # for p, w in zip(proposals, weights):
            # print(f"Weighted option: {p.activity} by {p.agent.agent_id} → weight {w}")


        return random.choices(proposals, weights=weights, k=1)[0]



    def perform_proposal(self, proposal: Proposal, rules: ActivityRules):
        """
        Apply the selected proposal by updating simulation state:
        - Advance case time
        - Update performed activities
        - Update agent availability
        - Handle post-condition obligations
        - Log the event

        Parameters:
        - proposal: The selected Proposal to execute
        - rules: ActivityRules object to handle post-conditions

        Side Effects:
        - Updates case and agent state in-place
        - Appends a LogEntry to self.log
        """

        case = proposal.case
        agent = proposal.agent
        activity = proposal.activity

        # 1. Advance simulation time for the case
        case.current_time = proposal.end_time

        # 2. Mark activity as done
        case.performed.append(activity)

        # 3. Update agent's availability
        agent.available_at = proposal.end_time

        # 4. Add post-condition obligations
        for post_act in rules.post_conditions.get(activity, []):
            case.add_obligation(post_act)

        # 5. Remove executed action from outstanding_obligations
        if activity in case.outstanding_obligations:
            case.remove_obligation(activity)

        # 6. Log the action
        entry = LogEntry(case_id=case.case_id, agent_id=agent.agent_id,
                        activity=activity, start=proposal.start_time, end=proposal.end_time)
        self.log.append(entry)

        # 7. Optionally check if case is now finished
        if rules.is_case_end(activity, case.outstanding_obligations):
            case.done = True

    
    def tick(self, calendars: dict):
        """
        Perform one tick of the simulation; push the simulation forward by one step:
        - Collects all valid proposals across agents and active cases.
        - Selects one proposal to execute (currently random).
        - Applies the proposal, updating case and agent state.
        
        Parameters:
        - calendars: Dict[agent_id] → RCalendar

        Returns:
        - True if a proposal was executed
        - False if no valid proposals (simulation may need to advance time)

        Notes:
        - No concurrent activities available
        """

        all_proposals = []

        for case in self.cases:
            if case.done:
                continue
            available_activities = case.get_available_activities(self.rules)
            print(f"Available activities: {available_activities}")
            for agent in self.agents:
                proposals = self.make_proposals(agent, case, self.rules, self.durations, calendars, available_activities)
                all_proposals.extend(proposals)

        if not all_proposals:
            return False
        
        # print(f"All proposals:")
        # for p in all_proposals:
        #     print(f"Agent {p.agent.agent_id} proposes {p.activity} from {p.start_time} to {p.end_time}")


        selected = self.select_proposal(all_proposals)
        print(f"Outstanding obligations: {case.outstanding_obligations}")
        self.perform_proposal(selected, self.rules)
        print(f"Selected activity: {selected.activity} by agent {selected.agent.agent_id}")

        return True




Main

In [None]:
xor

defaultdict(list,
            {'Check application form completeness': [['AML check',
               'Appraise property',
               'Check credit history'],
              ['Return application back to applicant']],
             'Check credit history': [['AML check', 'Appraise property'],
              ['Assess loan risk']],
             'AML check': [['Appraise property', 'Check credit history'],
              ['Assess loan risk']],
             'Appraise property': [['Assess loan risk'],
              ['AML check', 'Check credit history']],
             'Assess loan risk': [['Reject application'],
              ['Design loan offer']],
             'Approve loan offer': [['Cancel application'],
              ['Approve application']]})

In [None]:
post_conditions = discover_post_conditions(df, order_by='end_time')
prerequisites = discover_strict_prerequisites_from_log(df, activity_col='activity', case_col='case_id', order_by='end_time')
calendars = sim_params['res_calendars']
durations = sim_params['activity_durations_dict']
transition_dict = sim_params["transition_probabilities"]
agent_ids = list(sim_params['agent_activity_mapping'].keys())

In [None]:
sim_params.keys()

dict_keys(['activity_durations_dict', 'activities_without_waiting_time', 'roles', 'res_calendars', 'agent_activity_mapping', 'transition_probabilities_autonomous', 'agent_transition_probabilities_autonomous', 'agent_transition_probabilities', 'transition_probabilities', 'max_activity_count_per_case', 'case_arrival_times', 'case_arrival_times_val', 'agent_to_resource', 'determine_automatically', 'prerequisites', 'timers', 'central_orchestration', 'start_timestamp', 'execution_type'])

In [None]:
len(sim_params['case_arrival_times'])

201

In [None]:
df['case_id'].unique()

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        14,  15,  13,  16,  18,  17,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  34,  33,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  48,  47,  49,  50,  51,
        52,  53,  54,  56,  57,  58,  55,  60,  59,  62,  61,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  76,  75,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130,
       131, 132, 129, 133, 134, 136, 137, 138, 135, 139, 140, 142, 141,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 167, 166, 168,
       169, 170, 172, 171, 173, 174, 175, 176, 177, 178, 180, 17

In [None]:

# --- Create rules object ---
rules = ActivityRules(
    prerequisites=prerequisites,
    post_conditions=post_conditions,
    transition_dict=transition_dict
)

# --- Create agent list ---
agents = []
for agent_id in agent_ids:
    capable_acts = set(durations[agent_id].keys())
    agent = Agent(agent_id, capable_activities=capable_acts, calendar=calendars[agent_id])
    agents.append(agent)

# --- Create a single case ---
arrival_time = sim_params['case_arrival_times'][0]
case = Case("0", arrival_time, xor_decisions = xor)
case.performed = []
case.current_time = arrival_time

# --- Create simulation ---
sim = Simulation(
    agents=agents,
    cases=[case],
    rules=rules,
    durations=durations,
    case_arrivals={"C1": arrival_time},
)


# --- Run the full simulation until the case is done ---
step = 0
max_steps = 100  # safety stop to avoid infinite loops - this should be mined

while not case.done and step < max_steps:
    print(f"\n--- Tick {step + 1} ---")
    did_execute = sim.tick(calendars)

    if did_execute:
        last = sim.log[-1].to_dict()
        print(f"✅ Executed: {last['activity']} by {last['agent']} from {last['start']} to {last['end']}")
    else:
        print("⚠️ No actions possible this tick. (All agents busy or activities blocked)")
        # Optional: implement sim.advance_time() if needed

    step += 1

# --- Final State ---
print("\n--- Simulation complete ---")
print(f"Total steps: {step}")
print("Performed activities:", case.performed)
print("Outstanding obligations:", case.outstanding_obligations)
print("Case done:", case.done)
print(f"Total log entries: {len(sim.log)}")



--- Tick 1 ---
Available activities: ['Check application form completeness']
Outstanding obligations: []
Is activity ending: False
Obligations ['Appraise property', 'AML check', 'Check credit history']
Selected activity: Check application form completeness by agent 1
✅ Executed: Check application form completeness by 1 from 2023-04-20 08:00:00+00:00 to 2023-04-20 08:08:35.313874930+00:00

--- Tick 2 ---
Available activities: ['Appraise property', 'AML check', 'Check credit history', 'Check application form completeness', 'Return application back to applicant']
Outstanding obligations: ['Appraise property', 'AML check', 'Check credit history']
Is activity ending: False
Obligations ['Appraise property', 'Check credit history', 'Assess loan risk']
Selected activity: AML check by agent 7
✅ Executed: AML check by 7 from 2023-04-20 08:08:35.313874930+00:00 to 2023-04-20 08:12:54.984952053+00:00

--- Tick 3 ---
Available activities: ['Appraise property', 'Check credit history', 'Check applic

Try to simulate more than 1 cases

In [None]:
import pandas as pd

# --- Storage for all successful logs ---
all_logs = []
successful_cases = 0
case_counter = 0

max_cases = 5

while successful_cases < max_cases:
    print(f"\n🚀 Starting simulation for case {case_counter}")

    # --- Create fresh case and simulation ---
    arrival_time = sim_params['case_arrival_times'][case_counter]
    case = Case(str(case_counter), arrival_time, xor_decisions=xor)
    case.performed = []
    case.current_time = arrival_time

    agents = []
    for agent_id in agent_ids:
        capable_acts = set(durations[agent_id].keys())
        agents.append(Agent(agent_id, capable_activities=capable_acts, calendar=calendars[agent_id]))

    sim = Simulation(
        agents=agents,
        cases=[case],
        rules=rules,
        durations=durations,
        case_arrivals={str(case_counter): arrival_time}
    )

    # --- Run simulation for this case ---
    step = 0
    max_steps = 100
    while not case.done and step < max_steps:
        did_execute = sim.tick(calendars)
        if not did_execute:
            print(f"⚠️ No activity executed at tick {step}, aborting case {case_counter}")
            break
        step += 1

    # --- Check and store result ---
    if case.done:
        print(f"✅ Case {case_counter} completed successfully with {len(sim.log)} log entries.")
        all_logs.extend([entry.to_dict() for entry in sim.log])
        successful_cases += 1
    else:
        print(f"❌ Case {case_counter} did not complete. Retrying with new case.")

    case_counter += 1

# --- Convert to DataFrame ---
df_simulated_log = pd.DataFrame(all_logs)
print("\n✅ Simulation completed for all cases.")



🚀 Starting simulation for case 0
Available activities: ['Check application form completeness']
Outstanding obligations: []
Is activity ending: False
Obligations ['Appraise property', 'AML check', 'Check credit history']
Selected activity: Check application form completeness by agent 1
Available activities: ['Appraise property', 'AML check', 'Check credit history', 'Check application form completeness', 'Return application back to applicant']
Outstanding obligations: ['Appraise property', 'AML check', 'Check credit history']
Is activity ending: False
Obligations ['Appraise property', 'AML check', 'Assess loan risk']
Selected activity: Check credit history by agent 0
Available activities: ['Appraise property', 'AML check', 'Check application form completeness', 'Check credit history', 'Return application back to applicant']
Outstanding obligations: ['Appraise property', 'AML check', 'Assess loan risk']
Is activity ending: False
Obligations ['AML check', 'Assess loan risk']
Selected acti

In [None]:
df_simulated_log

Unnamed: 0,case_id,agent,activity,start,end
0,1,5,Check application form completeness,2023-04-20 08:30:00+00:00,2023-04-20 08:40:13.431984334+00:00
1,1,10,Check credit history,2023-04-20 08:40:13.431984334+00:00,2023-04-20 08:58:05.920227079+00:00
2,1,3,Appraise property,2023-04-20 08:58:05.920227079+00:00,2023-04-20 09:07:13.571693147+00:00
3,1,2,AML check,2023-04-20 09:07:13.571693147+00:00,2023-04-20 09:34:09.991831201+00:00
4,1,16,Assess loan risk,2023-04-20 09:34:09.991831201+00:00,2023-04-20 09:54:09.991831201+00:00
5,1,11,Design loan offer,2023-04-20 09:54:09.991831201+00:00,2023-04-20 10:08:25.989167976+00:00
6,1,17,Approve loan offer,2023-04-20 10:08:25.989167976+00:00,2023-04-20 10:28:25.989167976+00:00
7,1,9,Approve application,2023-04-20 10:28:25.989167976+00:00,2023-04-20 10:33:25.989167976+00:00
8,2,9,Check application form completeness,2023-04-20 09:00:00+00:00,2023-04-20 09:25:01.128430751+00:00
9,2,5,Check credit history,2023-04-20 09:25:01.128430751+00:00,2023-04-20 09:30:43.781492480+00:00


In [None]:
def validate_simulated_log(df, prerequisites, post_conditions, valid_end_activities, case_col='case_id', activity_col='activity', order_by='start'):
    issues = []

    for case_id, group in df.groupby(case_col):
        sorted_activities = group.sort_values(by=order_by)[activity_col].tolist()

        if not sorted_activities:
            issues.append((case_id, "Empty trace"))
            continue

        # Remove zzz_end for logic checks
        activities_no_end = [a for a in sorted_activities if a != "zzz_end"]

        # 🚨 1. Prerequisites check
        performed = set()
        for act in activities_no_end:
            required = prerequisites.get(act, [])
            if not all(pre in performed for pre in required):
                missing = [pre for pre in required if pre not in performed]
                issues.append((case_id, f"Activity '{act}' missing prerequisites {missing}"))
            performed.add(act)

        # 🚨 2. Post-conditions check
        for i, act in enumerate(activities_no_end):
            required_posts = post_conditions.get(act, [])
            future_acts = set(activities_no_end[i+1:])  # only activities after current one
            for post in required_posts:
                if post not in future_acts:
                    issues.append((case_id, f"Activity '{act}' missing required post-condition '{post}'"))

        # 🚨 3. End correctness check
        if activities_no_end:
            last_real_activity = activities_no_end[-1]
            if last_real_activity not in valid_end_activities:
                issues.append((case_id, f"Case ends incorrectly on '{last_real_activity}'"))

    return issues


In [None]:
validate_simulated_log(df_simulated_log, prerequisites, post_conditions, valid_end_activities, case_col='case_id', activity_col='activity', order_by='start')

[]

In [15]:
sim_params['agent_activity_mapping'][18]

['Applicant completes form']

In [18]:
sim_params.keys()

dict_keys(['activity_durations_dict', 'activities_without_waiting_time', 'roles', 'res_calendars', 'agent_activity_mapping', 'transition_probabilities_autonomous', 'agent_transition_probabilities_autonomous', 'agent_transition_probabilities', 'transition_probabilities', 'max_activity_count_per_case', 'case_arrival_times', 'case_arrival_times_val', 'agent_to_resource', 'determine_automatically', 'prerequisites', 'timers', 'central_orchestration', 'start_timestamp', 'execution_type'])

In [23]:
sim_params['activity_durations_dict'][18]

{'Check application form completeness': [],
 'Assess loan risk': [],
 'Return application back to applicant': [],
 'Approve application': [],
 'Check credit history': [],
 'Design loan offer': [],
 'Approve loan offer': [],
 'Applicant completes form': <source.arrival_distribution.DurationDistribution at 0x16d83fc50>,
 'Appraise property': [],
 'Reject application': [],
 'Cancel application': [],
 'AML check': [],
 'zzz_end': []}

In [22]:
sim_params['activity_durations_dict'][18]

set(sim_params['activity_durations_dict'][18].keys())

{'AML check',
 'Applicant completes form',
 'Appraise property',
 'Approve application',
 'Approve loan offer',
 'Assess loan risk',
 'Cancel application',
 'Check application form completeness',
 'Check credit history',
 'Design loan offer',
 'Reject application',
 'Return application back to applicant',
 'zzz_end'}

In [149]:

df_simulated = []
for i in range(200):
    case_log = simulate_case_dynamic(sim_params, i, strict_prereqs, valid_end_activities, all_activities, post_conditions)
    df_simulated.extend(case_log)

df_simulated = pd.DataFrame(df_simulated)

NameError: name 'simulate_case_dynamic' is not defined