In [3]:
import os
import sys
import pandas as pd

# Get the absolute path to optimizer/source
notebook_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()
source_dir = os.path.abspath(os.path.join(notebook_dir, '..', 'source'))


In [4]:
# Add optimizer/source to sys.path
if source_dir not in sys.path:
    sys.path.insert(0, source_dir)

# Now import modules directly from source/
from state import Case, Agent
from simulation import Simulation
from optimizer import prepare_inputs
from utils import (
    discover_post_conditions,
    extract_all_successors,
    mine_concurrent_activities,
    extract_xor_groups_from_cooccurrence,
    discover_prerequisites_from_log,
    validate_simulated_log,
)
from activity_rules import ActivityRules

In [5]:
sim_params, agent_ids, durations, calendars, rules, xor, possible_cases = prepare_inputs("../parameters/simulation_parameters.pkl",raw_logs_path="../raw_data/LoanApp.csv.gz")
        

In [6]:
xor

defaultdict(list,
            {'Check application form completeness': [['AML check',
               'Appraise property',
               'Check credit history'],
              ['Return application back to applicant']],
             'Check credit history': [['AML check', 'Appraise property'],
              ['Assess loan risk']],
             'AML check': [['Appraise property', 'Check credit history'],
              ['Assess loan risk']],
             'Appraise property': [['Assess loan risk'],
              ['AML check', 'Check credit history']],
             'Assess loan risk': [['Design loan offer'],
              ['Reject application']],
             'Approve loan offer': [['Approve application'],
              ['Cancel application']]})

In [22]:
xor

defaultdict(list,
            {'Check application form completeness': [['Return application back to applicant'],
              ['AML check', 'Appraise property', 'Check credit history']],
             'Check credit history': [['AML check', 'Appraise property'],
              ['Assess loan risk']],
             'AML check': [['Appraise property', 'Check credit history'],
              ['Assess loan risk']],
             'Appraise property': [['AML check', 'Check credit history'],
              ['Assess loan risk']],
             'Assess loan risk': [['Reject application'],
              ['Design loan offer']],
             'Approve loan offer': [['Cancel application'],
              ['Approve application']]})

In [23]:
def run_simulation(sim_param_path, raw_logs_path, max_cases=5, max_steps=100):
    sim_params, agent_ids, durations, calendars, rules, xor, possible_cases = prepare_inputs(sim_param_path, raw_logs_path)
    
    all_logs = []
    successful_cases = 0
    case_counter = 0

    while (successful_cases < max_cases) and (case_counter <= possible_cases):
        print(f"\n🚀 Starting simulation for case {case_counter}")

        arrival_time = sim_params['case_arrival_times'][case_counter]
        case = Case(str(case_counter), arrival_time, xor_decisions={})
        case.performed = []
        case.current_time = arrival_time

        agents = []
        for agent_id in agent_ids:
            capable_acts = set(durations[agent_id].keys())
            agents.append(Agent(agent_id, capable_activities=capable_acts, calendar=calendars[agent_id]))

        sim = Simulation(
            agents=agents,
            cases=[case],
            rules=rules,
            durations=durations,
            case_arrivals={str(case_counter): arrival_time}
        )

        step = 0
        while not case.done and step < max_steps:
            if not sim.tick(calendars):
                print(f"⚠️ No activity executed at tick {step}, aborting case {case_counter}")
                break
            step += 1

        if case.done:
            print(f"✅ Case {case_counter} completed successfully with {len(sim.log)} log entries.")
            all_logs.extend([entry.to_dict() for entry in sim.log])
            successful_cases += 1
        else:
            print(f"❌ Case {case_counter} did not complete. Retrying with new case.")

        print(f"In case {case_counter} the following were performed: {case.performed}")

        case_counter += 1
        print(f"Performed: {case.performed}")
        break

    df_simulated_log = pd.DataFrame(all_logs)
    print("\n✅ Simulation completed for all cases.")
    return df_simulated_log

In [24]:
df = run_simulation("../parameters/simulation_parameters.pkl", max_cases=5, max_steps=100, raw_logs_path="../raw_data/LoanApp.csv.gz")


🚀 Starting simulation for case 0
Available activities: ['Check application form completeness']
Selected activity: Check application form completeness by agent 12
Available activities: ['Appraise property', 'AML check', 'Check credit history', 'Check application form completeness', 'Return application back to applicant']
Selected activity: Check credit history by agent 12
Available activities: ['Appraise property', 'AML check', 'Check application form completeness', 'Check credit history', 'Return application back to applicant']
Selected activity: Appraise property by agent 6
Available activities: ['AML check', 'Check application form completeness', 'Check credit history', 'Appraise property', 'Return application back to applicant']
Selected activity: AML check by agent 2
Available activities: ['Check application form completeness', 'Check credit history', 'AML check', 'Appraise property', 'Assess loan risk', 'Return application back to applicant']
Selected activity: Return application

In [7]:
df

In [8]:
sim_params, agent_ids, durations, calendars, rules, xor, possible_cases = prepare_inputs("../parameters/simulation_parameters.pkl", "../raw_data/LoanApp.csv.gz")

In [9]:
raw_logs=pd.read_csv("../raw_data/LoanApp.csv.gz", compression='gzip')
prerequisites = discover_prerequisites_from_log(raw_logs, activity_col='activity', case_col='case_id', order_by='end_time')
post_conditions = discover_post_conditions(raw_logs)
def find_valid_end_activities(transition_dict):
    valid_end_activities = set()

    for prefix, agent_dicts in transition_dict.items():
        for agent_transitions in agent_dicts.values():
            # print(agent_transitions)
            if 'zzz_end' in agent_transitions:
                if prefix:  # make sure prefix is not empty
                    valid_end_activities.add(prefix[-1])  # last activity in prefix

    return valid_end_activities



valid_end_activities = find_valid_end_activities(sim_params["transition_probabilities"])

In [10]:
df

In [11]:
validate_simulated_log(df, prerequisites, post_conditions, valid_end_activities, 
                            case_col='case_id', activity_col='activity', order_by='start')

KeyError: 'case_id'