In [1]:
import sys
import os

# Add project root to sys.path
sys.path.append(os.path.abspath(os.path.join("../..")))  # if you're in optimizer/

import pandas as pd
from source.utils import sample_from_distribution
import random
import numpy as np
import datetime
import pytz
import itertools

In [5]:
import pickle

with open("../parameters/simulation_parameters.pkl", "rb") as f:
    sim_params = pickle.load(f)

sim_params.keys()

transition_dict = sim_params['transition_probabilities']

In [19]:
df = pd.read_csv("../../raw_data/LoanApp.csv.gz", compression='gzip')

Attempt from Monday

In [15]:
def extract_all_successors(transition_dict):
    """
    Converts a nested transition dictionary to a flat mapping:
    prefix_activity → list of all possible successor activities (non-zero prob)

    Parameters:
    - transition_dict: dict of {prefix: {agent_id: {activity: prob}}}

    Returns:
    - dict of {activity: [possible next activities]}
    """
    flat_successors = {}
    seen_anchors = set()

    for prefix, agent_dict in transition_dict.items():
        if not prefix:
            continue  # skip empty prefixes
        if prefix[-1] in seen_anchors:
            continue
        anchor = prefix[-1]  # last activity in the prefix
        seen_anchors.add(anchor)
        # print(anchor, ":")
        successor_set = set()
        for agent_transitions in agent_dict.values():
            for act, prob in agent_transitions.items(): 
                if (act not in successor_set):
                    # print(act, prob)
                    if prob > 0:
                        # print(act)
                        successor_set.add(act)
        # print(successor_set)

        flat_successors[anchor] = sorted(successor_set)
        # print(flat_successors[anchor])
    # print("END", flat_successors['Check application form completeness'])
    return flat_successors


In [8]:
valid_dicts = {p: a for p, a in transition_dict.items() if len(p) == 1}

In [39]:
flat_map = extract_all_successors(valid_dicts)

In [40]:
flat_map

{'Check application form completeness': ['AML check',
  'Appraise property',
  'Check credit history',
  'Return application back to applicant'],
 'Check credit history': ['AML check',
  'Appraise property',
  'Assess loan risk'],
 'AML check': ['Appraise property',
  'Assess loan risk',
  'Check credit history'],
 'Appraise property': ['AML check',
  'Assess loan risk',
  'Check credit history'],
 'Assess loan risk': ['Design loan offer', 'Reject application'],
 'Design loan offer': ['Approve loan offer'],
 'Approve loan offer': ['Approve application', 'Cancel application'],
 'Cancel application': ['zzz_end'],
 'Approve application': ['zzz_end'],
 'Reject application': ['zzz_end'],
 'Return application back to applicant': ['Applicant completes form'],
 'Applicant completes form': ['Check application form completeness']}

In [44]:
from collections import defaultdict

def extract_xor_groups_from_cooccurrence(successor_map, co_occurrence_map):
    """
    Builds XOR groups from a simplified co-occurrence map.
    Each group is a list of activities that co-occur with each other.
    XOR is inferred between groups that do not co-occur with each other.

    Returns:
    - xor_groups: {anchor: list of mutually exclusive groups (each group is a list of activities)}
    """
    xor_groups = defaultdict(list)

    for anchor, successors in successor_map.items():
        if not successors:
            continue

        remaining = set(successors)
        groups = []

        while remaining:
            act = remaining.pop()
            group = {act}

            # Collect all activities that co-occur with 'act'
            for other in list(remaining):
                if (
                    act in co_occurrence_map.get(other, []) or
                    other in co_occurrence_map.get(act, [])
                ):
                    group.add(other)
                    remaining.remove(other)

            groups.append(sorted(group))

        if len(groups) > 1:
            xor_groups[anchor] = groups

    return xor_groups


In [49]:
import pandas as pd
from collections import defaultdict

def mine_concurrent_activities(df, case_col='case_id', activity_col='activity',
                                start_col='start_time', end_col='end_time'):
    """
    For each activity, detect other activities that can run concurrently
    by overlapping time windows in the same case.

    Parameters:
    - df: Event log with case_id, activity, start_time, end_time

    Returns:
    - co_occurrence: dict {activity: [other activities that overlapped with it]}
    """
    df = df.copy()
    df[start_col] = pd.to_datetime(df[start_col])
    df[end_col] = pd.to_datetime(df[end_col])

    co_occurrence = defaultdict(set)

    for case_id, group in df.groupby(case_col):
        group = group.sort_values(by=start_col)
        for i, row_i in group.iterrows():
            act_i, start_i, end_i = row_i[activity_col], row_i[start_col], row_i[end_col]
            for j, row_j in group.iterrows():
                if i == j:
                    continue
                act_j, start_j, end_j = row_j[activity_col], row_j[start_col], row_j[end_col]
                # Check for overlap
                if start_i < end_j and start_j < end_i:
                    co_occurrence[act_i].add(act_j)

    # Convert sets to sorted lists
    return {act: sorted(list(others)) for act, others in co_occurrence.items()}


In [50]:
concurrent_map = mine_concurrent_activities(df)

for act, overlaps in concurrent_map.items():
    print(f"{act} can run concurrently with: {overlaps}")


Check credit history can run concurrently with: ['AML check', 'Appraise property']
AML check can run concurrently with: ['Appraise property', 'Check credit history']
Appraise property can run concurrently with: ['AML check', 'Check credit history']


In [45]:
def print_xor_groups(xor_groups):
    print("\n📌 XOR Groupings by Anchor Activity:")
    for anchor, groups in xor_groups.items():
        print(f"\nAfter '{anchor}':")
        for i, group in enumerate(groups):
            print(f"  Option {i+1}: {group}")


In [54]:


xor = extract_xor_groups_from_cooccurrence(flat_map, concurrent_map)
print_xor_groups(xor)



📌 XOR Groupings by Anchor Activity:

After 'Check application form completeness':
  Option 1: ['AML check', 'Appraise property', 'Check credit history']
  Option 2: ['Return application back to applicant']

After 'Check credit history':
  Option 1: ['AML check', 'Appraise property']
  Option 2: ['Assess loan risk']

After 'AML check':
  Option 1: ['Appraise property', 'Check credit history']
  Option 2: ['Assess loan risk']

After 'Appraise property':
  Option 1: ['AML check', 'Check credit history']
  Option 2: ['Assess loan risk']

After 'Assess loan risk':
  Option 1: ['Design loan offer']
  Option 2: ['Reject application']

After 'Approve loan offer':
  Option 1: ['Cancel application']
  Option 2: ['Approve application']


In [38]:
# next_actions = []
# for case_id in df['case_id'].unique():
#     case = df[df['case_id'] == case_id].reset_index(drop=True)  # Get events for this case
#     for i in range(len(case) - 1):  # stop at second to last row
#         if case.loc[i, 'activity'] == 'Applicant completes form':
#             next_action = case.loc[i + 1, 'activity']
#             if next_action not in next_actions:
#                 next_actions.append(next_action)
# next_actions