In [1]:
# Load IPython's autoreload extension and enable mode 2 so that edits to the
# project modules imported by this notebook are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [7]:
import logging
import pandas as pd
from utils import prepare_df, group_cases_by_trace, compute_accuracies_by_case
from incremental_softmax_recovery import incremental_softmax_recovery

In [8]:
# Logging setup: the root logger stays at INFO, while the project's own modules
# are individually raised to DEBUG and a few chatty libraries are capped at WARNING.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,  # root level; keeps third-party output manageable
    force=True,          # replace handlers left over from earlier cell runs
)

# Project loggers that should emit DEBUG records.
our_modules = [
    'classes',
    'incremental_softmax_recovery',
    'utils',
    'conformance_checking',
    'data_processing',
    'petri_model',
    'calibration',
]
for module_name in our_modules:
    project_logger = logging.getLogger(module_name)
    project_logger.setLevel(logging.DEBUG)

# Third-party loggers limited to warnings and errors.
for noisy_lib in ('graphviz', 'matplotlib', 'PIL'):
    logging.getLogger(noisy_lib).setLevel(logging.WARNING)

print("✅ Logging configured: DEBUG for our modules, INFO+ for third-party libraries")


✅ Logging configured: DEBUG for our modules, INFO+ for third-party libraries


In [9]:
# Load the DataFrame and the softmax list for the '50salads' dataset.
# prepare_df may return (df, softmax_lst) or (df, softmax_lst, extra); only the
# first two items are used. Slicing (rather than unpacking the extra item into
# `_`) avoids rebinding `_`, which IPython uses for the previous cell output.
# NOTE(review): assumes the return value is a tuple/list that supports slicing.
result = prepare_df('50salads')
df, softmax_lst = result[:2]

# Group cases by trace and display the grouping for inspection.
trace_groups = group_cases_by_trace(df)
trace_groups

Unnamed: 0,case_list,trace_length
0,"[0, 1, 2, 3]",5687
1,"[32, 33, 34, 35]",6186
2,"[36, 37, 38, 39]",5840
3,"[28, 29, 30, 31]",5261
4,"[4, 5, 6, 7]",6208
5,"[16, 17, 18, 19]",6293
6,"[24, 25, 26, 27]",6046
7,"[8, 9, 10, 11]",6584
8,"[12, 13, 14, 15]",5558
9,"[20, 21, 22, 23]",5792


In [17]:
# Keyword arguments for incremental_softmax_recovery (conformance-only run).
# Everything except `df` and `softmax_lst` is collected here so the call below
# stays short and the settings can be tuned in one place.
config = dict(
    # --- Data splitting ---
    n_train_traces=10,
    n_test_traces=10,
    train_cases=None,
    test_cases=None,
    ensure_train_variant_diversity=True,
    ensure_test_variant_diversity=False,

    # --- Sampling ---
    sequential_sampling=True,    # True: sample from activity runs, False: uniform sampling
    n_indices=None,              # required when sequential_sampling=False
    n_per_run=7,                 # required when sequential_sampling=True
    independent_sampling=True,

    # --- Conformance parameters ---
    prob_threshold=1e-6,                     # probability threshold for filtering activities
    chunk_size=11,                           # window size for chunked conformance
    conformance_switch_penalty_weight=1.0,   # weight on label-switch penalty
    max_hist_len=3,                          # history length for switch-penalty probability dict
    merge_mismatched_boundaries=False,       # merge adjacent chunks when boundary labels disagree
    restrict_to_observed_moves=False,        # restrict labeled model/sync moves to those observed in training

    # --- Cost function ---
    cost_function="linear",      # "linear", "logarithmic", or callable
    model_move_cost=1.0,
    log_move_cost=1.0,
    tau_move_cost=0.0,
    non_sync_penalty=1.0,

    # --- Temperature calibration ---
    use_calibration=True,
    temp_bounds=(1.0, 10.0),
    temperature=None,

    # --- Logging ---
    verbose=True,
    log_level=logging.INFO,

    # --- Miscellaneous ---
    round_precision=2,
    random_seed=101,
    save_model_path="./discovered_petri_net",
    save_model=True,
)

# Run the recovery with the data loaded earlier plus the settings above.
output = incremental_softmax_recovery(df=df, softmax_lst=softmax_lst, **config)

# The call yields three items: results frame, accuracy dict, probability dict.
results_df, accuracy_dict, prob_dict = output

2025-09-08 15:04:46,709 - incremental_softmax_recovery - INFO - Starting incremental softmax recovery (conformance-only).
2025-09-08 15:04:46,719 - incremental_softmax_recovery - INFO - Validated sequential case IDs (found 40 unique cases) and 40 softmax matrices.
2025-09-08 15:04:46,719 - incremental_softmax_recovery - INFO - Validated sampling parameters: sequential runs with n_per_run=7.
2025-09-08 15:04:46,719 - incremental_softmax_recovery - INFO - Validated input parameters: round_precision=2, prob_threshold=1e-06.
2025-09-08 15:04:46,719 - incremental_softmax_recovery - INFO - Prepared cost function: linear (model=1.0, log=1.0, tau=0.0).
2025-09-08 15:04:46,719 - incremental_softmax_recovery - INFO - Prepared softmax arrays: 40 traces with individual shape (19, 5687).
2025-09-08 15:04:47,275 - incremental_softmax_recovery - INFO - Filtered log and softmax matrices: 237820 -> 5852 events (2.5% retained).
2025-09-08 15:04:47,295 - incremental_softmax_recovery - INFO - Performed tr

PNG visualization saved to: discovered_petri_net.png
PDF visualization saved to: discovered_petri_net.pdf


2025-09-08 15:06:09,775 - classes - INFO - Built marking transition map with 1450 markings
2025-09-08 15:06:09,776 - incremental_softmax_recovery - INFO - Computed marking-to-transition map with 1450 reachable markings.
2025-09-08 15:06:09,779 - incremental_softmax_recovery - INFO - Built conditional probability dictionary: 274 histories, avg 1.8 activities per history.
2025-09-08 15:06:09,996 - incremental_softmax_recovery - INFO - Prepared 10 test softmax matrices with calibration (temperature=1.54).
2025-09-08 15:06:09,997 - incremental_softmax_recovery - INFO - Extracted 10 test case IDs for processing.
2025-09-08 15:06:09,997 - incremental_softmax_recovery - DEBUG - Using adaptive chunk size: 7 (base: 11, complexity: 145)


case 1/10 — conformance

2025-09-08 15:06:09,998 - incremental_softmax_recovery - DEBUG - Processing test case 1/10 (20) using 'conformance'


case 1/10 chunk 1/22

2025-09-08 15:06:09,998 - classes - INFO - Building marking transition map (lazy loading) with max_tau_depth=100
2025-09-08 15:07:27,835 - classes - INFO - Built marking transition map with 1450 markings


case 1/10 chunk 22/22

2025-09-08 15:07:39,776 - classes - INFO - Conformance total 154 steps in 89.772s (1.7 steps/s) across 22 chunks
2025-09-08 15:07:39,857 - incremental_softmax_recovery - DEBUG - Case 1/10 (20) [conformance]: SKTR=0.942, Argmax=0.903, Sequence length=154


case 2/10 — conformance

2025-09-08 15:07:39,862 - incremental_softmax_recovery - DEBUG - Processing test case 2/10 (11) using 'conformance'


case 2/10 chunk 23/23

2025-09-08 15:07:44,927 - classes - INFO - Conformance total 161 steps in 5.061s (31.8 steps/s) across 23 chunks
2025-09-08 15:07:44,988 - incremental_softmax_recovery - DEBUG - Case 2/10 (11) [conformance]: SKTR=0.807, Argmax=0.814, Sequence length=161


case 3/10 — conformance

2025-09-08 15:07:44,990 - incremental_softmax_recovery - DEBUG - Processing test case 3/10 (5) using 'conformance'


case 3/10 chunk 25/25

2025-09-08 15:08:03,477 - classes - INFO - Conformance total 175 steps in 18.485s (9.5 steps/s) across 25 chunks
2025-09-08 15:08:03,495 - incremental_softmax_recovery - DEBUG - Case 3/10 (5) [conformance]: SKTR=0.686, Argmax=0.669, Sequence length=175


case 4/10 — conformance

2025-09-08 15:08:03,496 - incremental_softmax_recovery - DEBUG - Processing test case 4/10 (36) using 'conformance'


case 4/10 chunk 15/15

2025-09-08 15:08:25,428 - classes - INFO - Conformance total 105 steps in 21.931s (4.8 steps/s) across 15 chunks
2025-09-08 15:08:25,437 - incremental_softmax_recovery - DEBUG - Case 4/10 (36) [conformance]: SKTR=0.676, Argmax=0.667, Sequence length=105


case 5/10 — conformance

2025-09-08 15:08:25,439 - incremental_softmax_recovery - DEBUG - Processing test case 5/10 (14) using 'conformance'


case 5/10 chunk 21/21

2025-09-08 15:08:26,604 - classes - INFO - Conformance total 147 steps in 1.164s (126.3 steps/s) across 21 chunks
2025-09-08 15:08:26,618 - incremental_softmax_recovery - DEBUG - Case 5/10 (14) [conformance]: SKTR=0.884, Argmax=0.884, Sequence length=147


case 6/10 — conformance

2025-09-08 15:08:26,619 - incremental_softmax_recovery - DEBUG - Processing test case 6/10 (4) using 'conformance'


case 6/10 chunk 25/25

2025-09-08 15:08:47,387 - classes - INFO - Conformance total 175 steps in 20.767s (8.4 steps/s) across 25 chunks
2025-09-08 15:08:47,409 - incremental_softmax_recovery - DEBUG - Case 6/10 (4) [conformance]: SKTR=0.663, Argmax=0.657, Sequence length=175


case 7/10 — conformance

2025-09-08 15:08:47,410 - incremental_softmax_recovery - DEBUG - Processing test case 7/10 (30) using 'conformance'


case 7/10 chunk 18/18

2025-09-08 15:08:48,197 - classes - INFO - Conformance total 126 steps in 0.787s (160.0 steps/s) across 18 chunks
2025-09-08 15:08:48,208 - incremental_softmax_recovery - DEBUG - Case 7/10 (30) [conformance]: SKTR=0.897, Argmax=0.905, Sequence length=126


case 8/10 — conformance

2025-09-08 15:08:48,209 - incremental_softmax_recovery - DEBUG - Processing test case 8/10 (15) using 'conformance'


case 8/10 chunk 21/21

2025-09-08 15:08:48,641 - classes - INFO - Conformance total 147 steps in 0.431s (340.9 steps/s) across 21 chunks
2025-09-08 15:08:48,656 - incremental_softmax_recovery - DEBUG - Case 8/10 (15) [conformance]: SKTR=0.871, Argmax=0.864, Sequence length=147


case 9/10 — conformance

2025-09-08 15:08:48,656 - incremental_softmax_recovery - DEBUG - Processing test case 9/10 (3) using 'conformance'


case 9/10 chunk 18/18

2025-09-08 15:08:50,726 - classes - INFO - Conformance total 126 steps in 2.068s (60.9 steps/s) across 18 chunks
2025-09-08 15:08:50,739 - incremental_softmax_recovery - DEBUG - Case 9/10 (3) [conformance]: SKTR=0.968, Argmax=0.952, Sequence length=126


case 10/10 — conformance

2025-09-08 15:08:50,740 - incremental_softmax_recovery - DEBUG - Processing test case 10/10 (18) using 'conformance'


case 10/10 chunk 19/19

2025-09-08 15:09:05,154 - classes - INFO - Conformance total 133 steps in 14.412s (9.2 steps/s) across 19 chunks
2025-09-08 15:09:05,168 - incremental_softmax_recovery - DEBUG - Case 10/10 (18) [conformance]: SKTR=0.767, Argmax=0.699, Sequence length=133





2025-09-08 15:09:05,171 - incremental_softmax_recovery - INFO - Built results DataFrame and accuracy dictionary.
2025-09-08 15:09:05,172 - incremental_softmax_recovery - INFO - Softmax trace recovery completed using conformance method.


In [18]:
# Standard window-based conformance: summarise results_df per case.
# The displayed table has one row per case with sktr_accuracy and argmax_accuracy columns.
accuracies = compute_accuracies_by_case(results_df)

accuracies

Unnamed: 0,case:concept:name,sktr_accuracy,argmax_accuracy
0,11,0.807453,0.813665
1,14,0.884354,0.884354
2,15,0.870748,0.863946
3,18,0.766917,0.699248
4,20,0.941558,0.902597
5,3,0.968254,0.952381
6,30,0.896825,0.904762
7,36,0.67619,0.666667
8,4,0.662857,0.657143
9,5,0.685714,0.668571


In [15]:
# NOTE(review): this cell repeats the accuracy computation of the cell above it.
# Its execution count (15) is lower than that of the recovery run (17), so the
# table stored with it appears to come from an earlier results_df — re-run or
# remove this cell so the notebook survives Restart & Run All with consistent output.
accuracies = compute_accuracies_by_case(results_df)

accuracies

Unnamed: 0,case:concept:name,sktr_accuracy,argmax_accuracy
0,11,0.813665,0.813665
1,14,0.829932,0.884354
2,15,0.816327,0.863946
3,18,0.75188,0.699248
4,20,0.896104,0.902597
5,3,0.912698,0.952381
6,30,0.857143,0.904762
7,36,0.638095,0.666667
8,4,0.628571,0.657143
9,5,0.657143,0.668571


In [19]:
# Show every row for a single case without truncation (including full list
# values), then report the summed SKTR move cost for that case.
# The case id is defined once so the filter, the variable name and the printed
# message cannot drift apart (the label previously said 28 while the filter used '14').
case_id = '14'  # compared against 'case:concept:name' as a string
case_df = results_df[results_df['case:concept:name'] == case_id]
case_28_df = case_df  # legacy alias: keeps the old name valid for any cell that still uses it
with pd.option_context('display.max_rows', None, 'display.max_colwidth', None, 'display.width', None):
    display(case_df)
total_cost = case_df['sktr_move_cost'].sum()
print(f"Total SKTR move cost for case {case_id}: {total_cost:.4f}")

Unnamed: 0,case:concept:name,step,sktr_activity,argmax_activity,ground_truth,all_probs,all_activities,is_correct,cumulative_accuracy,sktr_move_cost
595,14,0,17,17,17,"[0.01, 0.0, 0.0, 0.01, 0.0, 0.01, 0.0, 0.01, 0.01, 0.01, 0.01, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.92, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.08
596,14,1,17,17,17,"[0.01, 0.0, 0.0, 0.01, 0.0, 0.01, 0.0, 0.01, 0.01, 0.01, 0.01, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.92, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.08
597,14,2,17,17,17,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.94, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.06
598,14,3,17,17,17,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.95, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.05
599,14,4,17,17,17,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.95, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.05
600,14,5,17,17,17,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.95, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.05
601,14,6,17,17,17,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.94, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.06
602,14,0,7,7,7,"[0.0, 0.0, 0.0, 0.01, 0.01, 0.01, 0.01, 0.8, 0.03, 0.02, 0.01, 0.02, 0.0, 0.0, 0.01, 0.01, 0.0, 0.04, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,1.06
603,14,1,7,7,7,"[0.0, 0.0, 0.0, 0.01, 0.01, 0.01, 0.01, 0.84, 0.02, 0.01, 0.01, 0.02, 0.0, 0.0, 0.01, 0.01, 0.0, 0.02, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.16
604,14,2,7,7,7,"[0.0, 0.0, 0.0, 0.0, 0.01, 0.01, 0.01, 0.88, 0.02, 0.01, 0.01, 0.01, 0.0, 0.0, 0.01, 0.01, 0.0, 0.01, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.12


Total SKTR move cost for case 28: 34.8400
