In [1]:
%load_ext autoreload
%autoreload 2

# Imports: standard library, third-party, then project modules
import logging
import pandas as pd
from utils import prepare_df, group_cases_by_trace
from incremental_softmax_recovery import incremental_softmax_recovery

In [2]:
# Logging setup: root logger at INFO, project loggers raised to DEBUG,
# a few chatty third-party loggers capped at WARNING.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,   # root threshold; keeps third-party output down
    force=True,           # replace handlers already attached to the root logger (handy when re-running in Jupyter)
)

# Loggers belonging to this project; each one is switched to DEBUG below
our_modules = [
    'classes',
    'incremental_softmax_recovery',
    'beam_search',
    'utils',
    'conformance_checking',
    'data_processing',
    'petri_model',
    'calibration',
]
for module_name in our_modules:
    project_logger = logging.getLogger(module_name)
    project_logger.setLevel(logging.DEBUG)

# Third-party loggers that should only report warnings and errors
for noisy_name in ('graphviz', 'matplotlib', 'PIL'):
    logging.getLogger(noisy_name).setLevel(logging.WARNING)

print("✅ Logging configured: DEBUG for our modules, INFO+ for third-party libraries")


✅ Logging configured: DEBUG for our modules, INFO+ for third-party libraries


In [3]:
# Load the DataFrame and the softmax list for the '50salads' dataset.
result = prepare_df('50salads')
# Only the first two items of the return value are used. Slicing accepts a
# return of any length >= 2 and, unlike unpacking the remainder into `_`,
# does not rebind `_`, which IPython uses for the most recent cell output.
df, softmax_lst = result[:2]

# Group cases by trace and show the grouping as the cell output
trace_groups = group_cases_by_trace(df)
trace_groups

Unnamed: 0,case_list,trace_length
0,"[0, 1, 2, 3]",5687
1,"[32, 33, 34, 35]",6186
2,"[36, 37, 38, 39]",5840
3,"[28, 29, 30, 31]",5261
4,"[4, 5, 6, 7]",6208
5,"[16, 17, 18, 19]",6293
6,"[24, 25, 26, 27]",6046
7,"[8, 9, 10, 11]",6584
8,"[12, 13, 14, 15]",5558
9,"[20, 21, 22, 23]",5792


In [4]:
# Keyword arguments for incremental_softmax_recovery, grouped by concern.
config = dict(
    # --- Data splitting ---
    n_train_traces=10,                      # number of training traces
    n_test_traces=10,                       # number of test traces
    train_cases=None,                       # explicit train case IDs (overrides n_train_traces)
    test_cases=None,                        # explicit test case IDs (overrides n_test_traces)
    ensure_train_variant_diversity=True,    # enforce distinct variants in training
    ensure_test_variant_diversity=False,    # enforce distinct variants in testing

    # --- Sampling ---
    sequential_sampling=True,               # True: sample from activity runs; False: uniform sampling
    n_indices=None,                         # events sampled per trace (used when sequential_sampling=False)
    n_per_run=10,                           # events per activity run (used when sequential_sampling=True)
    independent_sampling=True,              # each trace gets a different random seed

    # --- Recovery method ---
    recovery_method='conformance',          # "conformance" or "beam_search"
    prob_threshold=1e-6,                    # activity-filtering threshold shared by both methods

    # --- Conformance checking ---
    chunk_size=15,                          # chunk size for conformance processing

    # --- Beam search (only read when recovery_method is "beam_search") ---
    beam_width=1,                           # number of candidates to maintain
    beam_score_alpha=1.0,                   # weight between average cost and total cost
    completion_patience=20,                 # extra iterations after the first completion

    # --- Cost function ---
    cost_function="linear",                 # "linear", "logarithmic", or a callable
    model_move_cost=1.0,                    # cost of a model-only move
    log_move_cost=1.0,                      # cost of a log-only move
    tau_move_cost=0.0,                      # cost of a silent (tau) move
    non_sync_penalty=1.0,                   # penalty for non-sync moves
    conformance_switch_penalty_weight=1.0,  # weight of the switch penalty in conformance checking

    # --- Conditional probabilities (beam search only) ---
    use_cond_probs=True,                    # enable conditional probabilities
    max_hist_len=3,                         # maximum history length used for conditioning
    lambdas=[0.1, 0.3, 0.6],                # blending weights for n-gram smoothing
    alpha=0.95,                             # history vs base probability weight (0=history, 1=base)
    use_ngram_smoothing=True,               # apply n-gram smoothing

    # --- Temperature calibration ---
    use_calibration=True,                   # enable temperature scaling
    temp_bounds=(1.0, 10.0),                # bounds for the temperature optimization
    temperature=None,                       # manual temperature (bypasses optimization)

    # --- Logging ---
    verbose=True,                           # enable logging output
    log_level=logging.INFO,                 # logging level (logging.DEBUG for more detail)

    # --- Miscellaneous ---
    round_precision=2,                      # decimal places for probability rounding
    random_seed=101,                        # random seed for reproducibility
    save_model_path="./discovered_petri_net",  # output path for the saved model, without extension
    save_model=True,                        # write a visualization of the discovered model to disk
)

# Run the recovery with the settings above
output = incremental_softmax_recovery(df=df, softmax_lst=softmax_lst, **config)

# The call returns three objects: the results frame, the accuracy dict and the probability dict
results_df, accuracy_dict, prob_dict = output

2025-08-29 12:58:09,332 - incremental_softmax_recovery - INFO - Starting incremental softmax recovery.
2025-08-29 12:58:09,346 - incremental_softmax_recovery - INFO - Validated sequential case IDs (found 40 unique cases) and 40 softmax matrices.
2025-08-29 12:58:09,348 - incremental_softmax_recovery - INFO - Validated sampling parameters: sequential runs with n_per_run=10.
2025-08-29 12:58:09,349 - incremental_softmax_recovery - INFO - Using recovery method: conformance
2025-08-29 12:58:09,349 - incremental_softmax_recovery - INFO - Validated input parameters: beam_width=1, alpha=0.95, round_precision=2, prob_threshold=1e-06.
2025-08-29 12:58:09,350 - incremental_softmax_recovery - INFO - Prepared cost function: linear (model=1.0, log=1.0, tau=0.0).
2025-08-29 12:58:09,350 - incremental_softmax_recovery - INFO - Prepared softmax arrays: 40 traces with individual shape (19, 5687).
2025-08-29 12:58:10,144 - incremental_softmax_recovery - INFO - Filtered log and softmax matrices: 237820 -

PNG visualization saved to: discovered_petri_net.png
PDF visualization saved to: discovered_petri_net.pdf


2025-08-29 12:59:40,761 - classes - INFO - Built marking transition map with 1450 markings
2025-08-29 12:59:40,761 - incremental_softmax_recovery - INFO - Computed marking-to-transition map with 1450 reachable markings.
2025-08-29 12:59:40,765 - incremental_softmax_recovery - INFO - Built conditional probability dictionary: 274 histories, avg 1.8 activities per history.
2025-08-29 12:59:41,123 - incremental_softmax_recovery - INFO - Prepared 10 test softmax matrices with calibration (temperature=1.54).
2025-08-29 12:59:41,124 - incremental_softmax_recovery - INFO - Extracted 10 test case IDs for processing.


case 1/10 — conformance

2025-08-29 12:59:41,125 - incremental_softmax_recovery - DEBUG - Processing test case 1/10 (20) using 'conformance'


case 1/10 chunk 15/15

2025-08-29 13:02:04,560 - classes - INFO - Conformance total 220 steps in 143.433s (1.5 steps/s) across 15 chunks
2025-08-29 13:02:04,593 - incremental_softmax_recovery - DEBUG - Case 1/10 (20) [conformance]: SKTR=0.891, Argmax=0.864, Sequence length=220


case 2/10 — conformance

2025-08-29 13:02:04,597 - incremental_softmax_recovery - DEBUG - Processing test case 2/10 (11) using 'conformance'


case 2/10 chunk 16/16

2025-08-29 13:02:49,185 - classes - INFO - Conformance total 230 steps in 44.587s (5.2 steps/s) across 16 chunks
2025-08-29 13:02:49,208 - incremental_softmax_recovery - DEBUG - Case 2/10 (11) [conformance]: SKTR=0.813, Argmax=0.822, Sequence length=230


case 3/10 — conformance

2025-08-29 13:02:49,209 - incremental_softmax_recovery - DEBUG - Processing test case 3/10 (5) using 'conformance'


case 3/10 chunk 17/17

2025-08-29 13:05:27,561 - classes - INFO - Conformance total 250 steps in 158.350s (1.6 steps/s) across 17 chunks
2025-08-29 13:05:27,631 - incremental_softmax_recovery - DEBUG - Case 3/10 (5) [conformance]: SKTR=0.664, Argmax=0.652, Sequence length=250


case 4/10 — conformance

2025-08-29 13:05:27,632 - incremental_softmax_recovery - DEBUG - Processing test case 4/10 (36) using 'conformance'


case 4/10 chunk 10/10

2025-08-29 13:10:13,412 - classes - INFO - Conformance total 150 steps in 285.778s (0.5 steps/s) across 10 chunks
2025-08-29 13:10:13,426 - incremental_softmax_recovery - DEBUG - Case 4/10 (36) [conformance]: SKTR=0.747, Argmax=0.693, Sequence length=150


case 5/10 — conformance

2025-08-29 13:10:13,427 - incremental_softmax_recovery - DEBUG - Processing test case 5/10 (14) using 'conformance'


case 5/10 chunk 14/14

2025-08-29 13:10:34,510 - classes - INFO - Conformance total 210 steps in 21.082s (10.0 steps/s) across 14 chunks
2025-08-29 13:10:34,527 - incremental_softmax_recovery - DEBUG - Case 5/10 (14) [conformance]: SKTR=0.871, Argmax=0.867, Sequence length=210


case 6/10 — conformance

2025-08-29 13:10:34,527 - incremental_softmax_recovery - DEBUG - Processing test case 6/10 (4) using 'conformance'


case 6/10 chunk 17/17

2025-08-29 13:14:34,437 - classes - INFO - Conformance total 250 steps in 239.908s (1.0 steps/s) across 17 chunks
2025-08-29 13:14:34,463 - incremental_softmax_recovery - DEBUG - Case 6/10 (4) [conformance]: SKTR=0.624, Argmax=0.648, Sequence length=250


case 7/10 — conformance

2025-08-29 13:14:34,464 - incremental_softmax_recovery - DEBUG - Processing test case 7/10 (30) using 'conformance'


case 7/10 chunk 12/12

2025-08-29 13:14:57,921 - classes - INFO - Conformance total 180 steps in 23.454s (7.7 steps/s) across 12 chunks
2025-08-29 13:14:57,942 - incremental_softmax_recovery - DEBUG - Case 7/10 (30) [conformance]: SKTR=0.917, Argmax=0.900, Sequence length=180


case 8/10 — conformance

2025-08-29 13:14:57,943 - incremental_softmax_recovery - DEBUG - Processing test case 8/10 (15) using 'conformance'


case 8/10 chunk 14/14

2025-08-29 13:15:06,574 - classes - INFO - Conformance total 210 steps in 8.629s (24.3 steps/s) across 14 chunks
2025-08-29 13:15:06,618 - incremental_softmax_recovery - DEBUG - Case 8/10 (15) [conformance]: SKTR=0.876, Argmax=0.876, Sequence length=210


case 9/10 — conformance

2025-08-29 13:15:06,620 - incremental_softmax_recovery - DEBUG - Processing test case 9/10 (3) using 'conformance'


case 9/10 chunk 12/12

2025-08-29 13:15:12,162 - classes - INFO - Conformance total 180 steps in 5.540s (32.5 steps/s) across 12 chunks
2025-08-29 13:15:12,176 - incremental_softmax_recovery - DEBUG - Case 9/10 (3) [conformance]: SKTR=0.967, Argmax=0.967, Sequence length=180


case 10/10 — conformance

2025-08-29 13:15:12,176 - incremental_softmax_recovery - DEBUG - Processing test case 10/10 (18) using 'conformance'


case 10/10 chunk 13/13

2025-08-29 13:20:10,918 - classes - INFO - Conformance total 190 steps in 298.741s (0.6 steps/s) across 13 chunks
2025-08-29 13:20:10,958 - incremental_softmax_recovery - DEBUG - Case 10/10 (18) [conformance]: SKTR=0.721, Argmax=0.711, Sequence length=190





2025-08-29 13:20:10,963 - incremental_softmax_recovery - INFO - Built results DataFrame and accuracy dictionary.
2025-08-29 13:20:10,964 - incremental_softmax_recovery - INFO - Softmax trace recovery completed using conformance method.


In [6]:
# Arithmetic mean of the per-case accuracies for each metric
sktr_scores = accuracy_dict['sktr_accuracy']
argmax_scores = accuracy_dict['argmax_accuracy']
avg_sktr_accuracy = sum(sktr_scores) / len(sktr_scores)
avg_argmax_accuracy = sum(argmax_scores) / len(argmax_scores)

print(f"Average SKTR Accuracy: {avg_sktr_accuracy:.4f}")
print(f"Average Argmax Accuracy: {avg_argmax_accuracy:.4f}")

# Leave the raw per-case values as the cell output for reference
accuracy_dict

Average SKTR Accuracy: 0.8091
Average Argmax Accuracy: 0.7999


{'sktr_accuracy': [0.8909090909090909,
  0.8130434782608695,
  0.664,
  0.7466666666666667,
  0.8714285714285714,
  0.624,
  0.9166666666666666,
  0.8761904761904762,
  0.9666666666666667,
  0.7210526315789474],
 'argmax_accuracy': [0.8636363636363636,
  0.8217391304347826,
  0.652,
  0.6933333333333334,
  0.8666666666666667,
  0.648,
  0.9,
  0.8761904761904762,
  0.9666666666666667,
  0.7105263157894737]}

In [9]:
# Inspect the prob_dict entry stored under the one-element tuple key ('6',).
# NOTE(review): presumably the conditional next-activity probabilities given
# the history ('6',) — confirm against incremental_softmax_recovery.
prob_dict[('6',)]

{'6': 0.9, '10': 0.06, '9': 0.03, '7': 0.01}

In [7]:
# Show every row of one case without truncation, including full list values,
# then report that case's summed SKTR move cost.
case_id = '4'  # compared against the 'case:concept:name' column; also used in the printed label
case_df = results_df[results_df['case:concept:name'] == case_id]
case_28_df = case_df  # alias for the previous (misleading) variable name, kept for any cell that still uses it
with pd.option_context('display.max_rows', None, 'display.max_colwidth', None, 'display.width', None):
    display(case_df)
total_cost = case_df['sktr_move_cost'].sum()
# The label is derived from case_id so it cannot drift from the filter above
print(f"Total SKTR move cost for case {case_id}: {total_cost:.4f}")

Unnamed: 0,case:concept:name,step,sktr_activity,argmax_activity,ground_truth,all_probs,all_activities,is_correct,cumulative_accuracy,sktr_move_cost
1060,4,0,17,17,17,"[0.01, 0.01, 0.01, 0.01, 0.0, 0.04, 0.02, 0.0, 0.02, 0.01, 0.01, 0.01, 0.0, 0.01, 0.0, 0.01, 0.0, 0.76, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.24
1061,4,1,17,17,17,"[0.06, 0.02, 0.02, 0.01, 0.02, 0.04, 0.03, 0.01, 0.05, 0.2, 0.04, 0.02, 0.01, 0.03, 0.01, 0.02, 0.01, 0.28, 0.12]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.72
1062,4,2,17,17,17,"[0.01, 0.0, 0.0, 0.0, 0.0, 0.01, 0.01, 0.0, 0.01, 0.01, 0.01, 0.0, 0.0, 0.01, 0.0, 0.01, 0.0, 0.88, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.12
1063,4,3,17,17,17,"[0.01, 0.01, 0.01, 0.0, 0.0, 0.01, 0.01, 0.0, 0.01, 0.01, 0.01, 0.0, 0.0, 0.01, 0.0, 0.01, 0.0, 0.86, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.14
1064,4,4,17,17,17,"[0.01, 0.01, 0.01, 0.0, 0.0, 0.01, 0.01, 0.0, 0.01, 0.01, 0.01, 0.0, 0.0, 0.01, 0.0, 0.01, 0.0, 0.85, 0.04]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.15
1065,4,5,17,17,17,"[0.01, 0.01, 0.01, 0.0, 0.0, 0.01, 0.01, 0.0, 0.01, 0.01, 0.01, 0.01, 0.0, 0.01, 0.0, 0.01, 0.0, 0.84, 0.04]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.16
1066,4,6,17,17,17,"[0.02, 0.01, 0.01, 0.01, 0.0, 0.01, 0.01, 0.0, 0.01, 0.01, 0.0, 0.0, 0.0, 0.01, 0.0, 0.01, 0.0, 0.86, 0.02]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.14
1067,4,7,17,17,17,"[0.02, 0.01, 0.02, 0.01, 0.01, 0.02, 0.01, 0.0, 0.01, 0.01, 0.01, 0.01, 0.0, 0.01, 0.0, 0.01, 0.0, 0.79, 0.04]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.21
1068,4,8,17,17,17,"[0.01, 0.01, 0.02, 0.01, 0.01, 0.01, 0.01, 0.0, 0.01, 0.01, 0.01, 0.01, 0.0, 0.01, 0.0, 0.01, 0.0, 0.81, 0.04]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.19
1069,4,9,17,17,17,"[0.01, 0.01, 0.02, 0.01, 0.01, 0.02, 0.01, 0.0, 0.01, 0.01, 0.01, 0.01, 0.0, 0.01, 0.0, 0.01, 0.0, 0.8, 0.04]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.2


Total SKTR move cost for case 28: 95.9900


In [53]:
# Rows on which the SKTR prediction equals the argmax prediction
same_prediction = results_df['sktr_activity'] == results_df['argmax_activity']
agreements = results_df[same_prediction]

# Of those rows, keep the ones whose shared prediction is also the ground truth
matches_truth = agreements['sktr_activity'] == agreements['ground_truth']
correct_agreements = agreements[matches_truth]

# Share of agreeing rows that are correct, in percent (0 when no rows agree)
if len(agreements) > 0:
    percentage = (len(correct_agreements) / len(agreements)) * 100
else:
    percentage = 0

print(f"When argmax and SKTR agree, they are correct {percentage:.2f}% of the time.")

When argmax and SKTR agree, they are correct 83.41% of the time.
