In [3]:
%load_ext autoreload
%autoreload 2

# Then your regular imports
import logging
import pandas as pd
from utils import prepare_df, group_cases_by_trace
from incremental_softmax_recovery import incremental_softmax_recovery

In [4]:
# Send every record (DEBUG and above) from all loggers through the root logger.
logging.basicConfig(
    # force=True drops handlers installed earlier in this kernel, so re-running
    # the cell actually takes effect instead of being silently ignored.
    force=True,
    # timestamp - logger name - severity - message
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    # DEBUG is the most verbose standard level.
    level=logging.DEBUG,
)


In [5]:
# Load the '50salads' DataFrame and its softmax list.
# prepare_df may return more than the two items used here, so the first two
# are kept and anything after them is collected into a throwaway list. A named
# sink is used instead of `_` because assigning `_` in IPython shadows the
# "last output" shortcut for the rest of the session.
result = prepare_df('50salads')
df, softmax_lst, *_unused_extras = result

# Group the cases by trace; leaving the table as the last expression renders it
# as the cell output (one row per trace with `case_list` and `trace_length`).
trace_groups = group_cases_by_trace(df)
trace_groups

Unnamed: 0,case_list,trace_length
0,"[0, 1, 2, 3]",5687
1,"[32, 33, 34, 35]",6186
2,"[36, 37, 38, 39]",5840
3,"[28, 29, 30, 31]",5261
4,"[4, 5, 6, 7]",6208
5,"[16, 17, 18, 19]",6293
6,"[24, 25, 26, 27]",6046
7,"[8, 9, 10, 11]",6584
8,"[12, 13, 14, 15]",5558
9,"[20, 21, 22, 23]",5792


In [46]:
# Keyword arguments forwarded to incremental_softmax_recovery, grouped by concern.
config = dict(
    # --- Data splitting ---
    n_train_traces=10,                     # number of traces used for training
    n_test_traces=10,                      # number of traces used for testing
    train_cases=None,                      # explicit train case IDs; takes precedence over n_train_traces
    test_cases=None,                       # explicit test case IDs; takes precedence over n_test_traces
    ensure_train_variant_diversity=True,   # require distinct variants among training traces
    ensure_test_variant_diversity=False,   # do not require distinct variants among test traces

    # --- Sampling ---
    use_full_traces=False,                 # True would skip event sampling and keep complete traces/softmax matrices
    sequential_sampling=True,              # True: sample within activity runs; False: uniform sampling
    n_indices=None,                        # events per trace, only relevant when sequential_sampling=False
    n_per_run=15,                          # events per activity run, only relevant when sequential_sampling=True
    independent_sampling=True,             # give each trace its own random seed

    # --- Beam search ---
    beam_width=1,                          # candidates kept alive at each step
    activity_prob_threshold=0.0,           # minimum probability for an activity to be considered
    beam_score_alpha=1.0,                  # trade-off between average cost and total cost
    completion_patience=20,                # additional iterations allowed after the first completion

    # --- Cost function ---
    cost_function="linear",                # "linear", "logarithmic", or a callable
    model_move_cost=1.0,                   # cost of a model-only move
    log_move_cost=1.0,                     # cost of a log-only move
    tau_move_cost=0.0,                     # cost of a silent (tau) move
    non_sync_penalty=1.0,                  # penalty applied to non-synchronous moves

    # --- Conditional probabilities ---
    use_cond_probs=True,                   # turn conditional probabilities on
    max_hist_len=3,                        # longest history used for conditioning
    lambdas=[0.1, 0.3, 0.6],               # blending weights used by n-gram smoothing
    alpha=0.2,                             # history vs. base weight (0 = base only, 1 = history only)
    use_ngram_smoothing=True,              # apply n-gram smoothing

    # --- Future probability lookahead ---
    lookahead_window=5,                    # future timestamps to average over (0 disables lookahead)
    beta=0.5,                              # future vs. current weight (0 = current only, 1 = future only)

    # --- Temperature calibration ---
    use_calibration=True,                  # turn temperature scaling on
    temp_bounds=(1.0, 3.0),                # search bounds for the temperature optimisation
    temperature=3.0,                       # fixed temperature; setting it bypasses the optimisation

    # --- Logging ---
    verbose=True,                          # emit log output
    log_level=logging.INFO,                # use logging.DEBUG for more detail

    # --- Miscellaneous ---
    round_precision=2,                     # decimal places kept when rounding probabilities
    random_seed=321,                       # seed for reproducibility
    save_model_path="./discovered_petri_net",  # output path for the saved model, without extension
    save_model=True,                       # write the model visualisation to disk
)

# Run the recovery on the loaded DataFrame and softmax list.
output = incremental_softmax_recovery(df=df, softmax_lst=softmax_lst, **config)

# The call returns three objects: per-step results, accuracy lists, and the
# probability dictionary.
results_df, accuracy_dict, prob_dict = output

2025-07-24 17:57:02,142 - incremental_softmax_recovery - INFO - Starting incremental softmax recovery.
2025-07-24 17:57:02,152 - incremental_softmax_recovery - INFO - Validated sequential case IDs (found 40 unique cases) and 40 softmax matrices.
2025-07-24 17:57:02,152 - incremental_softmax_recovery - INFO - Validated sampling parameters: sequential runs with n_per_run=15.
2025-07-24 17:57:02,153 - incremental_softmax_recovery - INFO - Validated input parameters: beam_width=1, alpha=0.2, round_precision=2.
2025-07-24 17:57:02,153 - incremental_softmax_recovery - INFO - Prepared cost function: linear (model=1.0, log=1.0, tau=0.0).
2025-07-24 17:57:02,154 - incremental_softmax_recovery - INFO - Prepared softmax arrays: 40 traces with individual shape (19, 5687).
2025-07-24 17:57:02,666 - incremental_softmax_recovery - INFO - Processed log and softmax matrices: 237820 -> 12536 events (5.3% retained).
2025-07-24 17:57:02,707 - incremental_softmax_recovery - INFO - Performed train/test spli

PNG visualization saved to: discovered_petri_net.png


2025-07-24 17:57:03,664 - graphviz.rendering - DEBUG - delete './discovered_petri_net'
2025-07-24 17:57:03,666 - incremental_softmax_recovery - INFO - Petri net visualization saved to ./discovered_petri_net.pdf
2025-07-24 17:57:03,666 - incremental_softmax_recovery - INFO - Discovered Petri net model: 58 places, 87 transitions.
2025-07-24 17:57:03,666 - incremental_softmax_recovery - INFO - Computing marking-to-transition map (tau-reachability) for discovered Petri net...


PDF visualization saved to: discovered_petri_net.pdf


2025-07-24 17:59:08,877 - classes - INFO - Built marking transition map with 1450 markings
2025-07-24 17:59:08,878 - incremental_softmax_recovery - INFO - Computed marking-to-transition map with 1450 reachable markings.
2025-07-24 17:59:08,890 - incremental_softmax_recovery - INFO - Built conditional probability dictionary: 274 histories, avg 1.8 activities per history.
2025-07-24 17:59:08,891 - incremental_softmax_recovery - INFO - Built conditional probability dictionary.
2025-07-24 17:59:08,902 - incremental_softmax_recovery - INFO - Prepared 10 test softmax matrices with calibration (temperature=3.00).
2025-07-24 17:59:08,903 - incremental_softmax_recovery - INFO - Extracted 10 test case IDs for processing.
2025-07-24 17:59:08,904 - incremental_softmax_recovery - DEBUG - Case 1/10: 28
2025-07-24 17:59:09,465 - beam_search - INFO - Active beam is empty. Stopping search.
2025-07-24 17:59:09,467 - beam_search - INFO - Beam search finished. Found 1 candidates that completed the trace.


In [47]:
# Arithmetic mean of the per-case accuracies, computed the same way for both
# metrics (SKTR first, then argmax).
avg_sktr_accuracy, avg_argmax_accuracy = (
    sum(accuracy_dict[metric]) / len(accuracy_dict[metric])
    for metric in ('sktr_accuracy', 'argmax_accuracy')
)

print(f"Average SKTR Accuracy: {avg_sktr_accuracy:.4f}")
print(f"Average Argmax Accuracy: {avg_argmax_accuracy:.4f}")

# Leave the raw per-case lists as the cell output for reference.
accuracy_dict

Average SKTR Accuracy: 0.7513
Average Argmax Accuracy: 0.7971


{'sktr_accuracy': [0.8666666666666667,
  0.8355555555555556,
  0.3511111111111111,
  0.8202898550724638,
  0.8851851851851852,
  0.456,
  0.8385964912280702,
  0.8878787878787879,
  0.6601671309192201,
  0.9111111111111111],
 'argmax_accuracy': [0.8925925925925926,
  0.7022222222222222,
  0.6888888888888889,
  0.8289855072463768,
  0.9111111111111111,
  0.6346666666666667,
  0.7333333333333333,
  0.8939393939393939,
  0.7298050139275766,
  0.9555555555555556]}

In [52]:
# Look up the entry stored under the one-element tuple key ('2',).
# NOTE(review): presumably the next-activity distribution after history ['2'] - confirm.
prob_dict[('2',)]

{'2': 0.62, '3': 0.29, '0': 0.09}

In [51]:
# Show every row of one case without truncation, including full list-valued cells.
# The case ID is defined once so the filter and the printed label cannot drift
# apart (previously the filter selected '39' while the label said "case 28").
case_id = '39'
case_df = results_df[results_df['case:concept:name'] == case_id]
case_28_df = case_df  # legacy name kept in case other cells still reference it

# Lift pandas' display limits only inside this block so other cells are unaffected.
with pd.option_context('display.max_rows', None, 'display.max_colwidth', None, 'display.width', None):
    display(case_df)

# Sum of the per-step `sktr_move_cost` column for the selected case.
total_cost = case_df['sktr_move_cost'].sum()
print(f"Total SKTR move cost for case {case_id}: {total_cost:.4f}")

Unnamed: 0,case:concept:name,step,sktr_activity,argmax_activity,ground_truth,all_probs,all_activities,is_correct,cumulative_accuracy,sktr_move_cost
495,39,0,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.04, 0.03, 0.03, 0.04, 0.04, 0.03, 0.04, 0.03, 0.01, 0.03, 0.03, 0.03, 0.42, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.45
496,39,1,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.04, 0.03, 0.03, 0.04, 0.04, 0.03, 0.04, 0.03, 0.01, 0.03, 0.03, 0.03, 0.43, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.55
497,39,2,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.04, 0.03, 0.03, 0.04, 0.04, 0.03, 0.04, 0.03, 0.01, 0.03, 0.03, 0.03, 0.43, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.54
498,39,3,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.04, 0.03, 0.03, 0.04, 0.04, 0.03, 0.04, 0.03, 0.01, 0.03, 0.03, 0.03, 0.43, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.53
499,39,4,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.04, 0.03, 0.03, 0.03, 0.04, 0.03, 0.04, 0.03, 0.01, 0.03, 0.03, 0.03, 0.47, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.52
500,39,5,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.04, 0.02, 0.03, 0.03, 0.04, 0.03, 0.04, 0.03, 0.01, 0.03, 0.03, 0.03, 0.48, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.52
501,39,6,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.04, 0.02, 0.03, 0.03, 0.04, 0.03, 0.04, 0.03, 0.01, 0.03, 0.03, 0.02, 0.49, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.51
502,39,7,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.03, 0.02, 0.03, 0.03, 0.04, 0.03, 0.03, 0.03, 0.01, 0.03, 0.03, 0.02, 0.49, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.51
503,39,8,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.03, 0.02, 0.03, 0.03, 0.04, 0.03, 0.03, 0.03, 0.01, 0.03, 0.03, 0.02, 0.49, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.5
504,39,9,17,17,17,"[0.03, 0.03, 0.03, 0.04, 0.03, 0.03, 0.02, 0.03, 0.03, 0.04, 0.03, 0.03, 0.03, 0.01, 0.03, 0.03, 0.02, 0.49, 0.03]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.5


Total SKTR move cost for case 28: 166.9500
