In [12]:
%load_ext autoreload
%autoreload 2

# Then your regular imports
import logging
import pandas as pd
from utils import prepare_df, group_cases_by_trace
from incremental_softmax_recovery import incremental_softmax_recovery

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# Load the DataFrame and the softmax list.
# prepare_df has been seen to return either two or three values; only the first
# two are needed here. Extended unpacking keeps those and collects any trailing
# values, replacing the explicit len() branch. The throwaway name deliberately
# avoids `_`, which IPython uses for the last cell output.
result = prepare_df('50salads')
df, softmax_lst, *_unused_extras = result

# Group cases by trace and inspect the grouping (last expression is displayed).
trace_groups = group_cases_by_trace(df)
trace_groups

Unnamed: 0,case_list,trace_length
0,"[0, 1, 2, 3]",5687
1,"[32, 33, 34, 35]",6186
2,"[36, 37, 38, 39]",5840
3,"[28, 29, 30, 31]",5261
4,"[4, 5, 6, 7]",6208
5,"[16, 17, 18, 19]",6293
6,"[24, 25, 26, 27]",6046
7,"[8, 9, 10, 11]",6584
8,"[12, 13, 14, 15]",5558
9,"[20, 21, 22, 23]",5792


In [19]:
# Configuration for incremental_softmax_recovery.
# Every entry below is forwarded to the function as a keyword argument via **config.
# NOTE(review): the per-key comments are the notebook author's descriptions of each
# parameter; confirm them against incremental_softmax_recovery's own documentation.
config = {
    # === Data Splitting ===
    'n_train_traces': 10,                     # Number of training traces
    'n_test_traces': 10,                     # Number of test traces
    'train_cases': None,                     # Specific train case IDs (overrides n_train_traces)
    'test_cases': None,                      # Specific test case IDs (overrides n_test_traces)
    'ensure_train_variant_diversity': True,  # Enforce distinct variants in training
    'ensure_test_variant_diversity': False,  # Enforce distinct variants in testing
    
    # === Sampling Configuration ===
    'sequential_sampling': True,             # True: sample from activity runs, False: uniform sampling
    'n_indices': None,                       # Events to sample per trace (when sequential_sampling=False)
    'n_per_run': 2,                          # Events per activity run (when sequential_sampling=True)
    'independent_sampling': True,            # Each trace gets different random seed
    
    # === Beam Search ===
    'beam_width': 50,                        # Number of candidates to maintain
    'activity_prob_threshold': 0.0,          # Minimum probability to consider activity
    'beam_score_alpha': 1.0,                # Weight between avg cost and total cost
    'completion_patience': 20,                # Extra iterations after first completion
    
    # === Cost Function ===
    'cost_function': "linear",               # "linear", "logarithmic", or callable
    'model_move_cost': 1.0,                  # Cost for model-only moves
    'log_move_cost': 1.0,                    # Cost for log-only moves
    'tau_move_cost': 0.0,                   # Cost for silent (tau) moves
    'non_sync_penalty': 1.0,                 # Penalty for non-sync moves
    
    # === Conditional Probabilities ===
    'use_cond_probs': True,                  # Enable conditional probabilities
    'max_hist_len': 3,                       # Maximum history length for conditioning
    'lambdas': [0.1, 0.3, 0.6],              # Blending weights for n-gram smoothing
    'alpha': 0.8,                            # History vs base probability weight (0=history, 1=base)
    'use_ngram_smoothing': True,             # Apply n-gram smoothing
    
    # === Temperature Calibration ===
    'use_calibration': True,                 # Enable temperature scaling
    'temp_bounds': (1.0, 10.0),              # Temperature optimization bounds
    'temperature': None,                     # Manual temperature (bypasses optimization)
    
    # === Logging ===
    'verbose': True,                          # Enable logging output
    'log_level': logging.INFO,                # Logging level (logging.DEBUG for more details)
    
    # === Miscellaneous ===
    'round_precision': 2,                     # Decimal places for probability rounding
    'random_seed': 321,                       # Random seed for reproducibility
    'save_model_path': "./discovered_petri_net",  # Path for saved model (without extension)
    'save_model': True,                     # Save a visualization of the discovered model (this run wrote both a PNG and a PDF)
}

# Run the recovery, passing every config entry as a keyword argument.
output = incremental_softmax_recovery(
    df=df,
    softmax_lst=softmax_lst,
    **config 
)

# Unpack the three returned objects.
# NOTE(review): judging by how they are used and displayed further down, these look like
# a per-step results DataFrame, a dict of per-case accuracy lists, and a dict of
# conditional probabilities — confirm against the function's return documentation.
results_df, accuracy_dict, prob_dict = output

2025-07-16 13:46:06,210 - incremental_softmax_recovery - INFO - Starting incremental softmax recovery.
2025-07-16 13:46:06,236 - incremental_softmax_recovery - INFO - Validated sequential case IDs (found 40 unique cases) and 40 softmax matrices.
2025-07-16 13:46:06,237 - incremental_softmax_recovery - INFO - Validated sampling parameters: sequential runs with n_per_run=2.
2025-07-16 13:46:06,237 - incremental_softmax_recovery - INFO - Validated input parameters: beam_width=50, alpha=0.8, round_precision=2.
2025-07-16 13:46:06,239 - incremental_softmax_recovery - INFO - Prepared cost function: linear (model=1.0, log=1.0, tau=0.0).
2025-07-16 13:46:06,239 - incremental_softmax_recovery - INFO - Prepared softmax arrays: 40 traces with individual shape (19, 5687).
2025-07-16 13:46:07,536 - incremental_softmax_recovery - INFO - Filtered log and softmax matrices: 237820 -> 1672 events (0.7% retained).
2025-07-16 13:46:07,567 - incremental_softmax_recovery - INFO - Performed train/test split:

PNG visualization saved to: discovered_petri_net.png
PDF visualization saved to: discovered_petri_net.pdf


2025-07-16 13:46:12,709 - incremental_softmax_recovery - INFO - Case 1/10 (28): SKTR=0.278, Argmax=0.917, Sequence length=36
2025-07-16 13:46:13,856 - incremental_softmax_recovery - INFO - Case 2/10 (37): SKTR=0.333, Argmax=0.700, Sequence length=30
2025-07-16 13:46:14,913 - incremental_softmax_recovery - INFO - Case 3/10 (39): SKTR=0.333, Argmax=0.767, Sequence length=30
2025-07-16 13:46:17,717 - incremental_softmax_recovery - INFO - Case 4/10 (9): SKTR=0.261, Argmax=0.848, Sequence length=46
2025-07-16 13:46:20,809 - incremental_softmax_recovery - INFO - Case 5/10 (29): SKTR=0.250, Argmax=0.944, Sequence length=36
2025-07-16 13:46:25,132 - incremental_softmax_recovery - INFO - Case 6/10 (5): SKTR=0.200, Argmax=0.660, Sequence length=50
2025-07-16 13:46:26,306 - incremental_softmax_recovery - INFO - Case 7/10 (19): SKTR=0.184, Argmax=0.737, Sequence length=38
2025-07-16 13:46:29,639 - incremental_softmax_recovery - INFO - Case 8/10 (22): SKTR=0.500, Argmax=0.955, Sequence length=44
20

In [22]:
# Compute the average accuracy for each metric.
# Both metrics share one code path, and an empty score list yields NaN instead
# of raising ZeroDivisionError.
avg_accuracy = {}
for metric in ('sktr_accuracy', 'argmax_accuracy'):
    scores = accuracy_dict[metric]
    n_scores = len(scores)
    avg_accuracy[metric] = sum(scores) / n_scores if n_scores else float('nan')

# Keep the individual names available for any later cells that use them.
avg_sktr_accuracy = avg_accuracy['sktr_accuracy']
avg_argmax_accuracy = avg_accuracy['argmax_accuracy']

print(f"Average SKTR Accuracy: {avg_sktr_accuracy:.4f}")
print(f"Average Argmax Accuracy: {avg_argmax_accuracy:.4f}")

# Show original dictionary for reference
accuracy_dict

Average SKTR Accuracy: 0.2784
Average Argmax Accuracy: 0.8152


{'sktr_accuracy': [0.2777777777777778,
  0.3333333333333333,
  0.3333333333333333,
  0.2608695652173913,
  0.25,
  0.2,
  0.18421052631578946,
  0.5,
  0.16666666666666666,
  0.2777777777777778],
 'argmax_accuracy': [0.9166666666666666,
  0.7,
  0.7666666666666667,
  0.8478260869565217,
  0.9444444444444444,
  0.66,
  0.7368421052631579,
  0.9545454545454546,
  0.7083333333333334,
  0.9166666666666666]}

In [21]:
# Inspect the conditional-probability table unpacked from the recovery run.
# NOTE(review): in the displayed output the keys look like tuples of preceding
# activities and the values map a next activity to a probability — confirm
# against incremental_softmax_recovery before relying on this reading.
prob_dict

{(): {'17': 1.0},
 ('17',): {'17': 0.5,
  '8': 0.05,
  '2': 0.05,
  '0': 0.1,
  '7': 0.15,
  '11': 0.1,
  '6': 0.05},
 ('17', '17'): {'8': 0.1, '2': 0.1, '0': 0.2, '7': 0.3, '11': 0.2, '6': 0.1},
 ('8',): {'8': 0.5, '7': 0.15, '6': 0.1, '9': 0.2, '10': 0.05},
 ('17', '8'): {'8': 1.0},
 ('17', '17', '8'): {'8': 1.0},
 ('8', '8'): {'7': 0.3, '6': 0.2, '9': 0.4, '10': 0.1},
 ('17', '8', '8'): {'7': 1.0},
 ('7',): {'7': 0.5, '9': 0.1, '8': 0.35, '10': 0.05},
 ('8', '7'): {'7': 1.0},
 ('8', '8', '7'): {'7': 1.0},
 ('7', '7'): {'9': 0.2, '8': 0.7, '10': 0.1},
 ('8', '7', '7'): {'9': 0.67, '10': 0.33},
 ('9',): {'9': 0.5, '6': 0.18, '15': 0.05, '10': 0.23, '7': 0.05},
 ('7', '9'): {'9': 1.0},
 ('7', '7', '9'): {'9': 1.0},
 ('9', '9'): {'6': 0.36, '15': 0.09, '10': 0.45, '7': 0.09},
 ('7', '9', '9'): {'6': 1.0},
 ('6',): {'6': 0.5, '10': 0.28, '9': 0.17, '7': 0.06},
 ('9', '6'): {'6': 1.0},
 ('9', '9', '6'): {'6': 1.0},
 ('6', '6'): {'10': 0.56, '9': 0.33, '7': 0.11},
 ('9', '6', '6'): {'10': 

In [23]:
# Show all rows for a single case without truncation, including full list values.
# The case ID is named once here so the comment and the filter cannot drift apart
# (the previous comment said case '9' while the filter selected '35').
# It is compared as a string, exactly as the original filter did.
inspected_case_id = '35'
with pd.option_context('display.max_rows', None, 'display.max_colwidth', None, 'display.width', None):
    display(results_df[results_df['case:concept:name'] == inspected_case_id])

Unnamed: 0,case:concept:name,step,predicted_activity,argmax_activity,ground_truth,all_probs,all_activities,is_correct,cumulative_accuracy
310,35,0,17,17,17,"[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.05, 0.05, 0.05, 0.1, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0
311,35,1,17,17,17,"[0.05, 0.05, 0.06, 0.06, 0.05, 0.06, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.05, 0.05, 0.05, 0.1, 0.04]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0
312,35,2,11,11,11,"[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.05, 0.05, 0.04, 0.05, 0.12, 0.06, 0.04, 0.04, 0.05, 0.05, 0.05, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0
313,35,3,11,11,11,"[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.05, 0.05, 0.04, 0.05, 0.13, 0.06, 0.04, 0.04, 0.05, 0.05, 0.05, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0
314,35,4,11,12,12,"[0.05, 0.04, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.05, 0.05, 0.06, 0.13, 0.06, 0.05, 0.04, 0.05, 0.04, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",False,0.8
315,35,5,11,12,12,"[0.05, 0.04, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.05, 0.05, 0.06, 0.13, 0.06, 0.05, 0.04, 0.05, 0.04, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",False,0.666667
316,35,6,0,0,13,"[0.11, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.06, 0.06, 0.04, 0.05, 0.05, 0.04, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",False,0.571429
317,35,7,0,0,13,"[0.12, 0.05, 0.05, 0.04, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.05, 0.05, 0.05, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",False,0.5
318,35,8,11,0,0,"[0.12, 0.06, 0.05, 0.04, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.04, 0.05, 0.05, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",False,0.444444
319,35,9,11,0,0,"[0.13, 0.06, 0.05, 0.04, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.04, 0.05, 0.05, 0.05]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",False,0.4
