In [3]:
%load_ext autoreload
%autoreload 2

# Standard-library, third-party, and project-local imports
import logging
import pandas as pd
from utils import prepare_df, group_cases_by_trace
from incremental_softmax_recovery import incremental_softmax_recovery

In [4]:
# Configure the root logger so that DEBUG-and-above records are echoed in the
# notebook output.
logging.basicConfig(
    level=logging.DEBUG,  # lowest standard level: nothing is filtered at the root
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',  # timestamp - logger name - level - message
    force=True,  # replace handlers already attached to the root logger (e.g. from re-running this cell)
)

# Third-party libraries propagate to the root logger as well; keep graphviz's
# internal DEBUG records (e.g. from 'graphviz.rendering') out of the output.
logging.getLogger('graphviz').setLevel(logging.WARNING)



In [5]:
# Load the '50salads' data. The first two items returned by prepare_df are the
# event DataFrame and the softmax list; any further trailing items are not
# needed here, so they are discarded regardless of how many there are.
result = prepare_df('50salads')
df, softmax_lst, *_ = result

# Group the cases by trace and display the resulting table.
trace_groups = group_cases_by_trace(df)
trace_groups

Unnamed: 0,case_list,trace_length
0,"[0, 1, 2, 3]",5687
1,"[32, 33, 34, 35]",6186
2,"[36, 37, 38, 39]",5840
3,"[28, 29, 30, 31]",5261
4,"[4, 5, 6, 7]",6208
5,"[16, 17, 18, 19]",6293
6,"[24, 25, 26, 27]",6046
7,"[8, 9, 10, 11]",6584
8,"[12, 13, 14, 15]",5558
9,"[20, 21, 22, 23]",5792


In [6]:
# Keyword arguments forwarded to incremental_softmax_recovery, grouped by topic.
config = dict(
    # --- Data splitting ---
    n_train_traces=10,
    n_test_traces=10,
    train_cases=None,
    test_cases=None,
    ensure_train_variant_diversity=True,
    ensure_test_variant_diversity=False,

    # --- Sampling ---
    use_full_traces=False,
    sequential_sampling=True,
    n_indices=None,
    n_per_run=1,
    independent_sampling=True,

    # --- Beam search ---
    beam_width=1,
    activity_prob_threshold=0.0,

    # --- Cost function ---
    cost_function="linear",
    model_move_cost=1.0,
    log_move_cost=1.0,
    tau_move_cost=0.0,
    non_sync_penalty=1.0,

    # --- Conditional probabilities ---
    use_cond_probs=True,
    max_hist_len=3,
    lambdas=[0.1, 0.3, 0.6],
    alpha=0.3,  # weight of the conditional probability
    zero_penalty=1.0,
    use_log_smoothing=False,
    k=0.0,

    # --- Short/long term windows ---
    short_term_window=3,
    long_term_window=50,

    # --- Future probability lookahead ---
    beta=0.5,  # weight of the current probability

    # --- Temperature calibration ---
    use_calibration=False,
    temp_bounds=(1.0, 3.0),
    temperature=3.0,

    # --- Logging ---
    verbose=True,
    log_level=logging.INFO,

    # --- Miscellaneous ---
    round_precision=2,
    random_seed=666,
    save_model_path="./discovered_petri_net",
    save_model=True,
)

# Run the recovery on the loaded data; every tunable option comes from `config`.
output = incremental_softmax_recovery(df=df, softmax_lst=softmax_lst, **config)

# The call yields three items, unpacked under the names the later cells use.
results_df, accuracy_dict, prob_dict = output

2025-07-30 08:44:23,761 - incremental_softmax_recovery - INFO - Starting incremental softmax recovery.
2025-07-30 08:44:23,770 - incremental_softmax_recovery - INFO - Validated sequential case IDs (found 40 unique cases) and 40 softmax matrices.
2025-07-30 08:44:23,771 - incremental_softmax_recovery - INFO - Validated sampling parameters: sequential runs with n_per_run=1.
2025-07-30 08:44:23,771 - incremental_softmax_recovery - INFO - Validated input parameters: beam_width=1, alpha=0.3, round_precision=2.
2025-07-30 08:44:23,772 - incremental_softmax_recovery - INFO - Prepared cost function: linear (model=1.0, log=1.0, tau=0.0).
2025-07-30 08:44:23,772 - incremental_softmax_recovery - INFO - Prepared softmax arrays: 40 traces with individual shape (19, 5687).
2025-07-30 08:44:24,363 - incremental_softmax_recovery - INFO - Processed log and softmax matrices: 237820 -> 836 events (0.4% retained).
2025-07-30 08:44:24,399 - incremental_softmax_recovery - INFO - Performed train/test split: 

PNG visualization saved to: discovered_petri_net.png


2025-07-30 08:44:25,186 - graphviz.rendering - DEBUG - delete './discovered_petri_net'
2025-07-30 08:44:25,186 - incremental_softmax_recovery - INFO - Petri net visualization saved to ./discovered_petri_net.pdf
2025-07-30 08:44:25,187 - incremental_softmax_recovery - INFO - Discovered Petri net model: 42 places, 58 transitions.
2025-07-30 08:44:25,189 - incremental_softmax_recovery - INFO - Built conditional probability dictionary: 193 histories, avg 1.7 activities per history.
2025-07-30 08:44:25,189 - incremental_softmax_recovery - INFO - Built conditional probability dictionary.
2025-07-30 08:44:25,189 - incremental_softmax_recovery - INFO - Prepared 10 test softmax matrices.
2025-07-30 08:44:25,190 - incremental_softmax_recovery - INFO - Extracted 10 test case IDs for processing.
2025-07-30 08:44:25,190 - incremental_softmax_recovery - DEBUG - Case 1/10: 13
2025-07-30 08:44:25,196 - incremental_softmax_recovery - INFO - Case 1/10 (13): SKTR=0.810, Argmax=0.857, Sequence length=21
2

PDF visualization saved to: discovered_petri_net.pdf


In [7]:
# Arithmetic mean of the per-case accuracies, computed the same way for both
# metrics stored in accuracy_dict.
avg_sktr_accuracy, avg_argmax_accuracy = (
    sum(accuracy_dict[metric]) / len(accuracy_dict[metric])
    for metric in ('sktr_accuracy', 'argmax_accuracy')
)

print(f"Average SKTR Accuracy: {avg_sktr_accuracy:.4f}")
print(f"Average Argmax Accuracy: {avg_argmax_accuracy:.4f}")

# Display the raw per-case values alongside the averages.
accuracy_dict

Average SKTR Accuracy: 0.7876
Average Argmax Accuracy: 0.8274


{'sktr_accuracy': [0.8095238095238095,
  0.75,
  0.68,
  0.8260869565217391,
  0.8095238095238095,
  0.7777777777777778,
  0.6666666666666666,
  0.875,
  0.9444444444444444,
  0.7368421052631579],
 'argmax_accuracy': [0.8571428571428571,
  0.7916666666666666,
  0.64,
  0.8695652173913043,
  0.8571428571428571,
  0.8333333333333334,
  0.7083333333333334,
  0.875,
  1.0,
  0.8421052631578947]}

In [8]:
# Inspect the prob_dict entry stored under the one-element tuple key ('10',).
# NOTE(review): presumably the next-activity probabilities following the
# single-activity history '10' -- confirm against incremental_softmax_recovery.
prob_dict[('10',)]

{'0': 0.08,
 '11': 0.17,
 '16': 0.25,
 '2': 0.08,
 '18': 0.08,
 '9': 0.17,
 '6': 0.08,
 '4': 0.08}

In [9]:
# Show every row of one case without truncation (all rows, full list values,
# unlimited width), then report the sum of its SKTR move costs.
case_id = '24'  # single source of truth for both the row filter and the printed label
case_df = results_df[results_df['case:concept:name'] == case_id]
case_28_df = case_df  # legacy name (the frame holds case `case_id`); kept in case other cells reference it
with pd.option_context('display.max_rows', None, 'display.max_colwidth', None, 'display.width', None):
    display(case_df)
total_cost = case_df['sktr_move_cost'].sum()
print(f"Total SKTR move cost for case {case_id}: {total_cost:.4f}")

Unnamed: 0,case:concept:name,step,sktr_activity,argmax_activity,ground_truth,all_probs,all_activities,is_correct,cumulative_accuracy,sktr_move_cost
156,24,0,17,17,17,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.0
157,24,1,7,7,7,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.99, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.01
158,24,2,8,8,8,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.97, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.03
159,24,3,9,9,9,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.02, 0.0, 0.96, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.04
160,24,4,6,6,6,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.97, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.03
161,24,5,10,10,10,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.01, 0.93, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.01, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.07
162,24,6,0,0,0,"[0.98, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.02
163,24,7,1,1,1,"[0.03, 0.66, 0.0, 0.03, 0.01, 0.07, 0.0, 0.0, 0.01, 0.01, 0.0, 0.04, 0.0, 0.09, 0.0, 0.01, 0.01, 0.01, 0.02]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.34
164,24,8,0,0,0,"[0.77, 0.12, 0.01, 0.0, 0.01, 0.03, 0.0, 0.0, 0.0, 0.01, 0.01, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.02]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.23
165,24,9,1,1,1,"[0.01, 0.97, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.03


Total SKTR move cost for case 28: 2.4400
