In [1]:
%load_ext autoreload
%autoreload 2

# Project-local helpers (autoreload above re-imports them when their source changes)
from utils import prepare_df, group_cases_by_trace
from incremental_softmax_recovery import incremental_softmax_recovery

In [2]:
# Load the 'gtea' dataset. The first two items returned by prepare_df are the
# DataFrame and the softmax list; any trailing items are not used here.
result = prepare_df('gtea')
# Slice rather than branch on len(result): this accepts any length >= 2 and
# avoids binding `_`, which IPython uses for the previous cell's output.
df, softmax_lst = result[:2]

# Group cases by trace and display the grouping for inspection
trace_groups = group_cases_by_trace(df)
trace_groups

Unnamed: 0,case_list,trace_length
0,"[8, 9, 10, 11]",2009
1,"[4, 5, 6, 7]",943
2,"[12, 13, 14, 15]",1178
3,"[24, 25, 26, 27]",718
4,"[20, 21, 22, 23]",1235
5,"[16, 17, 18, 19]",1384
6,"[0, 1, 2, 3]",1643


In [None]:
# Keyword arguments forwarded to incremental_softmax_recovery.
# NOTE(review): the trace grouping shown earlier lists 7 distinct traces while
# n_test_traces is 10 — confirm whether these counts refer to traces or cases.
config = dict(
    # --- Data splitting ---
    n_train_traces=7,                      # how many traces to train on
    n_test_traces=10,                      # how many traces to test on
    train_cases=None,                      # explicit train case IDs (takes precedence over n_train_traces)
    test_cases=None,                       # explicit test case IDs (takes precedence over n_test_traces)
    ensure_train_variant_diversity=True,   # require distinct variants among training traces
    ensure_test_variant_diversity=False,   # require distinct variants among test traces

    # --- Sampling ---
    sequential_sampling=True,              # True: sample from activity runs; False: uniform sampling
    n_indices=None,                        # events sampled per trace (used when sequential_sampling=False)
    n_per_run=2,                           # events sampled per activity run (used when sequential_sampling=True)
    independent_sampling=True,             # give each trace its own random seed

    # --- Beam search ---
    beam_width=50,                         # number of candidates kept
    beam_epsilon=1e-2,                     # smoothing factor in beam scoring
    beam_alpha=0.5,                        # trade-off between average cost and total cost
    activity_prob_threshold=0.0,           # minimum probability for an activity to be considered

    # --- Cost function ---
    cost_function="linear",                # "linear", "logarithmic", or a callable
    model_move_cost=1.0,                   # cost of a model-only move
    log_move_cost=1.0,                     # cost of a log-only move
    tau_move_cost=1e-6,                    # cost of a silent (tau) move

    # --- Conditional probabilities ---
    use_cond_probs=True,                   # turn conditional probabilities on
    max_hist_len=3,                        # longest history used for conditioning
    lambdas=[0.1, 0.3, 0.6],               # blending weights for n-gram smoothing
    alpha=0.5,                             # history vs. base probability weight (0=history, 1=base)
    use_ngram_smoothing=True,              # apply n-gram smoothing

    # --- Temperature calibration ---
    use_calibration=True,                  # turn temperature scaling on
    temp_bounds=(1.0, 10.0),               # bounds for the temperature optimization
    temperature=None,                      # fixed temperature (bypasses optimization)

    # --- Miscellaneous ---
    round_precision=2,                     # decimal places when rounding probabilities
    random_seed=123,                       # seed for reproducibility
    save_model=False,                      # whether to save the model to PDF
    model_save_path="discovered_model.pdf",  # where the saved model goes
    return_accuracies=True,                # also return accuracy metrics
)

# Run the recovery; every entry of `config` is forwarded as a keyword argument.
output = incremental_softmax_recovery(
    df=df,
    softmax_lst=softmax_lst,
    **config,
)

# With return_accuracies=True the output unpacks into (results, accuracies);
# otherwise the output itself is the results.
if config['return_accuracies']:
    results, accuracies = output
else:
    results = output
    # Bind explicitly so later cells neither raise NameError on a fresh kernel
    # nor silently show a stale value left over from an earlier run.
    accuracies = None


In [15]:
# Show the per-step rows for case '8' (the case ID is matched as a string)
results.loc[results['case:concept:name'] == '8']

Unnamed: 0,case:concept:name,step,predicted_activity,ground_truth,is_correct,cumulative_accuracy
0,8,0,10,10,True,1.000000
1,8,1,10,10,True,1.000000
2,8,2,0,0,True,1.000000
3,8,3,0,0,True,1.000000
4,8,4,10,10,True,1.000000
...,...,...,...,...,...,...
64,8,64,10,10,True,0.461538
65,8,65,10,6,False,0.454545
66,8,66,10,6,False,0.447761
67,8,67,10,10,True,0.455882


In [16]:
# Display the accuracy metrics unpacked from the recovery output
# (populated when config['return_accuracies'] is True).
accuracies

{'sktr_accuracies': [0.6551724137931034,
  0.6086956521739131,
  0.7931034482758621,
  0.6111111111111112,
  0.5555555555555556,
  0.46153846153846156,
  0.4807692307692308,
  0.6923076923076923,
  0.463768115942029,
  0.5507246376811594],
 'argmax_accuracies': [0.6231884057971014,
  0.7413793103448276,
  0.7068965517241379,
  0.7916666666666666,
  0.6730769230769231,
  0.6956521739130435,
  0.7638888888888888,
  0.7391304347826086,
  0.7307692307692307,
  0.75],
 'sktr_avg_accuracy': 0.5872746319148119,
 'argmax_avg_accuracy': 0.7215648585963428}