In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. the project's `src` package imported below) are picked up
# before each cell executes, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import pandas as pd
from src.utils import prepare_df, group_cases_by_trace, compute_accuracies_by_case
from src.incremental_softmax_recovery import incremental_softmax_recovery

In [3]:
# Logging setup: root logger at INFO, selected project loggers at DEBUG,
# and a few chatty libraries raised to WARNING.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,   # root level; keeps third-party noise down
    force=True,           # replace handlers left over from earlier runs of this cell
)

# Logger names that should emit DEBUG records.
# NOTE(review): these are bare names, while the modules are imported as
# `src.<name>`; if they create loggers with getLogger(__name__) the logger
# names would be `src.utils` etc. and this list would not affect them — confirm.
our_modules = [
    'classes',
    'incremental_softmax_recovery',
    'utils',
    'conformance_checking',
    'data_processing',
    'petri_model',
    'calibration',
]

# One table of logger name -> level, applied in a single pass
# (project loggers first, then the libraries to quieten).
logger_levels = {name: logging.DEBUG for name in our_modules}
logger_levels.update({
    'graphviz': logging.WARNING,    # only warnings/errors from graphviz
    'matplotlib': logging.WARNING,  # silence matplotlib if present
    'PIL': logging.WARNING,         # silence PIL if present
})
for logger_name, logger_level in logger_levels.items():
    logging.getLogger(logger_name).setLevel(logger_level)

print("✅ Logging configured: DEBUG for our modules, INFO+ for third-party libraries")


✅ Logging configured: DEBUG for our modules, INFO+ for third-party libraries


In [4]:
# Load the 50salads DataFrame and its softmax list.
result = prepare_df('50salads')

# prepare_df is handled here for both a 2-tuple and a 3-tuple return; only the
# first two items are used. Starred unpacking covers both shapes (and any
# further trailing items) without branching on len(result). The leftovers go
# into a named throwaway rather than `_`, because in IPython `_` holds the
# previous cell output and assigning to it shadows that.
df, softmax_lst, *_unused_outputs = result

# Group the cases by trace and display the resulting table.
trace_groups = group_cases_by_trace(df)
trace_groups

Unnamed: 0,case_list,trace_length
0,"[0, 1, 2, 3]",5687
1,"[32, 33, 34, 35]",6186
2,"[36, 37, 38, 39]",5840
3,"[28, 29, 30, 31]",5261
4,"[4, 5, 6, 7]",6208
5,"[16, 17, 18, 19]",6293
6,"[24, 25, 26, 27]",6046
7,"[8, 9, 10, 11]",6584
8,"[12, 13, 14, 15]",5558
9,"[20, 21, 22, 23]",5792


In [None]:
# Configuration for incremental_softmax_recovery (conformance-only).
# The settings are declared section by section and merged into `config`
# in the same key order as before.
from src.utils import linear_prob_combiner

# --- Data splitting ---
data_split_cfg = {
    'n_train_traces': 10,
    'n_test_traces': 10,
    'train_cases': None,
    'test_cases': None,
    'ensure_train_variant_diversity': True,
    'ensure_test_variant_diversity': True,
    'use_same_traces_for_train_test': False,  # If True, use same traces for both train and test
    'compute_marking_transition_map': True,
}

# --- Sampling ---
sampling_cfg = {
    'sequential_sampling': True,   # True: sample from activity runs, False: uniform sampling
    'n_indices': None,             # Required when sequential_sampling=False
    'n_per_run': 10000,            # Required when sequential_sampling=True
    'independent_sampling': True,
}

# --- Conformance parameters ---
conformance_cfg = {
    'prob_threshold': 1e-6,                    # Probability threshold for filtering activities
    'chunk_size': 11,                          # Window size for chunked conformance
    'conformance_switch_penalty_weight': 1.0,  # Multiplier for label-switch penalty (actual penalty = weight * P(stay_with_prev_activity))
    'merge_mismatched_boundaries': False,      # Merge adjacent chunks when boundary labels disagree
}

# --- Conditioned probability adjustment, including multi-label conditioning ---
conditioning_cfg = {
    'conditioning_alpha': 0.9,                        # Blend weight: None=off, 0=full conditioned, 1=observed only
    'conditioning_combine_fn': linear_prob_combiner,  # Linear blend: (1-alpha)*conditioned + alpha*observed
    'max_hist_len': 3,                                # History length for probability dict (must be >= conditioning_n_prev_labels)
    'conditioning_n_prev_labels': 3,                  # Number of previous different labels to use (1=legacy, 2-3=recommended)
    'conditioning_interpolation_weights': [0.5, 0.3, 0.2],  # Weights for n-gram interpolation (recency-based)
    'use_collapsed_runs': True,                       # Build n-gram stats from run-to-run transitions (recommended: True)
}

# --- Cost function ---
cost_cfg = {
    'cost_function': "linear",    # "linear", "logarithmic", or callable
    'model_move_cost': 1.0,
    'log_move_cost': 1.0,
    'tau_move_cost': 0.0,
    'non_sync_penalty': 1.0,
}

# --- Temperature calibration ---
calibration_cfg = {
    'use_calibration': True,
    'temp_bounds': (1.0, 10.0),
    'temperature': None,
}

# --- Logging ---
logging_cfg = {
    'verbose': True,
    'log_level': logging.INFO,
}

# --- Miscellaneous ---
misc_cfg = {
    'round_precision': 2,
    'random_seed': 101,
    'save_model_path': "./results/discovered_petri_net",
    'save_model': True,
}

config = {
    **data_split_cfg,
    **sampling_cfg,
    **conformance_cfg,
    **conditioning_cfg,
    **cost_cfg,
    **calibration_cfg,
    **logging_cfg,
    **misc_cfg,
}

# Run the recovery with every setting passed as a keyword argument.
output = incremental_softmax_recovery(df=df, softmax_lst=softmax_lst, **config)

# The call's result is unpacked into four objects.
(
    results_df,
    accuracy_dict,
    prob_dict_uncollapsed,
    prob_dict_collapsed,
) = output


In [1]:
from src.utils import add_kari_column_to_results

# The same CSV is passed as both the target and the output path.
# NOTE(review): presumably this rewrites the target file in place with the
# KARI column added — confirm against add_kari_column_to_results.
hyperparam_results_csv = "results/hyperparam_search_alpha_0.05_weights_unigram_super_heavy_reordered.csv"

kari_merge_kwargs = dict(
    target_csv_path=hyperparam_results_csv,
    kari_csv_path="results/sktr_kari_argmax_50salads_results.csv",
    case_col="case:concept:name",
    kari_col="kari_activity",   # this column exists in your KARI CSV
    output_path=hyperparam_results_csv,
    validate_lengths=True,
)
merged = add_kari_column_to_results(**kari_merge_kwargs)

In [3]:
from src.evaluation import print_tas_metrics_from_csv

# Column names in the results CSV for each approach being compared.
tas_prediction_columns = dict(
    sktr_pred_col="sktr_activity",
    argmax_pred_col="argmax_activity",
    kari_pred_col="kari_activity",  # if present
)

# Print the TAS metric comparison for the results CSV.
print_tas_metrics_from_csv(
    "results/hyperparam_search_alpha_0.05_weights_unigram_super_heavy_reordered.csv",
    case_col="case:concept:name",
    gt_col="ground_truth",
    background=0,                   # or None to auto-detect
    precision=2,
    return_tables=False,
    **tas_prediction_columns,
)


TAS Metrics Comparison
Metric    acc_micro   edit  f1@10  f1@25  f1@50
Approach                                       
ARGMAX        82.20  57.30  67.48  66.02  58.53
KARI          79.37  76.21  79.56  78.49  69.29
SKTR          82.17  73.97  82.30  80.36  71.35

SKTR - Argmax Differences
Metric       acc_micro   edit  f1@10  f1@25  f1@50
SKTR-ARGMAX      -0.03  16.67  14.82  14.34  12.82

SKTR - KARI Differences
Metric     acc_micro  edit  f1@10  f1@25  f1@50
SKTR-KARI        2.8 -2.24   2.74   1.87   2.06


In [10]:
# Show all rows for a single case without truncation, including full list values.
# The case id is defined once and reused for both the filter and the printed
# label; previously the filter used '36' while the comment and the message
# said 28, so the output was mislabelled.
case_id = '36'
case_df = results_df[results_df['case:concept:name'] == case_id]
case_28_df = case_df  # legacy alias so cells that still reference the old name keep working

# Lift pandas' display limits only inside this block so the full rows are shown.
with pd.option_context('display.max_rows', None, 'display.max_colwidth', None, 'display.width', None):
    display(case_df)

# Sum of the per-step SKTR move costs for the selected case.
total_cost = case_df['sktr_move_cost'].sum()
print(f"Total SKTR move cost for case {case_id}: {total_cost:.4f}")

Unnamed: 0,case:concept:name,step,sktr_activity,argmax_activity,ground_truth,all_probs,all_activities,is_correct,cumulative_accuracy,sktr_move_cost
490,36,0,17,17,17,"[0.0, 0.0, 0.01, 0.01, 0.0, 0.01, 0.01, 0.0, 0.01, 0.0, 0.01, 0.03, 0.0, 0.01, 0.0, 0.01, 0.0, 0.88, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.12
491,36,1,17,17,17,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.01, 0.0, 0.01, 0.01, 0.0, 0.0, 0.0, 0.01, 0.0, 0.94, 0.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.06
492,36,2,17,17,17,"[0.01, 0.0, 0.01, 0.01, 0.0, 0.01, 0.01, 0.0, 0.01, 0.0, 0.01, 0.01, 0.0, 0.01, 0.0, 0.01, 0.0, 0.91, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.09
493,36,3,17,17,17,"[0.05, 0.01, 0.03, 0.02, 0.02, 0.02, 0.03, 0.02, 0.09, 0.05, 0.02, 0.07, 0.02, 0.03, 0.01, 0.01, 0.01, 0.47, 0.02]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.53
494,36,4,17,17,17,"[0.02, 0.01, 0.01, 0.01, 0.01, 0.02, 0.01, 0.01, 0.02, 0.01, 0.01, 0.03, 0.0, 0.02, 0.0, 0.01, 0.0, 0.8, 0.02]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.2
495,36,5,17,17,17,"[0.02, 0.01, 0.01, 0.01, 0.0, 0.01, 0.01, 0.01, 0.02, 0.01, 0.01, 0.04, 0.01, 0.01, 0.0, 0.01, 0.0, 0.82, 0.02]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.18
496,36,6,17,17,17,"[0.02, 0.01, 0.02, 0.01, 0.01, 0.02, 0.02, 0.01, 0.07, 0.01, 0.02, 0.05, 0.01, 0.02, 0.0, 0.01, 0.0, 0.65, 0.02]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,1.0,0.35
497,36,0,17,17,0,"[0.19, 0.01, 0.03, 0.02, 0.01, 0.02, 0.02, 0.02, 0.06, 0.02, 0.04, 0.13, 0.04, 0.02, 0.0, 0.02, 0.01, 0.29, 0.02]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",False,0.875,0.71
498,36,1,0,0,0,"[0.65, 0.03, 0.03, 0.01, 0.02, 0.01, 0.01, 0.01, 0.02, 0.02, 0.03, 0.03, 0.05, 0.01, 0.01, 0.01, 0.02, 0.02, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,0.888889,1.21
499,36,2,0,0,0,"[0.69, 0.03, 0.02, 0.01, 0.02, 0.01, 0.0, 0.01, 0.02, 0.01, 0.06, 0.02, 0.05, 0.02, 0.01, 0.01, 0.01, 0.01, 0.01]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]",True,0.9,0.31


Total SKTR move cost for case 28: 56.9400


# Results Investigation

In [None]:
# Comprehensive Group Statistics Analysis
# This cell computes detailed statistics to understand differences between the two groups

def print_group_comparison(stats):
    """Print a side-by-side text report comparing two groups.

    Args:
        stats: Mapping with the keys ``'group1'`` and ``'group2'``. Each value
            is a mapping that provides ``mean_*``/``std_*`` entries for
            ``accuracy``, ``sequence_length``, ``max_prob`` and ``entropy``,
            a ``gt_class_distribution`` mapping of class -> count, and an
            ``individual_accuracies`` iterable of numbers.

    Returns:
        None. The report is written to stdout with ``print``.
    """
    banner = "=" * 80
    rule = "-" * 40

    print(banner)
    print("COMPREHENSIVE GROUP COMPARISON ANALYSIS")
    print(banner)

    first = stats['group1']
    second = stats['group2']
    labelled_groups = (("Group 1", first), ("Group 2", second))

    # Sections 1-4 share one layout: mean ± std per group, then group1 - group2.
    # Columns: heading, per-group label, key stem, difference label,
    # float format, optional trailing note.
    scalar_sections = (
        ("\n1. ARGMAX ACCURACY COMPARISON:", "Mean", "accuracy",
         "Difference", ".4f", None),
        ("\n2. SEQUENCE LENGTH ANALYSIS:", "Mean length", "sequence_length",
         "Length difference", ".1f", None),
        ("\n3. PREDICTION CONFIDENCE ANALYSIS:", "Mean max prob", "max_prob",
         "Confidence difference", ".4f", None),
        ("\n4. ENTROPY ANALYSIS:", "Mean entropy", "entropy",
         "Entropy difference", ".4f", "(Lower entropy = more confident predictions)"),
    )
    for heading, label, stem, diff_label, spec, note in scalar_sections:
        mean_key = f"mean_{stem}"
        std_key = f"std_{stem}"
        print(heading)
        print(rule)
        for group_name, group in labelled_groups:
            print(f"{group_name} - {label}: {group[mean_key]:{spec}} ± {group[std_key]:{spec}}")
        print(f"{diff_label}: {first[mean_key] - second[mean_key]:{spec}}")
        if note is not None:
            print(note)

    # Section 5: ground-truth class counts and percentage shares per group.
    print("\n5. ACTIVITY CLASS DISTRIBUTIONS:")
    print(rule)

    first_dist = first['gt_class_distribution']
    second_dist = second['gt_class_distribution']
    # Union of the classes seen in either group.
    all_classes = set(first_dist.keys()) | set(second_dist.keys())

    print("Ground Truth Class Distribution:")
    print("Class | Group1 Count | Group1 % | Group2 Count | Group2 % | Difference")
    print("-" * 70)

    def _share(count, total):
        # Percentage of `total`; 0 when the group has no samples at all.
        return count / total * 100 if total > 0 else 0

    ordered_classes = sorted(all_classes)
    # The totals do not depend on the class, so compute them once.
    first_total = sum(first_dist.values())
    second_total = sum(second_dist.values())
    for cls in ordered_classes:
        first_count = first_dist.get(cls, 0)
        second_count = second_dist.get(cls, 0)
        first_pct = _share(first_count, first_total)
        second_pct = _share(second_count, second_total)
        print(f"{cls:5} | {first_count:11} | {first_pct:7.2f} | {second_count:11} | {second_pct:7.2f} | {first_pct - second_pct:8.2f}")

    # Section 6: the five best individual accuracies in each group.
    print("\n6. TOP 5 ACCURACIES PER GROUP:")
    print(rule)
    for group_name, group in labelled_groups:
        print(f"{group_name} (highest to lowest):")
        top_five = sorted(group['individual_accuracies'], reverse=True)[:5]
        for rank, acc in enumerate(top_five, start=1):
            print(f"  {rank}. {acc:.4f}")

# Run the comprehensive analysis
from src.utils import compute_comprehensive_group_statistics

# Group 1 is an explicit list of case ids; group 2 is every other index of
# softmax_lst, written as a string.
group1_cases = ['20', '11', '5', '36', '14', '4', '30', '15', '3', '18']
group1_lookup = set(group1_cases)
group2_cases = [
    candidate
    for candidate in map(str, range(len(softmax_lst)))
    if candidate not in group1_lookup
]

print(f"Analyzing Group 1: {len(group1_cases)} cases")
print(f"Analyzing Group 2: {len(group2_cases)} cases")

# Compute the statistics for both groups, then print the detailed report.
stats = compute_comprehensive_group_statistics(df, softmax_lst, group1_cases, group2_cases)
print_group_comparison(stats)

summary_banner = "=" * 80
print("\n" + summary_banner)
print("ANALYSIS SUMMARY")
print(summary_banner)

g1_stats = stats['group1']
g2_stats = stats['group2']

# Accuracy: a gap of more than 0.01 in either direction counts as noticeable.
accuracy_gap = g1_stats['mean_accuracy'] - g2_stats['mean_accuracy']
if accuracy_gap > 0.01:
    print("📈 Group 1 has noticeably higher argmax accuracy")
elif accuracy_gap < -0.01:
    print("📈 Group 2 has noticeably higher argmax accuracy")
else:
    print("📊 Argmax accuracies are very similar between groups")

# Sequence length: flagged when the mean lengths differ by more than 20.
length_gap = g1_stats['mean_sequence_length'] - g2_stats['mean_sequence_length']
print(
    "📏 Groups have substantially different sequence lengths"
    if abs(length_gap) > 20
    else "📐 Groups have similar sequence lengths"
)

# Confidence: flagged when the mean max probabilities differ by more than 0.05.
confidence_gap = g1_stats['mean_max_prob'] - g2_stats['mean_max_prob']
print(
    "🎯 Groups have substantially different prediction confidence"
    if abs(confidence_gap) > 0.05
    else "🎲 Groups have similar prediction confidence"
)

# Entropy: flagged when the mean entropies differ by more than 0.1; the group
# with the lower mean entropy is reported as the more confident one.
entropy_gap = g1_stats['mean_entropy'] - g2_stats['mean_entropy']
if abs(entropy_gap) > 0.1:
    print("🔀 Groups have substantially different prediction uncertainty")
    print(
        "   (Group 1 has more confident predictions)"
        if g1_stats['mean_entropy'] < g2_stats['mean_entropy']
        else "   (Group 2 has more confident predictions)"
    )
else:
    print("⚖️ Groups have similar prediction uncertainty")


Analyzing Group 1: 10 cases
Analyzing Group 2: 30 cases
COMPREHENSIVE GROUP COMPARISON ANALYSIS

1. ARGMAX ACCURACY COMPARISON:
----------------------------------------
Group 1 - Mean: 0.8295 ± 0.1067
Group 2 - Mean: 0.8247 ± 0.0962
Difference: 0.0048

2. SEQUENCE LENGTH ANALYSIS:
----------------------------------------
Group 1 - Mean length: 5898.9 ± 389.2
Group 2 - Mean length: 5961.0 ± 365.1
Length difference: -62.1

3. PREDICTION CONFIDENCE ANALYSIS:
----------------------------------------
Group 1 - Mean max prob: 0.9331 ± 0.1362
Group 2 - Mean max prob: 0.9361 ± 0.1357
Confidence difference: -0.0031

4. ENTROPY ANALYSIS:
----------------------------------------
Group 1 - Mean entropy: 0.2628 ± 0.4249
Group 2 - Mean entropy: 0.2482 ± 0.4090
Entropy difference: 0.0146
(Lower entropy = more confident predictions)

5. ACTIVITY CLASS DISTRIBUTIONS:
----------------------------------------
Ground Truth Class Distribution:
Class | Group1 Count | Group1 % | Group2 Count | Group2 % | Dif