In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules
# before each cell runs, so edits to the local helper modules take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle as pkl
from pprint import pprint

import pandas as pd

from evaluation import compute_tas_metrics_from_sequences, compute_tas_metrics_macro
from utils import (
    prepare_df,
    group_cases_by_trace,
    get_activity_run_lengths_by_case,
    get_sequences_by_case,
    normalize_sequences_for_evaluation,
    compute_evaluation_metrics,
    compute_kari_metrics
)

In [3]:
# Load the 50salads DataFrame and softmax list.
# The result is unpacked as (df, softmax_lst) when it has two elements and as
# (df, softmax_lst, <extra>) otherwise.
result = prepare_df('50salads')
if len(result) == 2:
    df, softmax_lst = result
else:
    # Bind the unused third element to a named throwaway rather than `_`:
    # assigning `_` in IPython shadows the "last output" variable for the
    # rest of the session.
    df, softmax_lst, _unused = result

# Group by trace and inspect the resulting table (one row per trace group).
trace_groups = group_cases_by_trace(df)
trace_groups

Unnamed: 0,case_list,trace_length
0,"[0, 1, 2, 3]",5687
1,"[32, 33, 34, 35]",6186
2,"[36, 37, 38, 39]",5840
3,"[28, 29, 30, 31]",5261
4,"[4, 5, 6, 7]",6208
5,"[16, 17, 18, 19]",6293
6,"[24, 25, 26, 27]",6046
7,"[8, 9, 10, 11]",6584
8,"[12, 13, 14, 15]",5558
9,"[20, 21, 22, 23]",5792


In [22]:
# Exploratory lookup for activity '1' with at least 3 runs per case.
# NOTE(review): judging by the displayed output, each case maps to
# (run lengths, label sequence preceding each run) — confirm against utils.
get_activity_run_lengths_by_case(df, '1', min_runs=3, include_preceding_sequence=True)

{'4': ([63, 37, 45],
  [('17', '2', '3', '0'),
   ('17', '2', '3', '0', '1', '0'),
   ('17', '2', '3', '0', '1', '0', '1', '15')]),
 '5': ([63, 37, 45],
  [('17', '2', '3', '0'),
   ('17', '2', '3', '0', '1', '0'),
   ('17', '2', '3', '0', '1', '0', '1', '15')]),
 '6': ([63, 37, 45],
  [('17', '2', '3', '0'),
   ('17', '2', '3', '0', '1', '0'),
   ('17', '2', '3', '0', '1', '0', '1', '15')]),
 '7': ([63, 37, 45],
  [('17', '2', '3', '0'),
   ('17', '2', '3', '0', '1', '0'),
   ('17', '2', '3', '0', '1', '0', '1', '15')]),
 '24': ([86, 122, 143],
  [('17', '7', '8', '9', '6', '10', '0'),
   ('17', '7', '8', '9', '6', '10', '0', '1', '0'),
   ('17', '7', '8', '9', '6', '10', '0', '1', '0', '1', '4', '5')]),
 '25': ([86, 122, 143],
  [('17', '7', '8', '9', '6', '10', '0'),
   ('17', '7', '8', '9', '6', '10', '0', '1', '0'),
   ('17', '7', '8', '9', '6', '10', '0', '1', '0', '1', '4', '5')]),
 '26': ([86, 122, 143],
  [('17', '7', '8', '9', '6', '10', '0'),
   ('17', '7', '8', '9', '6', '1

## Evaluation

In [4]:
import pandas as pd
from evaluation import tas_metrics

## Results for 25% of the data

In [5]:
# Load recovery results
recovery_res = pd.read_csv('recovery_results_50salads_15.csv')


def _score_and_report(title, pred_col):
    """Compute macro TAS metrics for one prediction column and print the summary.

    Returns the (summary, per_video) pair produced by compute_tas_metrics_macro
    so the caller can keep both under their own names.
    """
    summary, per_video = compute_tas_metrics_macro(
        recovery_res, pred_col=pred_col, background=0, return_per_video=True
    )
    print(title)
    pprint(summary, sort_dicts=False, width=1)
    return summary, per_video


# Argmax baseline first, a blank separator line, then SKTR.
argmax_summary, argmax_per_vid = _score_and_report("Argmax:", "argmax_activity")
print()
sktr_summary, sktr_per_vid = _score_and_report("SKTR:", "sktr_activity")


Argmax:
{'acc_micro': 82.47808913526251,
 'edit': 60.71391910996374,
 'f1@10': 69.07644654427946,
 'f1@25': 68.10491527181362,
 'f1@50': 59.971743157957576}

SKTR:
{'acc_micro': 82.6103171777789,
 'edit': 70.77625152625151,
 'f1@10': 80.19253397996623,
 'f1@25': 79.28806243525078,
 'f1@50': 68.72018411934054}


In [6]:
# Test cases
case_ids = ['20', '11', '5', '36', '14', '4', '30', '15', '3', '18']

gt_sequences = get_sequences_by_case(df, case_ids, 'concept:name')

gt_sequences

[['17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',
  '17',


In [7]:
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# result files that were produced by this project.
with open("results.pkl", "rb") as f:
    loaded_results = pkl.load(f)

# One predicted label array per stored result entry, in file order.
pred_sequences = [res['labels'] for res in loaded_results]

pred_sequences

[array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64),
 array([17, 17, 17, ..., 18, 18, 18], dtype=int64)]

In [12]:
# Normalize ground-truth and predicted sequences into a comparable form.
gt_norm, pred_norm = normalize_sequences_for_evaluation(gt_sequences, pred_sequences)

# compute_tas_metrics_from_sequences is imported in the top import cell.
# NOTE(review): the background label is the string '0' here, while the
# CSV-based evaluations pass the int 0 — presumably because normalization
# yields string labels; confirm against utils/evaluation.
metrics = compute_tas_metrics_from_sequences(gt_norm, pred_norm, background='0')

print(metrics)


{'acc_micro': 76.85670209700113, 'edit': 70.19007207626133, 'f1@10': 75.16401278579981, 'f1@25': 74.0529016746887, 'f1@50': 64.3414317900242}


In [13]:
# Metric values copied from the printed outputs of the Argmax, SKTR and KARI
# evaluations above (percentages).
argmax = {
    'acc_micro': 82.47808913526251,
    'edit': 60.71391910996374,
    'f1@10': 69.07644654427946,
    'f1@25': 68.10491527181362,
    'f1@50': 59.971743157957576
}

sktr = {
    'acc_micro': 82.6103171777789,
    'edit': 70.77625152625151,
    'f1@10': 80.19253397996623,
    'f1@25': 79.28806243525078,
    'f1@50': 68.72018411934054
}

kari_new = {
    'acc_micro': 76.85670209700113,
    'edit': 70.19007207626133,
    'f1@10': 75.16401278579981,
    'f1@25': 74.0529016746887,
    'f1@50': 64.3414317900242
}

# Build the comparison table under its own name. Reusing `df` here would
# overwrite the event-log DataFrame that later cells still pass to
# compute_kari_metrics, breaking a top-to-bottom run of the notebook.
summary_df = pd.DataFrame(
    [argmax, sktr, kari_new], index=["Argmax", "SKTR", "KARI"]
).round(2)

print(summary_df)


        acc_micro   edit  f1@10  f1@25  f1@50
Argmax      82.48  60.71  69.08  68.10  59.97
SKTR        82.61  70.78  80.19  79.29  68.72
KARI        76.86  70.19  75.16  74.05  64.34


## Results for complete 50 Salads

In [5]:
# Load your recovery results CSV (complete 50 Salads run)
res_df = pd.read_csv('recovery_results_50salads_complete_15.csv')

# Compute comprehensive metrics
# The displayed result is a dict with one metrics dict per method
# ('sktr' and 'argmax').
# NOTE(review): this rebinds `metrics`, which an earlier cell used for the
# KARI metrics on the test-case subset.
metrics = compute_evaluation_metrics(res_df)

metrics

Computing evaluation metrics for 40 cases...
Computing SKTR metrics...
Computing argmax metrics...
Evaluation metrics computed successfully!


{'sktr': {'acc_micro': 79.52947607434194,
  'edit': 69.74188333391922,
  'f1@10': 78.9277446261085,
  'f1@25': 76.80830048572012,
  'f1@50': 68.95469970240052},
 'argmax': {'acc_micro': 82.20082415272054,
  'edit': 57.50433447763087,
  'f1@10': 67.25041136569851,
  'f1@25': 65.91483699113223,
  'f1@50': 58.830496416501454}}

In [6]:
# Compute metrics
# Explicit case-id order handed to compute_kari_metrics.
# NOTE(review): presumably this must match the order in which the entries
# were written to the pickle file — confirm against the producer.
case_order = ['30', '17', '9', '8', '20', '7', '23', '5', '28', '2', '1', '0', '13', '36', '33', '3',
 '14', '10', '31', '22', '34', '38', '37', '6', '24', '27', '21', '15', '11', '19', '16',
 '12', '32', '25', '35', '39', '26', '29', '4', '18']

# NOTE(review): `df` is expected to be the event-log DataFrame returned by
# prepare_df('50salads'); if `df` was reassigned in between, re-run the
# loading cell before this one.
results = compute_kari_metrics(
    pkl_file_path='kari_results_50salads_complete.pkl',
    df=df,
    case_id_order=case_order,
    method_name='kari'
)

results

{'kari': {'acc_micro': 79.36548650239678,
  'edit': 76.21105357752282,
  'f1@10': 79.55645941148791,
  'f1@25': 78.49189068994244,
  'f1@50': 69.29259384569326}}