## KPI evaluation

In [4]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np
from plotnine import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Root folder that contains one sub-directory per experiment.
results_dir = "./results/"

# Display label -> sub-directory under results_dir (label and directory
# currently share the same name for every experiment).
experiments = {
    label: label
    for label in (
        "baseline-1h-5x",
        "kpi13-1h-5x",
        "stress-baseline-1h-5x",
        "stress-kpi13-1h-5x",
    )
}

# Column names applied to each trace CSV when it is read.
col_names = [
    'timestamp_sec',
    'timestamp_nsec',
    'event_type',
    'span_id',
    'parent_span_id',
    'span_name',
    'level',
    'message',
]

def load_experiment(exp_dir, base_dir=None, columns=None):
    """Load every CSV run of one experiment and compute per-span durations.

    Each ``*.csv`` file in the experiment directory is treated as one run;
    runs are numbered from 1 in sorted filename order. Within a run, a span's
    duration is the time between the first ``span_start`` row and the last
    ``span_end`` row (in file order) that share a ``span_id``. Spans missing
    either event, and rows without a ``span_id``, are ignored.

    Parameters
    ----------
    exp_dir : str
        Experiment sub-directory, relative to the results root.
    base_dir : str, optional
        Results root. Defaults to the notebook-level ``results_dir``.
    columns : list of str, optional
        Column names applied to each CSV. Defaults to the notebook-level
        ``col_names``. Must include ``timestamp_sec``, ``timestamp_nsec``,
        ``event_type``, ``span_id`` and ``span_name``.

    Returns
    -------
    pandas.DataFrame
        One row per completed span with columns ``run``, ``span_name`` and
        ``duration_ms``. The columns are present even when no span was found,
        so callers can index them unconditionally.
    """
    root = results_dir if base_dir is None else base_dir
    names = col_names if columns is None else columns
    exp_path = os.path.join(root, exp_dir)
    csv_files = sorted(f for f in os.listdir(exp_path) if f.endswith('.csv'))

    out_columns = ['run', 'span_name', 'duration_ms']
    per_run = []
    for run_idx, csv_file in enumerate(csv_files, start=1):
        # The first line of each file is skipped and `names` is applied instead.
        events = pd.read_csv(os.path.join(exp_path, csv_file), names=names, skiprows=1)

        # Rows without a span_id cannot be paired; merge would otherwise
        # match NaN keys to each other.
        events = events.dropna(subset=['span_id'])
        events = events.assign(
            ts_ns=events['timestamp_sec'] * 1_000_000_000 + events['timestamp_nsec']
        )

        # First start / last end per span, in file order.
        starts = events.loc[
            events['event_type'] == 'span_start', ['span_id', 'span_name', 'ts_ns']
        ].drop_duplicates('span_id', keep='first')
        ends = events.loc[
            events['event_type'] == 'span_end', ['span_id', 'ts_ns']
        ].drop_duplicates('span_id', keep='last')

        # Inner join keeps only spans that have both a start and an end.
        paired = starts.merge(
            ends, on='span_id', suffixes=('_start', '_end')
        ).sort_values('span_id')
        if paired.empty:
            continue

        per_run.append(pd.DataFrame({
            'run': run_idx,
            'span_name': paired['span_name'].to_numpy(),
            'duration_ms': (
                (paired['ts_ns_end'] - paired['ts_ns_start']) / 1_000_000
            ).to_numpy(),
        }))

    if not per_run:
        # Keep the schema stable so downstream column lookups do not raise.
        return pd.DataFrame(columns=out_columns)
    return pd.concat(per_run, ignore_index=True)[out_columns]

# Read every configured experiment into memory, reporting its size as it loads.
dfs = {}
for name, directory in experiments.items():
    loaded = load_experiment(directory)
    dfs[name] = loaded
    n_runs, n_spans = loaded['run'].nunique(), len(loaded)
    print(f"{name}: {n_spans} spans across {n_runs} runs")

baseline-1h-5x: 57748 spans across 5 runs
kpi13-1h-5x: 43278 spans across 5 runs
stress-baseline-1h-5x: 54320 spans across 5 runs
stress-kpi13-1h-5x: 41391 spans across 5 runs


In [14]:
# Per-experiment summary tables: one row per repeat, with mean, SD, P90, P99
# and max as columns.
# The main span of interest is kpi_13_failover; change the span_name filter to
# look at a different span.

# Experiments whose values are tiny, so they are displayed in microseconds.
us_experiments = {
    'kpi13-1h-5x',
    'stress-kpi13-1h-5x',
}

def make_summary_table(df, span_name='kpi_13_failover', unit='ms'):
    """Summarise span durations with one row per run/repeat.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``run``, ``span_name`` and ``duration_ms``.
    span_name : str
        Span to summarise. If ``df`` has no rows for it, the ``apply_patches``
        span is used instead.
    unit : str
        Label used in the column headers. ``'µs'`` multiplies the millisecond
        durations by 1000; any other value leaves them unscaled, so pass only
        ``'ms'`` or ``'µs'`` to keep labels and values consistent.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Repeat`` (``'Repeat <run>'``), with columns Mean, SD, P90,
        P99 and Max (each suffixed with the unit), rounded to two decimals.
        When no matching span rows exist, an empty table with the same columns
        is returned instead of raising.
    """
    span_df = df[df['span_name'] == span_name]
    if span_df.empty:
        span_df = df[df['span_name'] == 'apply_patches']

    scale = 1000.0 if unit == 'µs' else 1.0  # duration_ms → µs
    stat_cols = [f'{stat} ({unit})' for stat in ('Mean', 'SD', 'P90', 'P99', 'Max')]

    rows = []
    # groupby yields runs in ascending order, one pass over the data.
    for run_num, durations in span_df.groupby('run')['duration_ms']:
        d = durations * scale
        stats = (d.mean(), d.std(), np.percentile(d, 90), np.percentile(d, 99), d.max())
        rows.append({
            'Repeat': f'Repeat {run_num}',
            **{col: round(value, 2) for col, value in zip(stat_cols, stats)},
        })

    # Declaring the columns keeps set_index working when `rows` is empty.
    return pd.DataFrame(rows, columns=['Repeat', *stat_cols]).set_index('Repeat')

# Print one summary table per loaded experiment under a banner header.
for exp_name, df_exp in dfs.items():
    span_names = df_exp['span_name'].unique()

    # Prefer the failover span; otherwise report on apply_patches.
    if 'kpi_13_failover' in span_names:
        span = 'kpi_13_failover'
    else:
        span = 'apply_patches'

    # Experiments listed in us_experiments are shown in microseconds.
    if exp_name in us_experiments:
        unit = 'µs'
    else:
        unit = 'ms'

    summary = make_summary_table(df_exp, span_name=span, unit=unit)
    print(f"\n{'='*60}")
    print(f"  {exp_name}  (span: {span}, unit: {unit})")
    print(f"{'='*60}")
    print(summary.to_string())
    print()


  baseline-1h-5x  (span: kpi_13_failover, unit: ms)
          Mean (ms)  SD (ms)  P90 (ms)  P99 (ms)  Max (ms)
Repeat                                                    
Repeat 1      12.24    61.65      3.45    392.65    531.89
Repeat 2      13.77    64.16      4.37    388.48    515.76
Repeat 3      12.53    61.23      4.59    386.04    524.39
Repeat 4      11.16    57.65      3.80    369.05    502.65
Repeat 5      12.06    59.97      4.53    391.54    538.66


  kpi13-1h-5x  (span: kpi_13_failover, unit: µs)
          Mean (µs)  SD (µs)  P90 (µs)  P99 (µs)  Max (µs)
Repeat                                                    
Repeat 1       3.13     5.07      4.07      5.75    236.50
Repeat 2       3.03     6.40      3.84      5.21    299.70
Repeat 3       3.93    28.15      3.88      7.31   1289.53
Repeat 4       3.48    15.74      3.95      6.86    766.97
Repeat 5       3.73    18.32      3.97      7.26    735.82


  stress-baseline-1h-5x  (span: kpi_13_failover, unit: ms)
         