In [1]:
# Cell 1 ──────────────────────────────────────────────────────────
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

plt.rcParams.update({
    "figure.figsize": (6, 3),
    "font.size": 11,
})


In [2]:
# Cell 2 ──────────────────────────────────────────────────────────
def drop_outlier_calls(df, column="p99"):
    """
    Remove calls whose per-call p99 is an outlier
    (outside Q1 ± 1.5 × IQR). Returns the cleaned DataFrame and
    the percentage of calls removed.
    """
    series = df[column].dropna()
    q1, q3 = np.percentile(series, [25, 75])
    iqr = q3 - q1
    low, high = q1 - 1.5 * iqr, q3 + 1.5 * iqr
    mask = series.between(low, high)
    pct_removed = 100 * (~mask).mean()
    return df.loc[mask], pct_removed


In [4]:
# Cell 3 ──────────────────────────────────────────────────────────
STATS_FILE = Path("./owd_call_stats.csv")   # <— change as needed
assert STATS_FILE.exists(), f"File not found: {STATS_FILE}"

df_all = pd.read_csv(STATS_FILE)
df_all.head()


Unnamed: 0,call_id,direction,count,mean,std,p99
0,iteration_136_owd,caller→callee,0,,,
1,iteration_136_owd,callee→caller,0,,,
2,iteration_330_owd,caller→callee,0,,,
3,iteration_330_owd,callee→caller,0,,,
4,iteration_319_owd,caller→callee,0,,,


In [5]:
# Cell 4 ──────────────────────────────────────────────────────────
DIRECTION = "both"          # "caller→callee", "callee→caller", or "both"

if DIRECTION.lower() != "both":
    df = df_all[df_all["direction"] == DIRECTION].copy()
else:
    df = df_all.copy()

print(f"Loaded {len(df):,} rows for direction = {DIRECTION}")


Loaded 46,942 rows for direction = both


In [6]:
# Cell 5 ──────────────────────────────────────────────────────────
df_clean, pct_removed = drop_outlier_calls(df, column="p99")
p99 = df_clean["p99"].dropna()

THRESH = 360    # ms QoE threshold

summary = {
    "Calls before filter": len(df),
    "Calls after filter" : len(df_clean),
    "% calls removed"    : f"{pct_removed:.2f} %",
    "Mean p99 (ms)"      : p99.mean(),
    "Median p99 (ms)"    : p99.median(),
    "Std p99 (ms)"       : p99.std(),
    "95th p99 (ms)"      : np.percentile(p99, 95),
    "99th p99 (ms)"      : np.percentile(p99, 99),
    f"% calls p99 > {THRESH} ms": (p99 > THRESH).mean() * 100,
}

print("\nAggregate statistics (per-call p99, outliers removed)\n" +
      "──────────────────────────────────────────────────────")
for k, v in summary.items():
    print(f"{k:>24}: {v:,.2f}" if isinstance(v, float) else f"{k:>24}: {v}")


IndexError: index -1 is out of bounds for axis 0 with size 0