In [3]:
import re, pandas as pd, numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# --- parsing ---
def parse_tree(path):
    """Read a tree dump and collect its per-level numeric rows.

    A line containing ``MG level <n>`` switches the current level to ``n``.
    Every later line that ends in ``|- a b c d`` (four unsigned integers)
    is stored together with the level that was active when it was read.
    Rows that appear before the first level header are skipped.

    Parameters
    ----------
    path : str or path-like
        File to read; undecodable bytes are replaced rather than raising.

    Returns
    -------
    pandas.DataFrame
        Columns ``level, src, dst, count, size`` (the four integers of a
        row, in file order, after the level), one row per matched line.
    """
    level_pattern = re.compile(r"MG level\s+(\d+)")
    row_pattern = re.compile(r"\|\-\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s*$")
    column_names = ["level","src","dst","count","size"]

    rows = []
    active_level = None
    with open(path, "r", errors="replace") as handle:
        for text in handle:
            header = level_pattern.search(text)
            if header is not None:
                # A level header never doubles as a data row.
                active_level = int(header.group(1))
                continue
            if active_level is None:
                continue
            hit = row_pattern.search(text)
            if hit is None:
                continue
            rows.append((active_level, *(int(tok) for tok in hit.groups())))
    return pd.DataFrame(rows, columns=column_names)

# --- binning ---
def log2_edges(min_size, max_size):
    """Return power-of-two bin edges that cover ``[min_size, max_size]``.

    The edges are meant for left-closed bins ``[2**k, 2**(k+1))``; one edge
    past ``max_size`` is included so the maximum itself lands in a bin.

    Parameters
    ----------
    min_size, max_size : number
        Smallest and largest value to be binned. Values below 1 are clamped
        to 1 for the logarithm.

    Returns
    -------
    list of int
        Increasing edges ``2**lo, ..., 2**(hi+1)``. When ``min_size`` is
        below 1 a leading ``0`` edge is added so that those values get a
        ``[0, 2**lo)`` bin instead of falling outside every bin.
    """
    lo = int(np.floor(np.log2(max(1, min_size))))
    hi = int(np.ceil(np.log2(max(1, max_size))))
    edges = [2**e for e in range(lo, hi+2)]
    if min_size < 1:
        # The clamp above starts the edges at 1; without this extra edge a
        # size of 0 would not belong to any bin.
        edges.insert(0, 0)
    return edges

def aggregate(df, edges):
    """Bin records by ``size`` and summarise every observed (level, bin) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``level``, ``size`` and ``count``.
    edges : sequence of numbers
        Increasing bin edges passed to ``pd.cut``; bins are left-closed,
        i.e. ``[edges[i], edges[i+1])``.

    Returns
    -------
    pandas.DataFrame
        One row per (level, bin) that actually occurs, with columns
        ``level``, ``bin`` (the interval), ``n_items`` (number of records),
        ``n_sends`` (sum of ``count``), ``bin_left`` (left edge as int64)
        and ``bin_label`` (the interval rendered as text).

    Notes
    -----
    Records whose ``size`` lies outside ``[edges[0], edges[-1])`` receive no
    bin from ``pd.cut`` and are therefore absent from the result.
    """
    bins = pd.cut(df["size"], bins=edges, right=False, include_lowest=True)
    g = (df.assign(bin=bins)
           .groupby(["level","bin"], observed=True)
           .agg(n_items=("size","size"),
                n_sends=("count","sum"))
           .reset_index())
    # Build the left edges as a plain integer column. Applying a function to
    # the categorical "bin" column would hand back another categorical, which
    # cannot be used in arithmetic.
    g["bin_left"] = pd.Series([int(iv.left) for iv in g["bin"]],
                              index=g.index, dtype="int64")
    g["bin_label"] = g["bin"].astype(str)
    return g

# --- plotting helpers ---
def plot_level_hist(agg, level, metric="n_items"):
    """Show a bar chart of ``metric`` per size bin for a single level.

    Parameters
    ----------
    agg : pandas.DataFrame
        Needs the columns ``level``, ``bin_left``, ``bin_label`` and the
        column named by ``metric``.
    level : value compared against ``agg["level"]``
        Only rows with this level are drawn.
    metric : str, default "n_items"
        Column plotted on the y-axis; also used as axis label and in the title.
    """
    sub = agg[agg["level"]==level].sort_values("bin_left")
    fig, ax = plt.subplots(figsize=(10,4))
    ax.bar(sub["bin_label"], sub[metric])
    # Slanted, right-anchored labels keep the interval strings readable.
    plt.setp(ax.get_xticklabels(), rotation=60, ha="right")
    ax.set_ylabel(metric)
    # En dash written as an escape so the title cannot be garbled by a
    # mismatched file encoding.
    ax.set_title(f"Level {level} \u2013 {metric}")
    plt.show()

def plot_overlay_lines(agg, metric="n_sends"):
    """Overlay one line per level: ``metric`` against the bin's left edge.

    Parameters
    ----------
    agg : pandas.DataFrame
        Needs the columns ``level``, ``bin_left`` and the column named by
        ``metric``.
    metric : str, default "n_sends"
        Column plotted on the y-axis; also used as axis label and in the title.

    The x-axis uses a base-2 logarithmic scale and every level gets its own
    legend entry.
    """
    _, ax = plt.subplots(figsize=(10,5))
    ordered = agg.sort_values("bin_left")
    for level_id, rows in ordered.groupby("level"):
        ax.plot(rows["bin_left"], rows[metric], marker="o", label=f"lvl {level_id}")
    ax.set_xscale("log", base=2)
    ax.set_xlabel("message size (bytes, log2)")
    ax.set_ylabel(metric)
    ax.set_title(f"{metric} vs msg size")
    ax.legend()
    plt.show()


In [4]:
# 1. parse your Caliper tree file
tree_path = Path("levels_64")   # <-- update path if needed
if not tree_path.is_file():
    # Fail before parsing and show where the file was looked for, so a wrong
    # working directory is obvious from the message.
    raise FileNotFoundError(
        f"tree file not found: {tree_path.resolve()} "
        f"(current directory: {Path.cwd()}); update tree_path above"
    )
df = parse_tree(tree_path)
print("parsed records:", len(df))
if df.empty:
    # Nothing matched: binning and plotting an empty frame would only produce
    # confusing downstream errors or blank figures.
    raise ValueError(f"no level rows could be parsed from {tree_path}")
display(df.head())

# 2. create log2 bins
edges = log2_edges(df["size"].min(), df["size"].max())

# 3. aggregate
agg = aggregate(df, edges)
display(agg.head())

# 4. plot histogram for a specific level (say level 0)
plot_level_hist(agg, level=0, metric="n_items")

# 5. overlay all levels
plot_overlay_lines(agg, metric="n_sends")


FileNotFoundError: [Errno 2] No such file or directory: 'levels_64'