In [None]:
import os, json, subprocess, re
import shlex
from pathlib import Path
from typing import List, Dict, Iterator, Pattern, Any

import numpy as np
import pandas as pd
from pandas import DataFrame as df
import matplotlib.pyplot as plt

In [None]:
def period_lambda(pl: float):
    """Convert between a period in seconds and a rate per day.

    Divides the number of seconds in a day (86400) by ``pl``. The mapping is
    its own inverse: a number of events per day yields the period in seconds
    between them, and a period in seconds yields the events per day.

    Raises:
        ZeroDivisionError: if ``pl`` is a plain Python zero.
    """
    seconds_per_day = 86400
    return seconds_per_day / pl

# Traffic loads under study, expressed as messages per day.
msgs_per_day = [3, 10, 50, 100, 400]

# Matching value for each load as returned by period_lambda (86400 / rate).
periods = [period_lambda(rate) for rate in msgs_per_day]
print(periods)

### Populate data

In [None]:
MAIN_SCRIPT = "./run_main.sh"
CONFIG_FILE = "config.json"

# Flip to True to launch the runs described by CONFIG_FILE when this cell runs.
populate = False

def populate_from_config(config_path: str = CONFIG_FILE, runner: str = MAIN_SCRIPT, dry_run: bool = False) -> None:
    """Build one chained bash command from a JSON config and execute it.

    The config file is resolved relative to the current working directory and
    may contain:

    * ``desert_env``: path of a file that is ``source``d before any run;
    * ``rng_rounds`` / ``rng_start``: appended to every run (both default to 1);
    * ``iterations``: list of objects with ``nn``, ``period`` and ``sink_mode``;
    * ``combine``: lists under ``nn``, ``period`` and ``sink_mode`` whose
      Cartesian product is appended after the explicit ``iterations``; a
      missing list defaults to ``[0]``.

    Each run becomes ``<runner> <nn> <period> <sink_mode> <rounds> <start>``.
    Runs are joined with ``&&``, so the chain stops at the first failure.
    Every value taken from the config is shell-quoted before it is inserted
    into the command line.

    Args:
        config_path: Config file path, relative to the working directory.
        runner: Command prefix of every run. It is inserted verbatim (not
            quoted), so it may itself consist of several words.
        dry_run: When True, only print the chained command.

    Raises:
        FileNotFoundError: If the config file or the ``desert_env`` file is
            missing.
        ValueError: If the config is not valid JSON or an iteration entry is
            not an object.
        RuntimeError: If the chained command exits with a non-zero status.
    """
    base_dir = Path.cwd()
    cfg_path = base_dir / config_path
    if not cfg_path.exists():
        raise FileNotFoundError(f"Config file not found: {cfg_path}")
    try:
        cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in {cfg_path}: {e}") from e

    desert_env = cfg.get("desert_env")
    rounds = cfg.get("rng_rounds", 1)
    start_rng = cfg.get("rng_start", 1)
    # `or` also covers an explicit JSON null for either section.
    comb: Dict[str, List] = cfg.get("combine") or {}
    explicit: List[Dict[str, Any]] = cfg.get("iterations") or []

    print(f"Loaded: rng_rounds={rounds}, rng_start={start_rng}, entries={len(explicit)}")

    def sh(value: Any) -> str:
        """Render a config value as a single, safely quoted shell word."""
        return shlex.quote(str(value))

    cmds: List[str] = []
    if desert_env:
        env_file = (base_dir / desert_env).resolve()
        if not env_file.exists():
            raise FileNotFoundError(f"desert_env file not found: {env_file}")
        # Source environment first
        cmds.append(f"source {sh(env_file)}")

    # Cartesian product of the "combine" lists, built without touching `cfg`.
    # NOTE(review): when "combine" is absent this still yields one 0/0/0 run,
    # exactly as before — confirm that this default run is intended.
    grid = [
        {"nn": nn_param, "period": period_param, "sink_mode": sink_mode_param}
        for nn_param in comb.get("nn", [0])
        for period_param in comb.get("period", [0])
        for sink_mode_param in comb.get("sink_mode", [0])
    ]

    for i, it in enumerate([*explicit, *grid]):
        if not isinstance(it, dict):
            raise ValueError(f"iterations[{i}] must be an object, got {type(it).__name__}")
        run_args = [it.get("nn"), it.get("period"), it.get("sink_mode"), rounds, start_rng]
        cmds.append(" ".join([runner, *map(sh, run_args)]))

    full_cmd = " && ".join(cmds)

    if dry_run:
        print("DRY_RUN chained command:\n", full_cmd)
        return

    print("Executing bash command...")
    print(full_cmd)
    completed = subprocess.run(["bash", "-c", full_cmd], cwd=base_dir)
    if completed.returncode != 0:
        raise RuntimeError(f"Chained command failed with exit code {completed.returncode}")
    print("Success!")

if populate:
    populate_from_config()

### Reading trace data, parsing functions

In [None]:
# Translation applied to the sink id captured from a log line: "0", "1" and
# "2" become 254, 253 and 252; any other id is kept as parsed.
sink_id_dict = {
    "0": 254,
    "1": 253,
    "2": 252
}

def parse_file_records(regex: Pattern, path: str) -> Iterator[Dict]:
    """Lazily turn the matching lines of a log file into record dicts.

    ``regex`` is matched against the start of every line of ``path`` and must
    define the named groups ``time``, ``sink_id``, ``packet_id`` and
    ``src_ip``. Lines that do not match are ignored.

    Yields:
        ``{"time": float, "sink_id": int, "packet_id": int, "src_ip": int}``
        where ``sink_id`` has been passed through ``sink_id_dict``.
    """
    with open(path, "r") as log_file:
        # Match objects are always truthy, so filter() drops only the misses.
        for hit in filter(None, map(regex.match, log_file)):
            raw_sink = hit.group("sink_id")
            yield {
                "time": float(hit.group("time")),
                "sink_id": int(sink_id_dict.get(raw_sink, raw_sink)),
                "packet_id": int(hit.group("packet_id")),
                "src_ip": int(hit.group("src_ip")),
            }

In [None]:
def pdr_per_iteration(sink_log_path: str, node_log_path: str) -> float:
    """Compute the packet delivery ratio (in percent) of one simulation run.

    Received packets are the distinct ``packet_id`` values found in the UWUDP
    ``recv`` debug lines of ``sink_log_path``; an id that is logged several
    times is counted once. Sent packets are the sum of the ``n_pkts`` column
    of the CSV table in ``node_log_path``, which is read after skipping 17
    lines and in which ``_`` marks a missing value.

    Diagnostics are printed when a notebook-level ``verbose`` flag is truthy;
    the flag counts as off when it has not been defined yet.

    Args:
        sink_log_path: Log file containing the UWUDP debug output.
        node_log_path: File containing the per-node CSV table.

    Returns:
        ``received / sent * 100``, or ``100.0`` when nothing was sent.
    """
    _pattern = re.compile(r"\[(?P<time>\d+(?:\.\d+)?)\]::DBG::UWUDP\((?P<sink_id>\d+)\)::recv\(Packet \*, int\)::new packet with id (?P<packet_id>\d+) from ip (?P<src_ip>\d+) : \d+")

    # Look the flag up lazily so the function also works when it is called
    # before the cell that defines `verbose` has been executed.
    is_verbose = bool(globals().get("verbose", False))

    udp_df = df.from_records(parse_file_records(_pattern, sink_log_path))
    result_df = pd.read_csv(node_log_path, skiprows=17, header=0)
    result_df = result_df.replace("_", np.nan).infer_objects(copy=False)
    if is_verbose:
        print(udp_df.head())
        print(result_df.head())

    summary = df()
    if not udp_df.empty:
        # NOTE(review): packets are de-duplicated on packet_id alone, which
        # assumes ids are unique across source nodes — confirm.
        summary = (
            udp_df.groupby("packet_id")
            .agg(
                first_time=("time", "min"),
                last_time=("time", "max"),
                duplicates=("packet_id", "count"),
                sinks=("sink_id", lambda s: sorted(set(s))),
            )
            .reset_index()
        )

        if is_verbose:
            print(summary.head())
    else:
        print(f"Warning: No matching UDP recv lines found in {sink_log_path}.")

    total_recv = len(summary)
    # A column that contained "_" was read as text and infer_objects() does
    # not parse strings, so convert explicitly; NaN entries are skipped by sum().
    total_sent = pd.to_numeric(result_df["n_pkts"]).sum()
    total_pdr = (total_recv / total_sent * 100) if total_sent > 0 else 100.0

    if is_verbose:
        print("Total received packets:", total_recv)
        print("Total transmitted packets: ", total_sent)
        print(f"Total PDR: {total_pdr:.2f}")
    return total_pdr

### Preparing data

In [None]:
DATA_PATH = "data/"
EXPERIMENT_ITER = 20

verbose = False
rows = []
# Sorted so that the order of `rows` does not depend on the file system.
for experiment_dir in sorted(os.listdir(DATA_PATH)):
    subdir_path = os.path.join(DATA_PATH, experiment_dir)
    if not os.path.isdir(subdir_path):
        continue

    # The directory name carries the run parameters: <nn>_<period>_<sink_mode>.
    parts = experiment_dir.split('_')
    if verbose:
        print(parts)
    try:
        exp_nn = int(parts[0])
        exp_period = float(parts[1])
        exp_sink_mode = int(parts[2])
    except (IndexError, ValueError):
        # Unrelated folders (e.g. ".ipynb_checkpoints") must not abort the scan.
        print(f"Skipping {subdir_path}: name is not <nn>_<period>_<sink_mode>")
        continue

    # One sink/node log pair per RNG repetition, numbered from 1.
    for i in range(1, EXPERIMENT_ITER + 1):
        sink_path = os.path.join(subdir_path, f"{experiment_dir}_{i}_sink.out")
        node_path = os.path.join(subdir_path, f"{experiment_dir}_{i}_node.out")
        pdr = pdr_per_iteration(sink_path, node_path)
        if verbose:
            print(i, pdr)
        rows.append({
            "nn": exp_nn,
            "period": exp_period,
            "sink_mode": exp_sink_mode,
            "rng": int(i),
            "total_pdr": float(pdr)
        })

### Extracting results

In [None]:
# Mean PDR over the repetitions of each (nn, period, sink_mode) setup, plus a
# "lambda" column holding the period passed through period_lambda.
results_df = (
    df(rows)
    .groupby(["nn", "period", "sink_mode"])
    .agg(avg_pdr=("total_pdr", "mean"))
    .reset_index()
    # "lambda" is a Python keyword, hence the dict-unpacking form of assign().
    .assign(**{"lambda": lambda frame: frame["period"].apply(period_lambda)})
)
print(results_df.head(100))

In [None]:
# Reshape to one row per (nn, lambda) with the avg_pdr of every sink_mode in
# its own column.
pivot = (
    results_df
    .pivot_table(index=["nn", "lambda"], columns="sink_mode", values="avg_pdr", aggfunc="mean")
    .reset_index()
)

# Columns are keyed by the integer sink_mode; .get() returns None if one is absent.
pdr_1 = pivot.get(1)
pdr_3 = pivot.get(3)

# NOTE(review): .abs() drops the sign, so a loss shows up as a positive gain — confirm.
pivot["pdr_gain_abs"] = (pdr_3 - pdr_1).abs()
# Relative change of sink_mode 3 with respect to sink_mode 1, in percent.
pivot["pdr_gain_percent"] = (pdr_3 - pdr_1) / pdr_1 * 100

print(pivot.head(100))

### Plotting

In [None]:
results_sink_1_df = results_df[results_df["sink_mode"] == 1]
print(results_sink_1_df.head())
results_sink_3_df = results_df[results_df["sink_mode"] == 3]
print(results_sink_3_df.head())

# Fixed colour per node count so both sink configurations share a colour.
nn_colors = {
    1:      "tab:blue",
    10:     "tab:orange",
    20:     "tab:green",
    50:     "tab:red",
    100:     "tab:purple"
}

# One entry per sink configuration; the line style tells the two apart.
plot_data = [
    {
        "title": "PDR vs PPD with one sink",
        "linestyle": "-",
        "sink": 1,
        "df": results_sink_1_df
    },
    {
        "title": "PDR vs PPD with three sinks",
        "linestyle": "--",
        "sink": 3,
        "df": results_sink_3_df
    }
]

# Figure 1: average PDR per node count, both sink configurations on one axes.
fig_pdr, ax_pdr = plt.subplots()
for plot in plot_data:
    for nn in plot["df"]["nn"].unique():
        curr = plot["df"][plot["df"]["nn"] == nn]
        ax_pdr.plot(
            curr["lambda"],
            curr["avg_pdr"],
            # None makes matplotlib pick a colour for an nn missing from
            # nn_colors instead of failing with KeyError.
            color=nn_colors.get(nn),
            linestyle=plot["linestyle"],
            label=f"nn={nn},s={plot['sink']}",
        )

ax_pdr.set_ylim(0.0, 110.0)
ax_pdr.set_yticks(range(0, 101, 10))
ax_pdr.set_xlabel("Procedures per day")
ax_pdr.set_ylabel("Packet delivery rate")
ax_pdr.legend()
ax_pdr.set_title("PDR vs PPD: one sink vs three sinks")
ax_pdr.grid(True)
plt.show()

# Figure 2: relative PDR change of sink_mode 3 over sink_mode 1 per node count.
fig_gain, ax_gain = plt.subplots()
for nn in pivot["nn"].unique():
    curr = pivot[pivot["nn"] == nn]
    ax_gain.plot(curr["lambda"], curr["pdr_gain_percent"], label=f"nn={nn}")

# NOTE(review): gains below 0 % or above 110 % fall outside these limits — confirm.
ax_gain.set_ylim(0.0, 110.0)
ax_gain.set_yticks(range(0, 101, 10))
ax_gain.set_xlabel("Procedures per day")
ax_gain.set_ylabel("PDR gain, %")
ax_gain.legend()
ax_gain.set_title("PDR gain from redundancy")
ax_gain.grid(True)
plt.show()