In [8]:
import argparse
from pathlib import Path
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.colors as mcolors

In [None]:
TREND_BIN = 60  # seconds; bin size for vehicle-wise trends from tripinfo (depart-time)
OUT_FONTSIZE = 10
HEATMAP_CMAP = 'RdYlGn_r'   # reverse of RdYlGn so red=high, green=low
ACCENT = "#c86859"

# Hard-coded free-flow speeds (m/s)
VTYPE_FF_SPEED = {
    "aggressive_bike": 16.67,
    "aggressive_car": 16.67,
    "auto_rickshaw": 11.60,
    "bus": 9.72,
    "normal_bike": 13.89,
    "normal_car": 13.89,
}

In [10]:
def robust_read_csv(path: Path) -> pd.DataFrame:
    for kw in [dict(), dict(sep=";"), dict(engine="python"), dict(sep="\t")]:
        try:
            return pd.read_csv(path, **kw)
        except Exception:
            pass
    raise RuntimeError(f"Could not read CSV: {path}")

def nice_savefig(fig, outpath: Path):
    outpath.parent.mkdir(parents=True, exist_ok=True)
    fig.tight_layout()
    fig.savefig(outpath, dpi=160, bbox_inches="tight")
    plt.close(fig)

def parse_net_geometry(net_xml: Path) -> dict:
    """
    Return {edge_id: [(x,y), ...]} from lanes/edges shapes; skip internal ':' edges.
    """
    edges_geom = {}
    if not net_xml or not net_xml.exists():
        return edges_geom
    try:
        root = ET.parse(net_xml).getroot()
        for e in root.findall(".//edge"):
            eid = e.get("id")
            if not eid or eid.startswith(":"):
                continue
            shape = e.get("shape")
            lane_shapes = [ln.get("shape") for ln in e.findall(".//lane") if ln.get("shape")]
            shp = lane_shapes[0] if lane_shapes else shape
            if not shp:
                continue
            pts = []
            for pair in shp.strip().split():
                try:
                    x, y = pair.split(",")
                    pts.append((float(x), float(y)))
                except Exception:
                    continue
            if len(pts) >= 2:
                edges_geom[eid] = pts
    except Exception as e:
        print(f"[warn] Could not parse net for geometry: {e}")
    return edges_geom


In [11]:
def do_summary(summary_csv: Path, outdir: Path):
    if not summary_csv or not summary_csv.exists():
        print("[info] summary.csv not provided.")
        return
    df = robust_read_csv(summary_csv)

    # Required columns
    need = ["time", "running", "meanSpeed", "waiting"]
    missing = [c for c in need if c not in df.columns]
    if missing:
        print(f"[warn] summary missing columns: {missing}. Available: {list(df.columns)}")
        return

    # 1) Running vehicles over time
    fig = plt.figure()
    plt.plot(df["time"].astype(float).values, df["running"].astype(float).values, color=ACCENT)
    plt.xlabel("Time (s)")
    plt.ylabel("Running vehicles")
    plt.title("Total running vehicles over time")
    nice_savefig(fig, outdir / "summary_running_vehicles.png")

    # 2) Network average speed trend
    fig = plt.figure()
    plt.plot(df["time"].astype(float).values, df["meanSpeed"].astype(float).values, color=ACCENT)
    plt.xlabel("Time (s)")
    plt.ylabel("Average speed (m/s)")
    plt.title("Network average speed over time")
    nice_savefig(fig, outdir / "summary_speed_trend.png")

    # 3) Overall waiting time trend
    fig = plt.figure()
    plt.plot(df["time"].astype(float).values, df["waiting"].astype(float).values, color=ACCENT)
    plt.xlabel("Time (s)")
    plt.ylabel("Waiting time (s)")
    plt.title("Overall waiting time over time")
    nice_savefig(fig, outdir / "summary_wait_trend.png")

In [None]:
def _bin_by_depart(df: pd.DataFrame, bin_s: int) -> pd.Series:
    # Floor to bin start (seconds)
    return (df["depart"].astype(float) // bin_s) * bin_s

def do_tripinfo(tripinfo_csv: Path, outdir: Path):
    if not tripinfo_csv or not tripinfo_csv.exists():
        print("[info] tripinfo.csv not provided.")
        return
    trip = robust_read_csv(tripinfo_csv)

    need = ["vType", "duration", "routeLength"]
    missing = [c for c in need if c not in trip.columns]
    if missing:
        print(f"[warn] tripinfo missing columns: {missing}. Available: {list(trip.columns)}")
        return

    exports = outdir / "exports"
    exports.mkdir(parents=True, exist_ok=True)

    # Average travel time by vType
    avg_tt = (trip.groupby("vType")["duration"]
                 .mean().sort_values().rename("avg_travel_time_s").to_frame())
    avg_tt.to_csv(exports / "avg_travel_time_by_vType.csv")
    fig = plt.figure()
    plt.bar(avg_tt.index.astype(str), avg_tt["avg_travel_time_s"].values, color=ACCENT)
    plt.xticks(rotation=30, ha="right")
    plt.ylabel("Average travel time (s)")
    plt.title("Average travel time by vehicle type")
    nice_savefig(fig, outdir / "trip_avg_tt_by_vtype.png")

    # Delay distributions per vType: duration − free-flow time (using hard-coded speeds)
    route_len = trip["routeLength"].astype(float)
    vtypes = trip["vType"].astype(str)
    ff_speed = vtypes.map(lambda t: VTYPE_FF_SPEED.get(t, np.nan))
    default_ff = np.nanmedian(list(VTYPE_FF_SPEED.values()))
    ff_speed = ff_speed.fillna(default_ff)
    freeflow_time = route_len / ff_speed.replace(0, np.nan)
    delay = trip["duration"].astype(float) - freeflow_time
    trip["_delay_s"] = delay.clip(lower=0)

    for vt, grp in trip.groupby("vType"):
        fig = plt.figure()
        plt.hist(grp["_delay_s"].dropna().values, bins=30, color=ACCENT)
        plt.xlabel("Delay (s)")
        plt.ylabel("Count")
        plt.title(f"Delay distribution — {vt}")
        safe = str(vt).replace("/", "-")
        nice_savefig(fig, outdir / f"trip_delay_hist_{safe}.png")

    delay_tbl = (trip.groupby("vType")["_delay_s"]
                    .agg(["count", "mean", "median", "std", "max"])
                    .sort_values("mean", ascending=False))
    delay_tbl.to_csv(exports / "delay_summary_by_vType.csv")

    trip["_speed_realized"] = (
        trip["routeLength"].astype(float) /
        trip["duration"].replace(0, np.nan).astype(float))

    speed_tbl = (
        trip.groupby("vType")["_speed_realized"]
            .agg(["count", "mean", "median", "std", "max"]) 
            .sort_values("mean", ascending=False))
    speed_tbl.to_csv(exports / "speed_summary_by_vType.csv")

    # Route length vs travel time - scatter + 2D histogram (density)
    x = trip["routeLength"].astype(float).values
    y = trip["duration"].astype(float).values

    hist_cmap = plt.cm.get_cmap('Spectral_r').copy()
    hist_cmap.set_under('white')
    
    fig = plt.figure()
    plt.scatter(x, y, s=8, alpha=0.6, color=ACCENT)
    plt.xlabel("Route length (m)")
    plt.ylabel("Travel time (s)")
    plt.title("Route length vs travel time (scatter)")
    nice_savefig(fig, outdir / "trip_routeLen_vs_travelTime_scatter.png")

    fig = plt.figure()
    plt.hist2d(x, y, bins=40, cmap=hist_cmap, vmin=1, cmin=1)  # zeros < 1 = white
    plt.xlabel("Route length (m)")
    plt.ylabel("Travel time (s)")
    plt.title("Route length vs travel time (2D histogram)")
    plt.colorbar(label="Count")
    nice_savefig(fig, outdir / "trip_routeLen_vs_travelTime_hist2d.png")

    # Pearson r overall and by vType
    def pearson_r(a, b):
        if len(a) < 3:
            return float("nan")
        return pd.Series(a).corr(pd.Series(b))

    overall_r = pearson_r(x, y)
    bytype_r = (trip.groupby("vType")
                    .apply(lambda g: pearson_r(g["routeLength"].astype(float), g["duration"].astype(float)))
                    .rename("pearson_r").to_frame())
    corr_df = pd.concat([
        pd.DataFrame({"vType": ["ALL"], "pearson_r": [overall_r]}).set_index("vType"),
        bytype_r
    ])
    corr_df.to_csv(exports / "routeLen_vs_tt_corr.csv")

    # Vehicle-wise trends (using depart-time in bins):
    # - speed trend: realized speed (journey speed) = routeLength / duration
    # - waiting trend: waitingTime average per bin
    if "depart" in trip.columns:
        trip["_bin"] = _bin_by_depart(trip, TREND_BIN)
        trip["_speed_realized"] = trip["routeLength"].astype(float) / trip["duration"].replace(0, np.nan).astype(float)
        if "waitingTime" not in trip.columns:
            trip["waitingTime"] = 0.0

        # Speed trend by vType
        g_speed = (trip.groupby(["_bin", "vType"])["_speed_realized"]
                        .mean().reset_index().sort_values("_bin"))
        fig = plt.figure(figsize=(12, 5))
        for vt, sub in g_speed.groupby("vType"):
            plt.plot(sub["_bin"].values, sub["_speed_realized"].values, label=str(vt))
        plt.xlabel(f"Depart time (s), binned ({TREND_BIN}s)")
        plt.ylabel("Average realized speed (m/s)")
        plt.title("Vehicle-wise speed trend over time (by depart bins)")
        plt.legend(loc="center left", bbox_to_anchor=(1.02, 0.5),
           fontsize=OUT_FONTSIZE, frameon=False)
        nice_savefig(fig, outdir / "trip_speed_trend_by_vType.png")

        # Waiting trend by vType
        g_wait = (trip.groupby(["_bin", "vType"])["waitingTime"]
                      .mean().reset_index().sort_values("_bin"))
        fig = plt.figure(figsize=(12, 5))
        for vt, sub in g_wait.groupby("vType"):
            plt.plot(sub["_bin"].values, sub["waitingTime"].values, label=str(vt))
        plt.xlabel(f"Depart time (s), binned ({TREND_BIN}s)")
        plt.ylabel("Average waiting time (s)")
        plt.title("Vehicle-wise waiting trend over time (by depart bins)")
        plt.legend(loc="center left", bbox_to_anchor=(1.02, 0.5),
           fontsize=OUT_FONTSIZE, frameon=False)
        nice_savefig(fig, outdir / "trip_wait_trend_by_vType.png")

    # Time loss graphs (per vehicle)
    if "timeLoss" in trip.columns:
        tl = trip.copy()
        # Bar: average timeLoss per vType
        fig = plt.figure()
        tl_bar = (tl.groupby("vType")["timeLoss"].mean().sort_values(ascending=False))
        plt.bar(tl_bar.index.astype(str), tl_bar.values, color=ACCENT)
        plt.xticks(rotation=30, ha="right")
        plt.ylabel("Average time loss (s)")
        plt.title("Average time loss by vehicle type")
        nice_savefig(fig, outdir / "trip_timeLoss_by_vType_bar.png")

        # Box: distribution per vType
        fig = plt.figure()
        data = [tl.loc[tl["vType"] == vt, "timeLoss"].dropna().values for vt in tl_bar.index]
        plt.boxplot(data, labels=tl_bar.index.astype(str), vert=True)
        plt.xticks(rotation=30, ha="right")
        plt.ylabel("Time loss (s)")
        plt.title("Time loss distribution by vehicle type")
        nice_savefig(fig, outdir / "trip_timeLoss_by_vType_box.png")


In [None]:
def do_edgedata(edgedata_csv: Path, net_xml: Path, outdir: Path):
    if not edgedata_csv or not edgedata_csv.exists():
        print("[info] edgedata.csv not provided.")
        return
    edge = robust_read_csv(edgedata_csv)

    exports = outdir / "exports"
    exports.mkdir(parents=True, exist_ok=True)

    # Heat map (avg occupancy or avg speed)
    edges_geom = parse_net_geometry(net_xml)

    # 1) Congestion heat map by occupancy or speed
    metric_col = "occupancy" if "occupancy" in edge.columns else ("speed" if "speed" in edge.columns else None)
    metric_name = "occupancy" if metric_col == "occupancy" else ("speed (m/s)" if metric_col == "speed" else None)
    if metric_col and edges_geom:
        edge_avg = edge.groupby("id")[metric_col].mean().rename("metric").to_frame()
        segments, values = [], []
        for eid, row in edge_avg.itertuples():
            pts = edges_geom.get(eid)
            if not pts:
                continue
            segments.append(np.array(pts))
            values.append(row)
        if segments:
            fig, ax = plt.subplots()
            lc = LineCollection(segments, linewidths=2)
            lc.set_array(np.array(values))
            lc.set_cmap(HEATMAP_CMAP)
            ax.add_collection(lc)
            all_pts = np.vstack(segments)
            ax.set_xlim(all_pts[:, 0].min() - 5, all_pts[:, 0].max() + 5)
            ax.set_ylim(all_pts[:, 1].min() - 5, all_pts[:, 1].max() + 5)
            ax.set_aspect("equal", adjustable="box")
            ax.set_title(f"Network congestion heat map — {metric_name} (average)")
            cb = plt.colorbar(lc, ax=ax); cb.set_label(metric_name)
            nice_savefig(fig, outdir / "edge_heatmap_avg_metric.png")
        else:
            print("[warn] No matching geometries for edges in net.xml; heat map skipped.")
    else:
        if not edges_geom:
            print("[info] net.xml not provided or has no geometry; skipping occupancy/speed heat map.")
        else:
            print("[info] No occupancy/speed column; skipping occupancy/speed heat map.")

    # 2) Density heatmap 'density' (edge-level)
    dens_metric = None
    if "density" in edge.columns:
        dens_metric = edge.groupby("id")["density"].mean().rename("density")
    else:
        print("No matching columns found")

    if dens_metric is not None and edges_geom:
        segments, values = [], []
        for eid, val in dens_metric.items():
            pts = edges_geom.get(eid)
            if not pts:
                continue
            segments.append(np.array(pts))
            values.append(val)
        if segments:
            fig, ax = plt.subplots()
            lc = LineCollection(segments, linewidths=2)
            lc.set_array(np.array(values))
            lc.set_cmap(HEATMAP_CMAP)
            ax.add_collection(lc)
            all_pts = np.vstack(segments)
            ax.set_xlim(all_pts[:, 0].min() - 5, all_pts[:, 0].max() + 5)
            ax.set_ylim(all_pts[:, 1].min() - 5, all_pts[:, 1].max() + 5)
            ax.set_aspect("equal", adjustable="box")
            ax.set_title("Network density heat map — average density")
            cb = plt.colorbar(lc, ax=ax); cb.set_label("density")
            nice_savefig(fig, outdir / "edge_heatmap_avg_density.png")
        else:
            print("[warn] No matching geometries for density heat map.")
    elif edges_geom:
        print("[info] No density columns; skipping density heat map.")

    # Bottleneck detection: highest average waitingTime else occupancy else lowest speed
    if "waitingTime" in edge.columns:
        bneck = edge.groupby("id")["waitingTime"].mean().rename("avg_wait_s").sort_values(ascending=False)
    elif "occupancy" in edge.columns:
        bneck = edge.groupby("id")["occupancy"].mean().rename("avg_occupancy").sort_values(ascending=False)
    elif "speed" in edge.columns:
        bneck = edge.groupby("id")["speed"].mean().rename("avg_speed_mps").sort_values(ascending=True)
    else:
        bneck = None
    if bneck is not None:
        bneck.to_csv(exports / "bottlenecks_top30.csv", header=True)
        print("\nTop bottlenecks:\n", bneck.head(10))

In [14]:
def main():
    ap = argparse.ArgumentParser(description="SUMO post-processing (fixed schemas)")
    ap.add_argument("--summary", type=Path, default=Path("summary.csv"))
    ap.add_argument("--tripinfo", type=Path, default=Path("tripinfo.csv"))
    ap.add_argument("--edgedata", type=Path, default=Path("edgedata.csv"))
    ap.add_argument("--net", type=Path, default=Path("siriFort.net.xml"))
    ap.add_argument("--outdir", type=Path, default=Path("out"))
    args, _ = ap.parse_known_args()  # ignore Jupyter/Spyder extras

    args.outdir.mkdir(parents=True, exist_ok=True)

    do_summary(args.summary, args.outdir)
    do_tripinfo(args.tripinfo, args.outdir)
    do_edgedata(args.edgedata, args.net, args.outdir)

    print(f"\nDone. Outputs saved in: {args.outdir.resolve()}")
    print("  - Charts: PNG files in the root of outdir")
    print("  - Tables: CSVs under outdir/exports")

if __name__ == "__main__":
    main()

  hist_cmap = plt.cm.get_cmap('Spectral_r').copy()
  .apply(lambda g: pearson_r(g["routeLength"].astype(float), g["duration"].astype(float)))
  plt.boxplot(data, labels=tl_bar.index.astype(str), vert=True)



Top bottlenecks:
 id
E7.95     187230.71
E7        117485.07
-E8.75    106533.41
E9.147     76310.69
E2.109     43858.00
-E8         1455.35
E9            20.80
E2             8.80
E0             0.00
E1             0.00
Name: avg_wait_s, dtype: float64

Done. Outputs saved in: C:\Users\Asus\Desktop\UCL Dissertation\Outputs\11 (BRT Aggressive)\out
  - Charts: PNG files in the root of outdir
  - Tables: CSVs under outdir/exports
