In [1]:
import os

# Root folder holding one sub-directory per Datakit variant.
# The path is machine-specific, so it can be overridden without editing the
# notebook by setting the DATAKIT_DATA_FOLDER environment variable before
# starting the kernel; the original location remains the default.
data_folder = os.environ.get(
    "DATAKIT_DATA_FOLDER",
    "/Users/albanpuech/benchmark_data/case24/datakit/",
)

In [2]:
# %% [markdown]
# # Datakit-only interactive comparison (preloaded data + spider plot normalized to the reference version)

# %%
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import ipywidgets as widgets
from IPython.display import display
import numpy as np

# -----------------------------------------------------------------------------
# (1) Specify Datakit variants: label -> path
# -----------------------------------------------------------------------------
# Every variant stores its pf_node.csv at the same location below its run directory.
_PF_NODE_RELPATH = "/case24_ieee_rts/raw/pf_node.csv"

# Labels whose run directory on disk is named differently from the label itself.
_RUN_DIR_OVERRIDES = {"top_none_gc_pert": "top_none_gc_pert_3"}

# Variant labels, in the order they appear in the selection widget.
_VARIANT_LABELS = [
    "top_none_gc_none",
    "top_none_gc_none_lnoise_0.1",
    "top_none_gc_none_5000",
    "top_none_gc_pert",
    "top_none_gc_perm",
    "top_random_line_k_2_variants_2_gc_none",
    "top_random_line_k_2_variants_2_gc_permutation_lnoise_0.1",
    "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.6_again",
    "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.4",
    "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.4_north_central",
]

# label -> full path of that variant's pf_node.csv
VERSIONS = {
    label: data_folder + "/" + _RUN_DIR_OVERRIDES.get(label, label) + _PF_NODE_RELPATH
    for label in _VARIANT_LABELS
}

# Human-readable legend text for each variant label (same keys as VERSIONS).
# plot_versions and plot_spider look labels up here to build their legends.
DESCRIPTIONS = {
    "top_none_gc_none": "No top nor GC perturbation",
    "top_none_gc_perm": "GC permutation",
    "top_none_gc_pert": "GC perturbation (sigma=3)",
    "top_none_gc_none_lnoise_0.1": "No top nor GC perturbation, local load noise of 0.1",
    "top_random_line_k_2_variants_2_gc_none": "topology perturbation: Random line k=2, variants=2, no GC perturbation",
    "top_random_line_k_2_variants_2_gc_permutation_lnoise_0.1": "topology perturbation: Random line k=2, variants=2, GC permutation, local load noise of 0.1",
    "top_none_gc_none_5000": "No top nor GC perturbation, 5000 samples",
    "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.6_again": "topology perturbation: Random k=1, variants=2, GC permutation, local load noise of 0.2, global range of 0.6, 1000 samples",
    "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.4": "topology perturbation: Random k=1, variants=2, GC permutation, local load noise of 0.2, global range of 0.4, 1000 samples",
    "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.4_north_central": "topology perturbation: Random k=1, variants=2, GC permutation, local load noise of 0.2, global range of 0.4, 1000 samples, north central US"
}

# Variant whose mean per-bus std is the denominator when plot_spider
# normalizes the other variants (a ratio of 1 means "same spread as this one").
REFERENCE_VERSION = "top_none_gc_none"

# (2) Fixed bus list
# Bus ids 0..23; plot_versions draws one group of boxes per bus in this list.
FIXED_BUSES = list(range(24))

# -----------------------------------------------------------------------------
# (3) Preload all CSVs
# -----------------------------------------------------------------------------
def load_csv(path, power_base=100.0):
    """Read one pf_node CSV and rescale its power columns.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_csv``.
    power_base : float, default 100.0
        Divisor applied to the ``Pd``, ``Qd``, ``Pg`` and ``Qg`` columns.
        NOTE(review): presumably the system power base used to convert the
        raw values to per-unit -- confirm against the Datakit export format.

    Returns
    -------
    pandas.DataFrame
        The loaded table with the four power columns divided by
        ``power_base``; every other column is left as read.

    Raises
    ------
    ValueError
        If ``power_base`` is zero.
    KeyError
        If any of the four power columns is missing from the CSV.
    """
    if power_base == 0:
        raise ValueError("power_base must be non-zero.")

    power_columns = ["Pd", "Qd", "Pg", "Qg"]
    df = pd.read_csv(path)
    # One vectorized division instead of a per-column loop; result is identical.
    df[power_columns] = df[power_columns] / power_base
    return df

# Read every variant once up front; the plotting functions only filter these
# in-memory frames afterwards.
ALL_DFS = {
    version_label: load_csv(csv_path)
    for version_label, csv_path in VERSIONS.items()
}

# -----------------------------------------------------------------------------
# (4a) Boxplot across buses
# -----------------------------------------------------------------------------
def plot_versions(feature, selected_versions, max_scenarios=10000):
    """Draw side-by-side boxplots of one feature per bus for several variants.

    Parameters
    ----------
    feature : str
        Column of the preloaded frames to plot (e.g. ``"Vm"``).
    selected_versions : collection of str
        Labels to include. Labels not present in ``ALL_DFS`` are ignored;
        boxes are drawn in ``ALL_DFS`` order.
    max_scenarios : int, default 10000
        Only rows whose ``scenario`` value is below this cap are used.

    Raises
    ------
    ValueError
        If none of ``selected_versions`` is a known variant.
    """
    dfs = {
        lab: df[df["scenario"] < max_scenarios]
        for lab, df in ALL_DFS.items()
        if lab in selected_versions
    }
    if not dfs:
        raise ValueError("plot_versions needs at least one known Datakit version to plot.")

    grouped = {lab: df.groupby("bus")[feature] for lab, df in dfs.items()}

    # Bus types are read from the first selected variant and keyed by bus id,
    # so the tick labels stay correct even if bus ids are not exactly 0..n-1.
    first_df = next(iter(dfs.values()))
    type_flags = first_df.groupby("bus")[["PQ", "PV", "REF"]].first()
    bus_types = {
        bus: "PQ" if pq == 1 else "PV" if pv == 1 else "REF"
        for bus, pq, pv, _ref in type_flags.itertuples()
    }

    fig, ax = plt.subplots(figsize=(12, 6))

    labels = list(grouped.keys())
    m = len(FIXED_BUSES)
    k = len(labels)
    base_positions = range(m)
    # Split a 0.7-wide slot around each bus position evenly between variants.
    width = 0.7 / max(k, 1)
    offsets = [(i - (k - 1) / 2.0) * width for i in range(k)]

    # The first six colors are the historical palette; the extra ones keep
    # every variant distinguishable when more than six are selected.
    COLORS = ["red", "blue", "green", "orange", "purple", "brown",
              "pink", "gray", "olive", "cyan"]

    for i, lab in enumerate(labels):
        per_bus_data = [grouped[lab].get_group(b).values for b in FIXED_BUSES]
        positions = [p + offsets[i] for p in base_positions]
        bp = ax.boxplot(
            per_bus_data,
            positions=positions,
            widths=width * 0.9,
            showfliers=False,
            patch_artist=True,
            medianprops=dict(linewidth=1.5),
        )
        for patch in bp["boxes"]:
            patch.set_facecolor(COLORS[i % len(COLORS)])
            patch.set_alpha(0.5)

    ax.set_title(f"{feature} distribution across fixed buses")
    ax.set_ylabel(feature)
    ax.set_xticks(list(base_positions))
    ax.set_xticklabels([f"Bus {b}\n{bus_types[b]}" for b in FIXED_BUSES])
    ax.grid(True, axis="y", alpha=0.3)
    # Fall back to the raw label when a variant has no description entry.
    legend_patches = [
        mpatches.Patch(color=COLORS[i % len(COLORS)], label=DESCRIPTIONS.get(lab, lab))
        for i, lab in enumerate(labels)
    ]
    ax.legend(handles=legend_patches, title="Datakit version", loc="best")

    plt.tight_layout()
    plt.show()

# -----------------------------------------------------------------------------
# (4b) Spider (radar) plot across features, normalized to the reference version
# -----------------------------------------------------------------------------
def plot_spider(selected_versions, max_scenarios=10000):
    """Radar plot of each variant's feature spread relative to the reference.

    For every selected variant, the standard deviation of each feature is
    computed per bus over the rows with ``scenario < max_scenarios``, averaged
    across buses, and divided by the same quantity for ``REFERENCE_VERSION``.
    A value of 1 therefore means "same spread as the reference". No log
    transform is applied.

    Parameters
    ----------
    selected_versions : iterable of str
        Keys of ``ALL_DFS`` to draw, in plotting order.
    max_scenarios : int, default 10000
        Scenario cap applied to both the selected variants and the reference.

    Raises
    ------
    KeyError
        If a selected label (or ``REFERENCE_VERSION``) is not in ``ALL_DFS``.
    """
    features = ["Vm", "Va", "Pd", "Qd", "Pg", "Qg"]
    labels = list(selected_versions)
    # The first six colors are the historical palette; the extra ones keep
    # every variant distinguishable when more than six are selected.
    COLORS = ["red", "blue", "green", "orange", "purple", "brown",
              "pink", "gray", "olive", "cyan"]

    def _mean_bus_std(lab):
        """Mean over buses of the per-bus std of each feature, honoring the scenario cap."""
        df = ALL_DFS[lab]
        capped = df[df["scenario"] < max_scenarios]
        return capped.groupby("bus")[features].std().mean()

    # The scenario cap must be applied before computing the stds; previously
    # the filtered frame was discarded and the cap had no effect on this plot.
    reference_std = _mean_bus_std(REFERENCE_VERSION)
    # A reference feature with zero spread yields inf/nan ratios here.
    stats = {lab: (_mean_bus_std(lab) / reference_std).tolist() for lab in labels}

    # setup polar coordinates
    N = len(features)
    angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
    angles += angles[:1]  # loop back

    fig, ax = plt.subplots(figsize=(7, 7), subplot_kw=dict(polar=True))

    for i, lab in enumerate(labels):
        # Close the polygon without mutating the stored stats list.
        values = stats[lab] + stats[lab][:1]
        color = COLORS[i % len(COLORS)]
        ax.plot(angles, values, color=color, linewidth=2, label=DESCRIPTIONS.get(lab, lab))
        ax.fill(angles, values, color=color, alpha=0.25)

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(features)
    ax.set_title("Normalized Mean Std. (across buses) of Feature Values by Variant", y=1.1)
    ax.grid(True)
    if labels:
        # Skip the legend when nothing was drawn to avoid an empty-legend warning.
        ax.legend(loc="upper right", bbox_to_anchor=(1.2, 1.1))

    plt.show()

# -----------------------------------------------------------------------------
# (5) Widgets
# -----------------------------------------------------------------------------
# Multi-select list of variant labels; every variant starts selected.
version_multi = widgets.SelectMultiple(
    description="Versions",
    options=list(VERSIONS),
    value=tuple(VERSIONS),
    layout=widgets.Layout(width="300px"),
    rows=min(len(VERSIONS), 6),
)

# Feature shown in the boxplot.
feature_toggle = widgets.ToggleButtons(
    description="Feature",
    options=["Vm", "Va", "Pd", "Qd", "Pg", "Qg"],
    value="Vm",
)

# Upper bound (exclusive) on the scenario index used by both plots.
scenario_cap = widgets.IntSlider(
    description="Max scenarios",
    min=100,
    max=30000,
    step=100,
    value=10000,
)

# Area the plots are rendered into.
out = widgets.Output()

def _on_change(_=None):
    """Redraw both plots inside ``out`` from the current widget values.

    Used as the observer callback for the controls (the change event is
    ignored) and called once directly for the initial render.
    """
    with out:
        out.clear_output(wait=True)

        chosen = list(version_multi.value)
        if len(chosen) == 0:
            print("Select at least one Datakit version.")
            return

        cap = scenario_cap.value
        # Boxplot for the chosen feature, then the spider plot across features.
        plot_versions(feature_toggle.value, chosen, max_scenarios=cap)
        plot_spider(chosen, max_scenarios=cap)

# Re-render both plots whenever the value of any control changes.
for w in (version_multi, feature_toggle, scenario_cap):
    w.observe(_on_change, names=["value"])

# Layout: version list on the left, feature buttons and scenario slider stacked on the right.
controls = widgets.HBox([version_multi, widgets.VBox([feature_toggle, scenario_cap])])
display(controls, out)
# Draw the initial plots for the default widget values.
_on_change()


HBox(children=(SelectMultiple(description='Versions', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), layout=Layout(widt…

Output()