In [1]:
import os

import torch
import numpy as np
import scipy.io
# %%
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import ipywidgets as widgets
from IPython.display import display
import numpy as np


In [2]:
# Root folder holding one sub-directory per Datakit run.
# The default is a machine-specific absolute path; set the DATAKIT_DATA_FOLDER
# environment variable to run the notebook elsewhere without editing this cell.
data_folder = os.environ.get(
    "DATAKIT_DATA_FOLDER", "/Users/albanpuech/benchmark_data/datakit"
)

In [3]:
## Load pfdelta data
# The default is a machine-specific absolute path; set the PFDELTA_MAT_PATH
# environment variable to load the .mat file from another location.
path_to_data = os.environ.get(
    "PFDELTA_MAT_PATH",
    "/Users/albanpuech/benchmark_data/pfdelta/30000opfv2dataset_case118_ieee.mat",
)
pfdelta_data = scipy.io.loadmat(path_to_data)
# NOTE: with loadmat's default options MATLAB scalars come back as 2-D arrays,
# so baseMVA is an array here, not a Python float.
baseMVA = pfdelta_data["baseMVA"]

# Axis 1 of "Dem" is taken as the scenario axis; the row count of "bus" as the
# number of buses.
num_samples = pfdelta_data["Dem"].shape[1]
num_buses = pfdelta_data["bus"].shape[0]




In [4]:
# Build a long-format table (one row per bus per scenario) from the pfdelta
# arrays. Every per-bus matrix is flattened column-major (order='F'): all rows
# of column 0 first, then column 1, and so on. Columns are scenarios
# (num_samples = Dem.shape[1]); rows are assumed to be buses in the same order
# as the "bus" matrix -- TODO confirm against the dataset description.
Pd = np.real(pfdelta_data["Dem"]).flatten(order='F')
Qd = np.imag(pfdelta_data["Dem"]).flatten(order='F')
Pg_at_gens = np.real(pfdelta_data["Gen"])
Qg_at_gens = np.imag(pfdelta_data["Gen"])
Vm = np.abs(pfdelta_data["Vol"]).flatten(order='F')
Va = np.angle(pfdelta_data["Vol"]).flatten(order='F')
Va = np.rad2deg(Va)  # voltage angle in degrees

# Column 0 of "gen" holds the bus number of each generator; subtracting 1 turns
# it into a zero-based row index (assumes buses are numbered 1..num_buses).
gen_bus = (
    torch.tensor(pfdelta_data["gen"][:, 0], dtype=torch.long) - 1
)
# Plain NumPy view of the same indices, used for indexing NumPy arrays below.
gen_bus_idx = gen_bus.numpy()

Pg = np.zeros((num_buses, num_samples))
Qg = np.zeros((num_buses, num_samples))

# Accumulate rather than assign: with `Pg[gen_bus, :] = Pg_at_gens`, several
# generators attached to the same bus would overwrite one another and only the
# last one would survive. np.add.at sums every generator into its bus row.
np.add.at(Pg, gen_bus_idx, Pg_at_gens)
np.add.at(Qg, gen_bus_idx, Qg_at_gens)

Pg = Pg.flatten(order='F')
Qg = Qg.flatten(order='F')

# "bus" cycles 0..num_buses-1 within each scenario and "scenario" repeats each
# id num_buses times, matching the column-major flattening above.
# Powers are divided by 100, the same scaling load_csv applies to the Datakit
# CSVs (presumably MW/MVAr to per-unit on a 100 MVA base -- TODO confirm).
df_delta = pd.DataFrame(
    {
        "bus": np.tile(np.arange(num_buses), num_samples),
        "scenario": np.repeat(np.arange(num_samples), num_buses),
        "Pd": Pd/100.0,
        "Qd": Qd/100.0,
        "Pg": Pg/100.0,
        "Qg": Qg/100.0,
        "Vm": Vm,
        "Va": Va,
    })



In [5]:
# %% [markdown]
# # Interactive comparison of Datakit variants and pfdelta (preloaded data; box plot + normalized spider plot)



# -----------------------------------------------------------------------------
# (1) Specify Datakit variants: label -> path
# -----------------------------------------------------------------------------
# Single table of Datakit runs: (run directory name, human-readable description).
# The run name doubles as the label shown in the widget and as the key of both
# VERSIONS and DESCRIPTIONS.
_DATAKIT_RUNS = [
    (
        "top_random_line_k_1_variants_2_gc_permutation_lnoise_0.1",
        "datakit all perturbations",
    ),
    (
        "top_random_k_20_variants_2_gc_permutation_lnoise_0.2_grange_0.4_north_central",
        "Topology perturbation: Random k=20, variants=2, GC permutation, local load noise of 0.2, global range of 0.4, north central US",
    ),
    (
        "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.6",
        "+ datakit with more local noise",
    ),
    (
        "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.6_again",
        "+ datakit with more local noise",
    ),
    (
        "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.6_pg_0.8_1.0",
        "+ datakit with power factor scaling",
    ),
    (
        "top_random_k_1_variants_2_gc_permutation_lnoise_0.2_grange_0.6_pg_0.8_1.0_large",
        "+ datakit with power factor scaling and 10000 scenarios",
    ),
    (
        "top_random_k_10_variants_2_gc_permutation_lnoise_0.2_grange_0.6_pg_0.8_1.0",
        "+ power factor scaling and k =10",
    ),
    (
        "top_random_k_10_variants_2_gc_permutation_load_random_pg_0.8_1.0",
        "+ random active power load and k =10",
    ),
    (
        "higher_scaling_factor",
        "+ higher scaling factor",
    ),
    (
        "test",
        "+ test",
    ),
    (
        "top_random_k_1_variants_2_gc_permutation_random",
        "Topology perturbation: Random k=1, variants=2, GC permutation, random load",
    ),
]

# label -> path of the run's node-level CSV; the pfdelta entry is the
# in-memory DataFrame built earlier instead of a path.
VERSIONS = {
    run: data_folder + "/" + run + "/case118_ieee/raw/pf_node.csv"
    for run, _ in _DATAKIT_RUNS
}
VERSIONS["pfdelta"] = df_delta

# label -> legend text
DESCRIPTIONS = dict(_DATAKIT_RUNS)
DESCRIPTIONS["pfdelta"] = "pfdelta"

# Version used for bus-type labels and as the spider-plot normalisation baseline.
REFERENCE_VERSION = "top_random_line_k_1_variants_2_gc_permutation_lnoise_0.1"

# (2) Fixed bus list
# Buses 0-19 plus bus 58. This used to be an unseeded
# np.random.randint(0, 20, size=20), which samples *with replacement*: the
# "fixed" list changed on every run and almost always contained duplicate
# buses (hence duplicated boxes in the plot). The list is now deterministic
# and still has 21 entries drawn from the same range.
FIXED_BUSES = np.concatenate((np.arange(20), [58]))

# -----------------------------------------------------------------------------
# (3) Preload all CSVs
# -----------------------------------------------------------------------------
def load_csv(path, base_mva=100.0):
    """Read a node-level CSV and rescale its power columns.

    Parameters
    ----------
    path : str
        Location of the CSV file; it must contain the columns
        ``Pd``, ``Qd``, ``Pg`` and ``Qg``.
    base_mva : float, default 100.0
        Divisor applied to the four power columns. The default reproduces the
        previous hard-coded ``/ 100.0`` (presumably a per-unit conversion on a
        100 MVA base -- TODO confirm the CSV units).

    Returns
    -------
    pandas.DataFrame
        The loaded table with the four power columns divided by ``base_mva``;
        all other columns are returned unchanged.

    Raises
    ------
    KeyError
        If any of the four power columns is missing from the file.
    """
    power_cols = ["Pd", "Qd", "Pg", "Qg"]
    df = pd.read_csv(path)
    # One vectorised division instead of a per-column loop.
    df[power_cols] = df[power_cols] / base_mva
    return df

# label -> DataFrame. String entries of VERSIONS are CSV paths and are read
# through load_csv; any other entry is used as-is (it is already a DataFrame).
ALL_DFS = {}
for lab, obj in VERSIONS.items():
    if isinstance(obj, str):
        ALL_DFS[lab] = load_csv(obj)
    else:
        ALL_DFS[lab] = obj

# -----------------------------------------------------------------------------
# (4a) Boxplot across buses
# -----------------------------------------------------------------------------
def plot_versions(feature, selected_versions, max_scenarios=1000):
    """Draw grouped box plots of one feature at every bus in ``FIXED_BUSES``.

    For each selected version a random subset of scenarios is kept, the rows
    are grouped by bus, and one box per (bus, version) is drawn. Boxes of the
    same bus sit side by side; the colour identifies the version.

    Parameters
    ----------
    feature : str
        Column to plot (e.g. ``"Pg"``).
    selected_versions : collection of str
        Labels (keys of ``ALL_DFS``) to include. Versions are drawn in
        ``ALL_DFS`` order.
    max_scenarios : int, default 1000
        Upper bound on the number of distinct scenarios sampled per version.

    Raises
    ------
    KeyError
        If a bus of ``FIXED_BUSES`` has no rows in a sampled version or is
        absent from the reference version.
    """
    # Subsample each version from the scenario ids it actually contains.
    # Drawing ids from range(num_samples) (the pfdelta scenario count) with
    # replacement under-sampled every dataset whose scenario ids do not span
    # that range, and never returned max_scenarios distinct ids.
    dfs = {}
    for lab, df in ALL_DFS.items():
        if lab not in selected_versions:
            continue
        scenario_ids = df["scenario"].unique()
        n_keep = min(max_scenarios, len(scenario_ids))
        kept = np.random.choice(scenario_ids, size=n_keep, replace=False)
        dfs[lab] = df[df["scenario"].isin(kept)]

    grouped = {lab: df.groupby("bus")[feature] for lab, df in dfs.items()}

    # Bus types always come from the reference version in ALL_DFS, so the plot
    # still works when the reference is not among the selected versions.
    # Types are keyed by bus id rather than by array position.
    type_flags = ALL_DFS[REFERENCE_VERSION].groupby("bus")[["PQ", "PV", "REF"]].first()
    bus_types = {
        bus: "PQ" if pq == 1 else "PV" if pv == 1 else "REF"
        for bus, pq, pv, _ref in type_flags.itertuples()
    }

    fig, ax = plt.subplots(figsize=(12, 6))

    labels = list(grouped.keys())
    m = len(FIXED_BUSES)
    k = len(labels)
    base_positions = range(m)
    # Split a 0.7-wide slot per bus evenly between the k versions and centre
    # the group of boxes on the bus tick.
    width = 0.7 / max(k, 1)
    offsets = [(i - (k - 1) / 2.0) * width for i in range(k)]

    COLORS = ["red", "blue", "green", "orange", "purple", "brown"]

    for i, lab in enumerate(labels):
        per_bus_data = [grouped[lab].get_group(b).values for b in FIXED_BUSES]
        # Per-bus means, printed alongside the figure for quick inspection.
        print(lab, [data.mean() for data in per_bus_data])
        positions = [p + offsets[i] for p in base_positions]
        bp = ax.boxplot(
            per_bus_data,
            positions=positions,
            widths=width * 0.9,
            showfliers=False,
            patch_artist=True,
            medianprops=dict(linewidth=1.5),
        )
        for patch in bp["boxes"]:
            patch.set_facecolor(COLORS[i % len(COLORS)])
            patch.set_alpha(0.5)

    ax.set_title(f"{feature} distribution across fixed buses")
    ax.set_ylabel(feature)
    ax.set_xticks(list(base_positions))
    # Right-align the rotated tick labels so they end under their tick.
    ax.set_xticklabels([f"Bus {b}{bus_types[b]}" for b in FIXED_BUSES], rotation=45, ha="right")
    ax.grid(True, axis="y", alpha=0.3)
    legend_patches = [mpatches.Patch(color=COLORS[i % len(COLORS)], label=DESCRIPTIONS[lab]) for i, lab in enumerate(labels)]
    ax.legend(handles=legend_patches, title="Datakit version", loc="best")

    plt.tight_layout()
    plt.show()

# -----------------------------------------------------------------------------
# (4b) Spider (radar) plot across features, normalized by the reference version
# -----------------------------------------------------------------------------
def plot_spider(selected_versions, max_scenarios=1000):
    """Draw a radar plot of per-feature variability for the selected versions.

    For each version a random subset of scenarios is kept, the standard
    deviation of every feature is computed per bus and averaged over buses,
    and the result is divided by the same quantity for ``REFERENCE_VERSION``.
    The reference version therefore plots as 1.0 on every axis. The radial
    axis is linear; no log scaling is applied.

    Parameters
    ----------
    selected_versions : iterable of str
        Labels (keys of ``ALL_DFS``) to draw, in drawing order.
    max_scenarios : int, default 1000
        Upper bound on the number of distinct scenarios sampled per version.

    Raises
    ------
    KeyError
        If a selected label is not a key of ``ALL_DFS``.
    """
    features = ["Vm", "Va", "Pd", "Qd", "Pg", "Qg"]
    labels = list(selected_versions)
    COLORS = ["red", "blue", "green", "orange", "purple", "brown"]

    # The reference is always loaded because it is the normalisation
    # denominator, even when the user has not selected it for display.
    needed = set(labels) | {REFERENCE_VERSION}

    # Subsample each version from the scenario ids it actually contains.
    # Drawing ids from range(num_samples) (the pfdelta scenario count) with
    # replacement under-sampled every dataset whose scenario ids do not span
    # that range, and never returned max_scenarios distinct ids.
    df_less_scenarios = {}
    for lab, df in ALL_DFS.items():
        if lab not in needed:
            continue
        scenario_ids = df["scenario"].unique()
        n_keep = min(max_scenarios, len(scenario_ids))
        kept = np.random.choice(scenario_ids, size=n_keep, replace=False)
        df_less_scenarios[lab] = df[df["scenario"].isin(kept)]

    # Row count of each selected, subsampled table.
    for lab, df in df_less_scenarios.items():
        if lab in labels:
            print(lab, len(df))

    # Per-bus std of every feature, then averaged over buses (one value per feature).
    mean_std = {
        lab: df.groupby("bus")[features].std().mean()
        for lab, df in df_less_scenarios.items()
    }
    reference = mean_std[REFERENCE_VERSION]

    # Ratio to the reference version, in `features` order.
    stats = {lab: (mean_std[lab] / reference)[features].tolist() for lab in labels}

    # One spoke per feature; repeat the first angle to close the polygon.
    N = len(features)
    angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(7, 7), subplot_kw=dict(polar=True))

    for i, lab in enumerate(labels):
        # Build a closed copy rather than appending to the list held in `stats`.
        values = stats[lab] + stats[lab][:1]
        ax.plot(angles, values, color=COLORS[i % len(COLORS)], linewidth=2, label=DESCRIPTIONS[lab])
        ax.fill(angles, values, color=COLORS[i % len(COLORS)], alpha=0.25)

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(features)
    ax.set_title("Normalized Mean Std. (across buses) of Feature Values by Variant", y=1.1)
    ax.grid(True)
    ax.legend(loc="upper right", bbox_to_anchor=(1.2, 1.1))

    plt.show()

# -----------------------------------------------------------------------------
# (5) Widgets
# -----------------------------------------------------------------------------
# Multi-select of version labels; every version is selected initially.
version_multi = widgets.SelectMultiple(
    options=list(VERSIONS.keys()),
    value=tuple(VERSIONS.keys()),
    description="Versions",
    rows=min(6, len(VERSIONS)),
    layout=widgets.Layout(width="300px"),
)

# Feature shown in the box plot.
feature_toggle = widgets.ToggleButtons(
    options=["Vm", "Va", "Pd", "Qd", "Pg", "Qg"],
    value="Pg",
    description="Feature",
)

# Cap on the number of scenarios sampled per version.
# continuous_update=False makes the slider report its value only when the
# handle is released; otherwise every intermediate position during a drag
# would re-filter all DataFrames and redraw both figures.
scenario_cap = widgets.IntSlider(
    value=1000,
    min=100,
    max=30000,
    step=100,
    description="Max scenarios",
    continuous_update=False,
)

# Output area that receives the figures and printed text.
out = widgets.Output()

def _on_change(_=None):
    """Redraw both plots inside ``out`` from the current widget values.

    Used as the observer callback of the three controls (the change event is
    ignored) and also called directly once to render the initial view. When no
    version is selected, a hint is printed instead of plotting.
    """
    chosen = list(version_multi.value)
    with out:
        out.clear_output(wait=True)
        if len(chosen) == 0:
            print("Select at least one Datakit version.")
            return
        cap = scenario_cap.value
        # Box plot of the chosen feature, followed by the radar plot over all features.
        plot_versions(feature_toggle.value, chosen, max_scenarios=cap)
        plot_spider(chosen, max_scenarios=cap)

# Redraw whenever the value of any control changes.
for control_widget in (version_multi, feature_toggle, scenario_cap):
    control_widget.observe(_on_change, names=["value"])

# Layout: version list on the left, feature buttons above the slider on the right.
right_column = widgets.VBox([feature_toggle, scenario_cap])
controls = widgets.HBox([version_multi, right_column])
display(controls, out)

# Render the initial view without waiting for a user interaction.
_on_change()


HBox(children=(SelectMultiple(description='Versions', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), layout=Lay…

Output()