In [1]:
import torch
import numpy as np
import scipy.io
# %%
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import ipywidgets as widgets
from IPython.display import display
import numpy as np


In [2]:
# Root folder of the Datakit outputs; the per-variant CSV paths below are built from it.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
data_folder = "/Users/albanpuech/benchmark_data/datakit"

In [3]:
# pglearn
# Read the PGLearn CSV located under the "PGLearn-Small-118_ieee-nminus1" folder.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
pglearn_data_path = "/Users/albanpuech/benchmark_data/PGLearn-Small-118_ieee-nminus1/pglearn.csv"
df_pglearn_nminus1 = pd.read_csv(pglearn_data_path)

# Disabled alternative: the CSV under the plain "pglearn" folder.
# pglearn_data_path = "/Users/albanpuech/benchmark_data/pglearn/pglearn.csv"
# df_pglearn = pd.read_csv(pglearn_data_path)

In [4]:
# Divide the four power columns of the PGLearn frame by 100 in one vectorized step.
# NOTE: the columns are overwritten in place, so re-running this cell divides them again.
# (The disabled `df_pglearn` frame would need the same rescaling if it is re-enabled.)
df_pglearn_nminus1[["Pd", "Qd", "Pg", "Qg"]] = (
    df_pglearn_nminus1[["Pd", "Qd", "Pg", "Qg"]] / 100
)

In [5]:
## Load pfdelta data
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
path_to_data = "/Users/albanpuech/benchmark_data/pfdelta/30000opfv2dataset_case118_ieee.mat"
pfdelta_data = scipy.io.loadmat(path_to_data)
# Read from the file but not used below: the power columns are divided by a literal 100.0.
# NOTE(review): presumably baseMVA == 100 for this case — confirm, or scale by baseMVA instead.
baseMVA = pfdelta_data["baseMVA"]

# Columns of "Dem" are counted as scenarios, rows of "bus" as buses.
num_samples = pfdelta_data["Dem"].shape[1]
num_buses = pfdelta_data["bus"].shape[0]

# "Dem", "Gen" and "Vol" are split into real/imaginary (or magnitude/angle) parts.
# Column-major flattening emits one full column (scenario) after another, which is the
# ordering produced by the np.tile / np.repeat index columns of the DataFrame below
# (assumes the rows of "Dem"/"Vol" are buses — the DataFrame construction fails otherwise).
Pd = np.real(pfdelta_data["Dem"]).flatten(order='F')
Qd = np.imag(pfdelta_data["Dem"]).flatten(order='F')
Pg_at_gens = np.real(pfdelta_data["Gen"])
Qg_at_gens = np.imag(pfdelta_data["Gen"])
Vm = np.abs(pfdelta_data["Vol"]).flatten(order='F')
Va = np.angle(pfdelta_data["Vol"]).flatten(order='F')
Va = np.rad2deg(Va)  # angles are stored in degrees

# First column of "gen", shifted by one to be used as a zero-based row index.
# NOTE(review): presumably 1-based, contiguous bus numbers — confirm.
gen_bus = (
    torch.tensor(pfdelta_data["gen"][:, 0], dtype=torch.long) - 1
)

Pg = np.zeros((num_buses, num_samples))
Qg = np.zeros((num_buses, num_samples))

# Scatter the per-generator values onto their buses. np.add.at accumulates, so several
# generators attached to the same bus are summed; a plain fancy-index assignment would
# silently keep only the last generator of each bus.
gen_rows = gen_bus.numpy()
np.add.at(Pg, gen_rows, Pg_at_gens)
np.add.at(Qg, gen_rows, Qg_at_gens)

Pg = Pg.flatten(order='F')
Qg = Qg.flatten(order='F')

# Long format: one row per (scenario, bus); power columns divided by 100.
df_delta = pd.DataFrame(
    {
        "bus": np.tile(np.arange(num_buses), num_samples),
        "scenario": np.repeat(np.arange(num_samples), num_buses),
        "Pd": Pd/100.0,
        "Qd": Qd/100.0,
        "Pg": Pg/100.0,
        "Qg": Qg/100.0,
        "Vm": Vm,
        "Va": Va,
    })



In [6]:
# %% [markdown]
# # Interactive dataset comparison (preloaded data, per-bus boxplots + normalized spider plot)



# -----------------------------------------------------------------------------
# (1) Dataset variants: label -> CSV path (loaded later) or an already-loaded DataFrame
# -----------------------------------------------------------------------------
def _pf_node_csv(variant):
    """Return the case118_ieee node-table path of the Datakit variant folder `variant`."""
    return f"{data_folder}/{variant}/case118_ieee/raw/pf_node.csv"


# Insertion order matters: it drives the widget option order and the plot colours.
VERSIONS = {
    "baseline_n_minus_1": _pf_node_csv("baseline_n_minus_1"),
    "pfdelta_nminus1": df_delta,
    "pglearn_nminus1": df_pglearn_nminus1,
    "baseline_n_minus_20": _pf_node_csv("baseline_n_minus_20"),
    "power_factor_scaling": _pf_node_csv("power_factor_scaling"),
    "baseline_n_minus_1_without_bounds": _pf_node_csv("baseline_n_minus_1_without_bounds"),
    # "pglearn": df_pglearn,  (disabled)
}

# Human-readable legend text for each label of VERSIONS.
DESCRIPTIONS = dict(
    baseline_n_minus_1="GridFM with n-1 perturbations",
    pfdelta_nminus1="pfdelta with n-1 perturbations",
    pglearn_nminus1="pglearn with n-1 perturbations",
    baseline_n_minus_20="GridFM with n-20 perturbations",
    power_factor_scaling="GridFM with power factor scaling",
    baseline_n_minus_1_without_bounds="GridFM with n-1 perturbations, no bounds",
    # pglearn="pglearn",  (disabled)
)

# Dataset whose PQ/PV/REF columns provide the bus-type labels of the boxplot.
REFERENCE_VERSION = "baseline_n_minus_1"

# (2) Fixed bus list: buses 0-19 plus bus 58.
# Listed explicitly instead of drawn with np.random.randint(0, 20, size=20): that call
# samples with replacement (the same bus appeared several times, producing duplicate box
# groups with identical tick labels) and, being unseeded, changed on every kernel restart.
FIXED_BUSES = np.concatenate((np.arange(20), [58]))

# -----------------------------------------------------------------------------
# (3) Preload all CSVs
# -----------------------------------------------------------------------------
def load_csv(path):
    """Read the CSV at `path` and return it with Pd, Qd, Pg and Qg divided by 100.

    All other columns are returned exactly as read. A missing power column
    raises KeyError.
    """
    power_columns = ["Pd", "Qd", "Pg", "Qg"]
    frame = pd.read_csv(path)
    # One vectorized division instead of a per-column loop.
    frame[power_columns] = frame[power_columns] / 100.0
    return frame

# String entries are treated as CSV paths and loaded through load_csv (which rescales the
# power columns); any other entry is stored as-is and is not rescaled here.
ALL_DFS = {
    label: source if not isinstance(source, str) else load_csv(source)
    for label, source in VERSIONS.items()
}

# -----------------------------------------------------------------------------
# (4a) Boxplot across buses
# -----------------------------------------------------------------------------
def plot_versions(feature, selected_versions, max_scenarios=1000):
    """Draw side-by-side per-bus boxplots of `feature` for the selected datasets.

    For every dataset of ``ALL_DFS`` whose label is in `selected_versions`, a random
    subset of scenarios is kept and `feature` is grouped by bus. One box is drawn per
    (bus of ``FIXED_BUSES``, dataset). The figure is written to ``boxplot.pdf`` and shown.

    Parameters
    ----------
    feature : str
        Column to plot (one of the widget options, e.g. "Pg").
    selected_versions : collection of str
        Labels (keys of ``ALL_DFS``) to include; datasets are drawn in ``ALL_DFS`` order.
    max_scenarios : int, default 1000
        Upper bound on the number of distinct scenarios kept per dataset.

    Raises
    ------
    KeyError
        If a bus of ``FIXED_BUSES`` is absent from a subsampled dataset or from the
        reference dataset.
    """

    def _subsample(df):
        # Draw from the dataset's OWN scenario ids and without replacement, so that each
        # dataset contributes min(max_scenarios, available) distinct scenarios. Drawing
        # ids with replacement from a range sized for another dataset under-sampled
        # every dataset, and unevenly.
        scenario_ids = df["scenario"].unique()
        n_keep = min(max_scenarios, len(scenario_ids))
        kept = np.random.choice(scenario_ids, size=n_keep, replace=False)
        return df[df["scenario"].isin(kept)]

    dfs = {lab: _subsample(df) for lab, df in ALL_DFS.items() if lab in selected_versions}
    grouped = {lab: df.groupby("bus")[feature] for lab, df in dfs.items()}

    # Bus types come from the reference dataset. Fall back to the full reference frame
    # when it is not among the selected versions (this used to raise KeyError).
    reference_df = dfs.get(REFERENCE_VERSION, ALL_DFS[REFERENCE_VERSION])
    type_flags = reference_df.groupby("bus")[["PQ", "PV", "REF"]].first()
    # Keyed by bus id (not by row position), so non-contiguous bus ids are labelled correctly.
    bus_types = {
        bus: "PQ" if flags[0] == 1 else "PV" if flags[1] == 1 else "REF"
        for bus, flags in zip(type_flags.index, type_flags.values)
    }

    fig, ax = plt.subplots(figsize=(12, 6))

    labels = list(grouped.keys())
    m = len(FIXED_BUSES)
    k = len(labels)
    base_positions = range(m)
    # Each bus gets a 0.7-wide slot shared equally by the k datasets, centred on the tick.
    width = 0.7 / max(k, 1)
    offsets = [(i - (k - 1) / 2.0) * width for i in range(k)]

    COLORS = ["red", "blue", "green", "orange", "purple", "brown"]

    for i, lab in enumerate(labels):
        per_bus_data = [grouped[lab].get_group(b).values for b in FIXED_BUSES]
        print(lab, [data.mean() for data in per_bus_data])
        positions = [p + offsets[i] for p in base_positions]
        bp = ax.boxplot(
            per_bus_data,
            positions=positions,
            widths=width * 0.9,
            showfliers=False,
            patch_artist=True,
            medianprops=dict(linewidth=1.5),
        )
        for patch in bp["boxes"]:
            patch.set_facecolor(COLORS[i % len(COLORS)])
            patch.set_alpha(0.5)

    ax.set_title(f"{feature} distribution across buses", fontsize=14)
    ax.set_ylabel(feature, fontsize=14)
    ax.set_xticks(list(base_positions))
    ax.set_xticklabels([f"Bus {b} {bus_types[b]}" for b in FIXED_BUSES], rotation=45, ha="right", fontsize=14)

    ax.grid(True, axis="y", alpha=0.3)
    legend_patches = [mpatches.Patch(color=COLORS[i % len(COLORS)], label=DESCRIPTIONS[lab]) for i, lab in enumerate(labels)]
    ax.legend(handles=legend_patches, title="Datakit version", loc="best", fontsize=14)

    plt.tight_layout()
    plt.savefig("boxplot.pdf")
    plt.show()

# -----------------------------------------------------------------------------
# (4b) Spider (radar) plot across features (linear radial axis, normalized per feature)
# -----------------------------------------------------------------------------
def plot_spider(selected_versions, max_scenarios=1000):
    """Draw a radar plot comparing the spread of each feature across datasets.

    For every selected dataset a random subset of scenarios is kept, the standard
    deviation of each feature is computed per bus and averaged over buses, and each
    feature axis is then divided by its maximum over the selected datasets (so the
    largest dataset on an axis sits at 1). The figure is written to
    ``spider_plot.pdf`` and shown.

    Parameters
    ----------
    selected_versions : iterable of str
        Labels (keys of ``ALL_DFS``) to include, plotted in the given order.
    max_scenarios : int, default 1000
        Upper bound on the number of distinct scenarios kept per dataset.
    """
    features = ["Vm", "Va", "Pd", "Qd", "Pg", "Qg"]
    labels = list(selected_versions)
    COLORS = ["red", "blue", "green", "orange", "purple", "brown"]

    def _subsample(df):
        # Draw from the dataset's OWN scenario ids and without replacement, so that each
        # dataset contributes min(max_scenarios, available) distinct scenarios.
        scenario_ids = df["scenario"].unique()
        n_keep = min(max_scenarios, len(scenario_ids))
        kept = np.random.choice(scenario_ids, size=n_keep, replace=False)
        return df[df["scenario"].isin(kept)]

    df_less_scenarios = {lab: _subsample(df) for lab, df in ALL_DFS.items() if lab in selected_versions}
    # Report the number of distinct scenarios kept per dataset (not the row count).
    for lab, df in df_less_scenarios.items():
        print(lab, df["scenario"].nunique())
    grouped_std = {lab: df.groupby("bus")[features].std() for lab, df in df_less_scenarios.items()}

    # Mean over buses of the per-bus standard deviation, one value per feature.
    stats = {}
    for lab in labels:
        vals = []
        for f in features:
            per_bus_std = grouped_std[lab][f]
            if f == "Va":
                # Degrees -> radians. A constant factor, so it cancels in the per-feature
                # normalization below; kept so the intermediate values are in radians.
                per_bus_std = np.deg2rad(per_bus_std)
            vals.append(per_bus_std.mean())
        stats[lab] = vals

    # Rows are dataset labels, columns are features.
    df_stats = pd.DataFrame.from_dict(stats, orient="index", columns=features)
    # Normalize each feature by its maximum; a feature whose maximum is 0 is left at 0
    # instead of turning into NaN through 0/0.
    feature_max = df_stats.max(axis=0)
    df_stats = df_stats.div(feature_max.where(feature_max != 0, 1.0), axis=1)
    print(df_stats)

    # One spoke per feature; repeat the first angle/value to close each polygon.
    N = len(features)
    angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(7, 7), subplot_kw=dict(polar=True))

    for i, lab in enumerate(labels):
        values = df_stats.loc[lab].to_list()
        values += values[:1]
        ax.plot(angles, values, color=COLORS[i % len(COLORS)], linewidth=2, label=DESCRIPTIONS[lab])
        ax.fill(angles, values, color=COLORS[i % len(COLORS)], alpha=0.25)

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(features, fontsize=14)
    ax.set_title("Data diversity across datasets", y=1.1, fontsize=14)
    ax.grid(True)
    ax.legend(loc="upper right", bbox_to_anchor=(1.2, 1.1), fontsize=14)
    plt.tight_layout()

    plt.savefig("spider_plot.pdf")
    plt.show()


# -----------------------------------------------------------------------------
# (5) Widgets
# -----------------------------------------------------------------------------
# Multi-select over every dataset label; all labels start selected.
version_multi = widgets.SelectMultiple(
    description="Versions",
    options=list(VERSIONS),
    value=tuple(VERSIONS),
    rows=min(len(VERSIONS), 6),
    layout=widgets.Layout(width="300px"),
)

# Feature shown in the boxplot.
feature_toggle = widgets.ToggleButtons(
    description="Feature",
    options=["Vm", "Va", "Pd", "Qd", "Pg", "Qg"],
    value="Pg",
)

# Passed as `max_scenarios` to both plotting functions.
scenario_cap = widgets.IntSlider(
    value=1000,
    min=100,
    max=30000,
    step=100,
    description="Max scenarios",
)

# Output area that receives the prints and figures of each refresh.
out = widgets.Output()

def _on_change(_=None):
    """Redraw both plots inside `out` from the current widget values.

    The argument (the change event passed by `observe`) is ignored, so the function
    can also be called directly for the initial render.
    """
    with out:
        out.clear_output(wait=True)
        chosen = list(version_multi.value)
        if len(chosen) == 0:
            print("Select at least one Datakit version.")
            return
        cap = scenario_cap.value
        # Boxplot of the toggled feature, then the radar plot over all features.
        plot_versions(feature_toggle.value, chosen, max_scenarios=cap)
        plot_spider(chosen, max_scenarios=cap)

# Redraw both plots whenever the value of any control changes.
for w in (version_multi, feature_toggle, scenario_cap):
    w.observe(_on_change, names=["value"])

# Layout: version selector on the left; feature toggle stacked above the scenario slider.
controls = widgets.HBox([version_multi, widgets.VBox([feature_toggle, scenario_cap])])
display(controls, out)
# Initial render with the default widget values.
_on_change()


HBox(children=(SelectMultiple(description='Versions', index=(0, 1, 2, 3, 4, 5), layout=Layout(width='300px'), …

Output()