In [1]:
# Import Required Libraries
import os
import json
import pandas as pd
import numpy as np
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go
from plotly.colors import sequential
import plotly.io as pio

In [2]:
# Load and Prepare Data
# Load the JSON file (JSON is UTF-8 by specification, so do not rely on the
# platform default encoding).
with open("../docs/results.json", "r", encoding="utf-8") as f:
    results_data = json.load(f)

# Extract the results list.
# Indexing the parsed document with ["results"] previously failed with
# "TypeError: list indices must be integers or slices, not str", which is what
# happens when the document is a list rather than a dict. Accept both layouts:
# a dict wrapping the records under "results", or the records stored directly
# as the top-level list.
# NOTE(review): confirm which layout ../docs/results.json actually uses.
if isinstance(results_data, dict):
    results_list = results_data["results"]
else:
    results_list = results_data

# Create a flattened list of dictionaries for DataFrame creation:
# one row per result, identifying fields first, then every overall metric.
flattened_results = []
for result in results_list:
    flat_result = {
        "Model": result["model_id"],
        "Task": result["task_id"],
        "Dataset": result["dataset"],
    }

    # The metrics_summary/overall section is optional; results without it
    # still produce a row containing only the identifying fields.
    if "metrics_summary" in result and "overall" in result["metrics_summary"]:
        # Named `overall_metrics` (not `metrics`) so it does not collide with
        # the list of metric names that the notebook defines further down.
        overall_metrics = result["metrics_summary"]["overall"]
        flat_result.update(overall_metrics)

    flattened_results.append(flat_result)

# Create DataFrame
df = pd.DataFrame(flattened_results)

print(f"Loaded data with shape: {df.shape}")
print(f"Columns: {', '.join(df.columns)}")
df

TypeError: list indices must be integers or slices, not str

In [None]:
# Rename columns to consistent format: trim surrounding whitespace and drop
# any double-quote characters from the column names.
df.columns = [col.strip().replace('"', "") for col in df.columns]

# Combined key used to group bars and label the legend.
df["Task-Dataset"] = df["Task"] + " - " + df["Dataset"]

# Models to plot, in a fixed display order.
# The data-derived `sorted(df["Model"].unique())` assignment that used to
# precede this list was a dead store (it was overwritten immediately), so it
# has been removed; this hard-coded list is what the notebook actually uses.
models = [
    "LightGBM",
    "RandomForest",
    "XGBoost",
    "CNN",
    "GRU",
    "InceptionTime",
    "LSTM",
    "DeepseekR1Llama8b",
    "Gemini2p5flash",
    "Gemma3",
    "Mistral",
    "Llama3",
]

In [None]:

# Every distinct task/dataset pairing present in the results, sorted ascending.
task_datasets = df["Task-Dataset"].drop_duplicates().tolist()
task_datasets.sort()

# Metric columns to plot, in display order.
# Currently disabled: sensitivity, specificity, f1_score, accuracy,
# balanced_accuracy, precision, recall, kappa.
metrics = ["auroc", "auprc", "normalized_auprc", "mcc", "minpse"]

In [None]:
# Restrict the plot to a fixed set of task-dataset keys (all on "eicu"),
# written in the same "<task> - <dataset>" form as the "Task-Dataset" column.
task_datasets = [f"{task} - eicu" for task in ("aki", "mortality", "sepsis")]

In [None]:
# Generate blue shades for task datasets: one colour per task-dataset, taken
# from the end of the Blues palette and reversed so the last palette entry is
# used first.
blues_reversed = list(sequential.Blues)[::-1]
traces_per_metric = len(task_datasets)

# The palette has a fixed length. Slicing with `len(palette) - n` as the start
# index goes negative once n exceeds the palette length and then yields too few
# colours, so cycle through the palette instead. When n fits in the palette the
# result is identical to taking the last n entries reversed.
task_dataset_colors = [
    blues_reversed[position % len(blues_reversed)]
    for position in range(traces_per_metric)
]

In [None]:
# Display the colours chosen for the task-datasets (one entry per task-dataset).
task_dataset_colors

['rgb(8,48,107)', 'rgb(8,81,156)', 'rgb(33,113,181)']

In [None]:
# Create traces for all (metric, task-dataset) pairs.
# The figure holds len(metrics) * len(task_datasets) bar traces, laid out
# metric by metric; a dropdown toggles which metric's group of traces is shown.
fig = go.Figure()

initial_metric_index = 0  # Show only the first metric at start
# Recomputed here so this cell does not depend on a value left behind by
# another cell; it must equal the number of traces added per metric below.
traces_per_metric = len(task_datasets)


def _metric_title(metric_name):
    """Return the figure title shown while `metric_name` is selected."""
    return f"PULSE Scores: {metric_name}"


for metric_index, metric in enumerate(metrics):
    # Only show traces for the first metric initially
    visible = metric_index == initial_metric_index
    for task_dataset_index, task_dataset in enumerate(task_datasets):
        sub_df = df[df["Task-Dataset"] == task_dataset]
        # One score per model; if a model appears more than once for this
        # task-dataset, the first row wins.
        scores = sub_df.drop_duplicates(subset="Model").set_index("Model")[metric]
        # Models without a result get None so they keep their x-axis slot
        # but draw no bar.
        y_vals = [scores.get(model) for model in models]
        fig.add_trace(
            go.Bar(
                x=models,
                y=y_vals,
                name=task_dataset,
                marker_color=task_dataset_colors[task_dataset_index],
                legendgroup=task_dataset,
                visible=visible,
                meta=task_dataset,  # surfaced in the hover text via %{meta}
            )
        )

# Create dropdown visibility logic: for metric i, exactly the traces in
# [i * traces_per_metric, (i + 1) * traces_per_metric) are visible.
total_traces = len(metrics) * traces_per_metric
visibility_map = []
for metric_index in range(len(metrics)):
    start = metric_index * traces_per_metric
    visibility_map.append(
        [
            start <= trace_index < start + traces_per_metric
            for trace_index in range(total_traces)
        ]
    )

# Add dropdown menu and all layout settings in a single call.
fig.update_layout(
    updatemenus=[
        {
            "buttons": [
                {
                    "label": metric,
                    "method": "update",
                    # Update only the title text so the title's position and
                    # font are kept, and use the same wording as the initial
                    # title.
                    "args": [
                        {"visible": vis},
                        {"title.text": _metric_title(metric)},
                    ],
                }
                for metric, vis in zip(metrics, visibility_map)
            ],
            "active": initial_metric_index,
            "direction": "down",
            "showactive": True,
            "x": 1.15,
            "xanchor": "left",
            "y": 1.15,
            "yanchor": "top",
        }
    ],
    barmode="group",
    # Title: matches the initially visible metric and is centred.
    title_text=_metric_title(metrics[initial_metric_index]),
    title_x=0.5,
    title_y=0.95,
    title_font=dict(size=20),
    xaxis_title="Model",
    yaxis_title="Score",
    legend_title="Task-Dataset",
    xaxis_tickangle=-30,
    width=1100,
    height=650,
    # Font sizes for better readability
    xaxis_title_font=dict(size=16),
    yaxis_title_font=dict(size=16),
    legend_title_font=dict(size=16),
    legend_font=dict(size=14),
    xaxis_tickfont=dict(size=12),
    yaxis_tickfont=dict(size=12),
    # Background colors
    plot_bgcolor="rgba(0, 0, 0, 0.1)",
    paper_bgcolor="rgba(255, 255, 255, 1.0)",
)

# Add hover template for better readability
fig.update_traces(
    hovertemplate="<b>Model:</b> %{x}<br><b>Task-Dataset:</b> %{meta}<br><b>Score:</b> %{y:.2f}<extra></extra>",
)

# Save the interactive chart to HTML
pio.write_html(fig, file="benchmark_plot.html", full_html=True, include_plotlyjs="cdn")
fig.show()