In [105]:
import json
import pandas as pd
import warnings
import seaborn as sns
from pyprojroot import here
import plotly.express as px

methods = ["JustCopy", "TimeGAN", "Time-Transformer", "TransFusion", "TTS-GAN", "TimeVQVAE"]
datasets = ["D2", "D3", "D4", "D5", "D6", "D7"]

rows = []

for method in methods:
    for dataset in datasets:
        file_candidates = list(here('result').glob(f'numeric_{method}_{dataset}_*.json'))
        if len(file_candidates) != 1:
            warnings.warn(f"Ignoring {method} {dataset}: Expected one result file for {method} {dataset}, instead matched {file_candidates}.")
            continue
        file_path = file_candidates[0]
        
        with open(file_path) as f:
            data = json.load(f)
        
        rows.append(((method, dataset), data))

df_all = pd.DataFrame.from_dict(dict(rows), orient="index")
df_all.index = pd.MultiIndex.from_tuples(df_all.index, names=["Method", "Dataset"])

# load timings
def timings_path(method):
    paths = list(here("models").glob(f"*{method}*/timings.csv"))
    assert len(paths) == 1
    return paths[0]

timings = pd.DataFrame()
for method in methods:
    df = pd.read_csv(timings_path(method))
    df["Method"] = method
    timings = pd.concat([timings, df], ignore_index=True)

_timings = timings.melt(id_vars="Method", var_name="Dataset", value_name="Time").set_index(["Method", "Dataset"]).sort_index(level="Method")

df_all = pd.concat([df_all, _timings], axis = 1)


Ignoring JustCopy D6: Expected one result file for JustCopy D6, instead matched [].


Ignoring JustCopy D7: Expected one result file for JustCopy D7, instead matched [].



In [106]:
cm = sns.diverging_palette(h_neg=130, h_pos=0, as_cmap=True)

def style_results_table(df):
    return df.style\
    .background_gradient(cmap=cm, vmin=0, vmax=0.5, subset=["DS", "PS"])\
    .background_gradient(cmap=cm, vmin=0, vmax=1, subset=["C-FID"])\
    .background_gradient(cmap=cm, vmin=0, vmax=2, subset=["MDD"])\
    .background_gradient(cmap=cm, vmin=0, vmax=1, subset=["ACD"])\
    .background_gradient(cmap=cm, vmin=0, vmax=1, subset=["SD"])\
    .background_gradient(cmap=cm, vmin=0, vmax=3, subset=["KD"])\
    .background_gradient(cmap=cm, vmin=0, vmax=3, subset=["ED"])\
    .background_gradient(cmap=cm, vmin=0, vmax=15, subset=["DTW"])\
    .background_gradient(cmap=cm, vmin=0, vmax=1000, subset=["Time"])\
    .format({"DTW": "{:.2e}", "C-FID": "{:.2e}", "ED": "{:.2e}"})


style_results_table(df_all)

Unnamed: 0_level_0,Unnamed: 1_level_0,DS,PS,C-FID,MDD,ACD,SD,KD,ED,DTW,Time
Method,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
JustCopy,D2,0.010101,0.03772,-1.67e-12,0.000317,0.0,0.0,0.0,0.0,0.0,0
JustCopy,D3,0.01014,0.036687,-2.12e-12,0.000282,0.0,0.0,0.0,0.0,0.0,0
JustCopy,D4,0.013775,0.05706,-2.43e-13,0.000264,0.0,0.0,0.0,0.0,0.0,0
JustCopy,D5,0.016671,0.250396,-2.42e-15,0.060657,0.0,0.0,0.0,0.0,0.0,0
JustCopy,D6,,,,,,,,,,0
JustCopy,D7,,,,,,,,,,0
TTS-GAN,D2,0.5,0.136,1.73e+17,1.002635,4.518185,1.487815,3.213125,159000000.0,565000000.0,646
TTS-GAN,D3,0.48908,0.694247,240000000000.0,1.020405,7.752654,1.268523,3.031921,1060000.0,2700000.0,660
TTS-GAN,D4,0.404567,0.273916,58500000000000.0,1.015425,7.849365,0.333326,1.879808,14600000.0,42200000.0,649
TTS-GAN,D5,0.5,0.279841,119000000000.0,1.009141,4.521533,1.397439,1.732348,86400.0,570000.0,649


In [107]:
style_results_table(df_all.swaplevel("Method", "Dataset")\
    .sort_index(level=["Dataset", "Method"]))

Unnamed: 0_level_0,Unnamed: 1_level_0,DS,PS,C-FID,MDD,ACD,SD,KD,ED,DTW,Time
Dataset,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
D2,JustCopy,0.010101,0.03772,-1.67e-12,0.000317,0.0,0.0,0.0,0.0,0.0,0
D2,TTS-GAN,0.5,0.136,1.73e+17,1.002635,4.518185,1.487815,3.213125,159000000.0,565000000.0,646
D2,Time-Transformer,0.324495,0.085745,0.266,0.629389,0.24659,0.582419,2.577668,1.31,3.38,13
D2,TimeGAN,0.093434,0.037907,0.0201,0.443126,0.174772,0.472857,2.370855,1.18,3.05,259
D2,TimeVQVAE,0.271212,0.042083,0.0861,0.600689,0.151148,0.300332,2.007588,1.16,2.99,73
D2,TransFusion,0.14697,0.039753,0.103,0.793641,0.033649,0.533438,2.629961,1.07,2.77,43
D3,JustCopy,0.01014,0.036687,-2.12e-12,0.000282,0.0,0.0,0.0,0.0,0.0,0
D3,TTS-GAN,0.48908,0.694247,240000000000.0,1.020405,7.752654,1.268523,3.031921,1060000.0,2700000.0,660
D3,Time-Transformer,0.297712,0.071347,0.185,0.747137,0.489822,0.532613,2.88285,2.48,6.3,16
D3,TimeGAN,0.224909,0.039703,0.0542,0.864601,0.144977,0.272758,2.354675,2.64,6.67,1373


## Spider Plots

In [125]:
rankings = df_all.unstack(level=1).rank()

ranking_by_metric = rankings.stack().reset_index().groupby("Method").mean()
spider_by_metric = ranking_by_metric.reset_index().melt(id_vars="Method")
px.line_polar(spider_by_metric, r="value", theta="variable", color="Method", line_close=True) \
    .update_layout(polar={"radialaxis": {"range": [6.9, 1], "dtick": 1}})

In [124]:
ranking_by_dataset = rankings.stack(level=0).reset_index().groupby("Method").mean()
spider_by_dataset = ranking_by_dataset.reset_index().melt(id_vars="Method")
px.line_polar(spider_by_dataset, r="value", theta="Dataset", color="Method", line_close=True) \
    .update_layout(polar={"radialaxis": {"range": [6.9,1], "dtick": 1}})

In [110]:
import plotly.graph_objects as go


def plot_average_rankings(df, title):
    s = df.unstack(level=1).rank().mean(axis=1)

    # Alternate y-offsets to avoid overlap
    y_offsets = [1, -1, 1, 1, 1]

    fig = go.Figure(go.Scatter(
        x=s.values,
        y=[0]*len(s),
        mode="markers+text",
        text=s.index,
        textposition=["top center" if y>0 else "bottom center" for y in y_offsets],
        marker=dict(size=12, color="steelblue")
    ))

    range_max = len(methods)

    # Thin horizontal number line
    fig.add_shape(type="line",
                x0=1, x1=range_max,
                y0=0, y1=0,
                line=dict(color="black", width=1))

    # Layout tweaks for minimal look
    fig.update_yaxes(visible=False)
    fig.update_xaxes(range=[0.8, range_max +0.2], showgrid=True, zeroline=False)
    fig.update_layout(
        height=200,
        xaxis_title="Average Rank",
        yaxis_title="",
        showlegend=False,
        margin=dict(t=60, b=60, l=150, r=150),
        title=title,
    )

    fig.show()

plot_average_rankings(df_all, "All Metrics")


In [111]:
import scikit_posthocs as sp

def conover_test(df):
    df = df.unstack(level=1).rank().T.reset_index().melt(id_vars=['level_0', 'Dataset'], var_name='Method', value_name='Rank')
    posthoc = sp.posthoc_conover(df, group_col='Method', val_col='Rank', p_adjust='holm')

    def highlight_below_threshold(val):
        color = 'background-color: turquoise' if val < 0.01 else ''
        return color

    return posthoc.style.applymap(highlight_below_threshold).format("{:.2e}")

conover_test(df_all)

Unnamed: 0,JustCopy,TTS-GAN,Time-Transformer,TimeGAN,TimeVQVAE,TransFusion
JustCopy,1.0,2.26e-59,3.84e-25,4.53e-28,2.56e-20,4.85e-11
TTS-GAN,2.26e-59,1.0,3.13e-18,1.91e-15,2.0999999999999998e-23,5.96e-36
Time-Transformer,3.84e-25,3.13e-18,1.0,0.38,0.259,9.24e-06
TimeGAN,4.53e-28,1.91e-15,0.38,1.0,0.051,1.3e-07
TimeVQVAE,2.56e-20,2.0999999999999998e-23,0.259,0.051,1.0,0.00382
TransFusion,4.85e-11,5.96e-36,9.24e-06,1.3e-07,0.00382,1.0


In [112]:
plot_average_rankings(df_all.drop(columns=["PS", "Time"]), "Fidelity")

In [113]:
conover_test(df_all.drop(columns=["PS", "Time"]))

Unnamed: 0,JustCopy,TTS-GAN,Time-Transformer,TimeGAN,TimeVQVAE,TransFusion
JustCopy,1.0,9.32e-53,2.7899999999999998e-24,9.65e-23,4.39e-17,1.06e-08
TTS-GAN,9.32e-53,1.0,8.14e-16,2.34e-17,9.34e-24,1.1500000000000001e-35
Time-Transformer,2.7899999999999998e-24,8.14e-16,1.0,0.597,0.0387,6.8e-08
TimeGAN,9.65e-23,2.34e-17,0.597,1.0,0.099,8.91e-07
TimeVQVAE,4.39e-17,9.34e-24,0.0387,0.099,1.0,0.00329
TransFusion,1.06e-08,1.1500000000000001e-35,6.8e-08,8.91e-07,0.00329,1.0


In [114]:
plot_average_rankings(df_all.drop(columns=["Time"]), "Without Timing")

In [115]:
conover_test(df_all.drop(columns=["Time"]))

Unnamed: 0,JustCopy,TTS-GAN,Time-Transformer,TimeGAN,TimeVQVAE,TransFusion
JustCopy,1.0,8.47e-58,2.64e-27,3.28e-24,5.56e-20,1.26e-08
TTS-GAN,8.47e-58,1.0,2.89e-16,2.29e-19,4.54e-24,2.36e-40
Time-Transformer,2.64e-27,2.89e-16,1.0,0.33,0.0548,4.41e-10
TimeGAN,3.28e-24,2.29e-19,0.33,1.0,0.327,8.33e-08
TimeVQVAE,5.56e-20,4.54e-24,0.0548,0.327,1.0,6.04e-05
TransFusion,1.26e-08,2.36e-40,4.41e-10,8.33e-08,6.04e-05,1.0


In [116]:
plot_average_rankings(df_all["PS"], "Predictive Score")

In [117]:
conover_test(df_all["PS"].to_frame())

Unnamed: 0,JustCopy,TTS-GAN,Time-Transformer,TimeGAN,TimeVQVAE,TransFusion
JustCopy,1.0,8.22e-05,0.00562,0.0788,0.00579,0.816
TTS-GAN,8.22e-05,1.0,0.505,0.0395,0.505,0.000173
Time-Transformer,0.00562,0.505,1.0,0.761,0.965,0.017
TimeGAN,0.0788,0.0395,0.761,1.0,0.761,0.282
TimeVQVAE,0.00579,0.505,0.965,0.761,1.0,0.0174
TransFusion,0.816,0.000173,0.017,0.282,0.0174,1.0


In [118]:
from scipy.stats import friedmanchisquare

df = df_all.unstack(level=1).T
ranks = df.rank(axis=0, method='average', ascending=False)

stat, p = friedmanchisquare(*[ranks.loc[m].values for m in ranks.index])
p

1.3455204629986705e-12

In [129]:
import pandas as pd

def leaderboard(df):
    # Create a new DataFrame with the same columns
    out = pd.DataFrame(index=df.index.levels[1], columns=df.columns)

    for lvl2 in df.index.levels[1]:
        # Select all rows with this level2
        subset = df.xs(lvl2, level=1)
        # Find max in each column
        min_idx = subset.idxmin()  # gives Level1 with max for each column
        out.loc[lvl2] = min_idx


    def color_cells(value):
        if value == "TTS-GAN":
            return 'background-color: yellow'
        if value == "TransFusion":
            return 'background-color: lightgreen'
        if value == "TimeGAN":
            return 'background-color: lightblue'
        if value == "Time-Transformer":
            return 'background-color: pink'
        if value == "TimeVQVAE":
            return 'background-color: turquoise'

    return out.style.applymap(color_cells)

leaderboard(df_all.drop(index="JustCopy", level="Method"))

Unnamed: 0_level_0,DS,PS,C-FID,MDD,ACD,SD,KD,ED,DTW,Time
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
D2,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TransFusion,TimeVQVAE,TimeVQVAE,TransFusion,TransFusion,Time-Transformer
D3,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TimeVQVAE,TimeVQVAE,Time-Transformer
D4,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TimeVQVAE,TimeVQVAE,Time-Transformer,Time-Transformer,Time-Transformer
D5,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,Time-Transformer,Time-Transformer,Time-Transformer
D6,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TransFusion,TimeVQVAE,TimeVQVAE,Time-Transformer
D7,TransFusion,TimeGAN,TransFusion,TimeGAN,TransFusion,TimeVQVAE,TimeVQVAE,TransFusion,TransFusion,Time-Transformer


In [120]:
leaderboard(df_all[df_all.index.get_level_values(level=0).isin(["TTS-GAN", "TimeGAN"])])

Unnamed: 0_level_0,DS,PS,C-FID,MDD,ACD,SD,KD,ED,DTW,Time
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
D2,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN
D3,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TTS-GAN
D4,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TTS-GAN
D5,TimeGAN,TTS-GAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN
D6,TTS-GAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TTS-GAN
D7,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TTS-GAN


In [121]:
leaderboard(df_all[df_all.index.get_level_values(level=0).isin(["Time-Transformer", "TimeGAN"])])

Unnamed: 0_level_0,DS,PS,C-FID,MDD,ACD,SD,KD,ED,DTW,Time
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
D2,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,TimeGAN,Time-Transformer
D3,TimeGAN,TimeGAN,TimeGAN,Time-Transformer,TimeGAN,TimeGAN,TimeGAN,Time-Transformer,Time-Transformer,Time-Transformer
D4,TimeGAN,Time-Transformer,Time-Transformer,Time-Transformer,TimeGAN,TimeGAN,Time-Transformer,Time-Transformer,Time-Transformer,Time-Transformer
D5,TimeGAN,TimeGAN,TimeGAN,TimeGAN,Time-Transformer,Time-Transformer,Time-Transformer,Time-Transformer,Time-Transformer,Time-Transformer
D6,Time-Transformer,Time-Transformer,Time-Transformer,Time-Transformer,Time-Transformer,Time-Transformer,TimeGAN,Time-Transformer,Time-Transformer,Time-Transformer
D7,TimeGAN,TimeGAN,TimeGAN,TimeGAN,Time-Transformer,TimeGAN,Time-Transformer,TimeGAN,TimeGAN,Time-Transformer


In [122]:
df = rankings
utility_cols = [col for col in df.columns if col[0] in ["PS"]]

fidelity_cols = [col for col in df.columns if col[0] not in ["PS", "Time"]]

timing_cols = [col for col in df.columns if col[0] in ["Time"]]

# Step 3: Compute averages
avg_utility = df[utility_cols].mean(axis=1)
avg_fidelity = df[fidelity_cols].mean(axis=1)
avg_timing = df[timing_cols].mean(axis=1)

# Step 4: Create new dataframe
new_df = pd.DataFrame({
    "Utility": avg_utility,
    "Fidelity": avg_fidelity,
    "Time": avg_timing
})

new_df.style.background_gradient(cmap=cm)

Unnamed: 0_level_0,Utility,Fidelity,Time
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JustCopy,1.25,1.0,1.0
TTS-GAN,5.166667,5.572917,5.0
Time-Transformer,4.0,3.645833,2.0
TimeGAN,3.166667,3.541667,5.5
TimeVQVAE,4.0,3.135417,3.166667
TransFusion,1.833333,2.4375,4.333333


In [123]:
timings.set_index("Method")

Unnamed: 0_level_0,D2,D3,D4,D5,D6,D7
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
JustCopy,0,0,0,0,0,0
TimeGAN,259,1373,1195,304,1252,1385
Time-Transformer,13,16,28,66,77,55
TransFusion,43,222,454,201,1387,856
TTS-GAN,646,660,649,649,660,645
TimeVQVAE,73,94,94,74,88,86
