In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from itertools import product
from typing import Tuple

import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

import pandas as pd
import seaborn as sns

import sbibm
from sbibm.utils.io import get_tensor_from_csv

import sbi.analysis as analysis

from notebook import nre_gamma_default, wide_to_long

In [3]:
nre_gamma_default = float("Inf")

## what do we expect for experiments

In [4]:
columns = ['task', 'num_observation', 'algorithm', 'num_contrastive_parameters', 'gamma', 'num_blocks']

# general for nre
# tasks = sbibm.get_available_tasks()
tasks = ['slcp', 'two_moons', 'gaussian_mixture']
num_observation = list(range(1,11))
num_contrastive_parameters = [2, 10, 25, 50, 75, 100, 150, 200]
num_blocks = [2, 3]

# for cnre
gammas = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]

In [5]:
experiments_cnre = list(product(tasks, num_observation, ['CNRE'], num_contrastive_parameters, gammas, num_blocks))
df_cnre = pd.DataFrame(data=experiments_cnre, columns=columns)
print("cnre: ", len(df_cnre))

experiments_nre = list(product(tasks, num_observation, ['NRE-B'], num_contrastive_parameters, [nre_gamma_default], num_blocks))
df_nre = pd.DataFrame(data=experiments_nre, columns=columns)
print("nre: ", len(df_nre))

df_ref = pd.concat([df_cnre, df_nre])
print("total: ", len(df_ref))
df_ref
df_ref.to_csv("experiment-reference.csv")

cnre:  2880
nre:  480
total:  3360


## loading files

In [6]:
files = [
    "fix-joint.csv",
]
df_joint = pd.concat([pd.read_csv(file, index_col=0) for file in files], ignore_index=True)
df_joint_wide, gpr_joint = wide_to_long(df_joint)

In [7]:
files = [
    "fix-prior.csv",
]
df_prior = pd.concat([pd.read_csv(file, index_col=0) for file in files], ignore_index=True)
df_prior_wide, gpr_prior = wide_to_long(df_prior)

In [8]:
files = [
    "fix-bench.csv",
]
df_bench = pd.concat([pd.read_csv(file, index_col=0) for file in files], ignore_index=True)
df_bench_wide, gpr_bench = wide_to_long(df_bench)

In [9]:
# files = [
#     "fix-bench-extra.csv",
# ]
# df_bench_extra = pd.concat([pd.read_csv(file, index_col=0) for file in files], ignore_index=True)
# df_bench_extra_wide, grp_bench_extra = wide_to_long(df_bench_extra)

In [10]:
files = [
    "fix-sbibm.csv",
]
df_sbibm = pd.concat([pd.read_csv(file, index_col=0) for file in files], ignore_index=True)

## what's missing?

In [11]:
def get_missing_experiments(expr: pd.DataFrame, ref: pd.DataFrame = df_ref) -> pd.DataFrame:
    merge_df = ref.merge(expr, how='left', indicator=True)
    merge_df = merge_df[merge_df['_merge'] == 'left_only']
    return merge_df.drop("_merge", axis=1)

In [12]:
df_joint_to_check = df_joint_wide.reset_index()[columns].drop_duplicates()
df_joint_missing = get_missing_experiments(df_joint_to_check)
print(df_joint_missing["task"].unique())
print(df_joint_missing["num_observation"].unique())
print(df_joint_missing["algorithm"].unique())
print(df_joint_missing["gamma"].unique())
print(df_joint_missing["num_blocks"].unique())
df_joint_missing

[]
[]
[]
[]
[]


Unnamed: 0,task,num_observation,algorithm,num_contrastive_parameters,gamma,num_blocks


In [13]:
df_prior_to_check = df_prior_wide.reset_index()[columns].drop_duplicates()
df_prior_missing = get_missing_experiments(df_prior_to_check)
print(df_prior_missing["task"].unique())
print(df_prior_missing["num_observation"].unique())
print(df_prior_missing["algorithm"].unique())
print(df_prior_missing["gamma"].unique())
print(df_prior_missing["num_blocks"].unique())
df_prior_missing

[]
[]
[]
[]
[]


Unnamed: 0,task,num_observation,algorithm,num_contrastive_parameters,gamma,num_blocks


In [14]:
df_bench_to_check = df_bench_wide.reset_index()[columns].drop_duplicates()
df_bench_missing = get_missing_experiments(df_bench_to_check)
print(df_bench_missing["task"].unique())
print(df_bench_missing["num_observation"].unique())
print(df_bench_missing["algorithm"].unique())
print(df_bench_missing["gamma"].unique())
print(df_bench_missing["num_blocks"].unique())
df_bench_missing

[]
[]
[]
[]
[]


Unnamed: 0,task,num_observation,algorithm,num_contrastive_parameters,gamma,num_blocks


In [15]:
# df_bench_extra_to_check = df_bench_extra_wide.reset_index()[columns].drop_duplicates()
# df_bench_extra_missing = get_missing_experiments(df_bench_extra_to_check)
# print(df_bench_extra_missing["task"].unique())
# print(df_bench_extra_missing["num_observation"].unique())
# print(df_bench_extra_missing["algorithm"].unique())
# print(df_bench_extra_missing["gamma"].unique())
# print(df_bench_extra_missing["num_blocks"].unique())
# df_bench_extra_missing

## what only has a single initialization?

In [16]:
df_joint_wide_counts = df_joint_wide.groupby(columns, as_index=False).size()
print(df_joint_wide_counts["size"].unique())
df_joint_wide_counts[df_joint_wide_counts["size"] == 1]
df_joint_wide_counts[df_joint_wide_counts["size"] > 1]

[1]


Unnamed: 0,task,num_observation,algorithm,num_contrastive_parameters,gamma,num_blocks,size


In [17]:
df_prior_wide_counts = df_prior_wide.groupby(columns, as_index=False).size()
print(df_prior_wide_counts["size"].unique())
df_prior_wide_counts[df_prior_wide_counts["size"] == 1]
df_prior_wide_counts[df_prior_wide_counts["size"] > 1]

[1]


Unnamed: 0,task,num_observation,algorithm,num_contrastive_parameters,gamma,num_blocks,size


In [18]:
df_bench_wide_counts = df_bench_wide.groupby(columns, as_index=False).size()
print(df_bench_wide_counts["size"].unique())
df_bench_wide_counts[df_bench_wide_counts["size"] == 1]
df_bench_wide_counts[df_bench_wide_counts["size"] > 1]

[1]


Unnamed: 0,task,num_observation,algorithm,num_contrastive_parameters,gamma,num_blocks,size


## how many sbibm computations do we have?

In [19]:
assert (df_sbibm["algorithm"] == "CNRE").all()
df_sbibm_counts = df_sbibm.groupby(['task', "num_simulations_simulator"])
df_sbibm_counts.count()
# df_sbibm[df_sbibm["task"] == "slcp_distractors"]

Unnamed: 0_level_0,Unnamed: 1_level_0,seed,algorithm,K,num_atoms,num_contrastive_parameters,gamma,extra_theta_factor,reuse,hidden_features,num_blocks,...,MMD-09,MEDDIST-09,NLTP-09,C2ST_Z-10,MMD-10,MEDDIST-10,NLTP-10,NLTP,path,folder
task,num_simulations_simulator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
bernoulli_glm,1000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
bernoulli_glm,10000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
bernoulli_glm,100000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
bernoulli_glm_raw,1000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
bernoulli_glm_raw,10000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
bernoulli_glm_raw,100000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
gaussian_linear,1000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
gaussian_linear,10000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
gaussian_linear,100000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5
gaussian_linear_uniform,1000.0,5,5,5,5,5,5,5,5,5,5,...,5,5,0,5,5,5,0,0,5,5


In [20]:
df_sbibm.columns

Index(['task', 'seed', 'algorithm', 'K', 'num_atoms',
       'num_contrastive_parameters', 'gamma', 'extra_theta_factor', 'reuse',
       'hidden_features', 'num_blocks', 'AVG_LOG_RATIO', 'RT', 'C2ST_Z-01',
       'MMD-01', 'MEDDIST-01', 'NLTP-01', 'C2ST_Z-02', 'MMD-02', 'MEDDIST-02',
       'NLTP-02', 'C2ST_Z-03', 'MMD-03', 'MEDDIST-03', 'NLTP-03', 'C2ST_Z-04',
       'MMD-04', 'MEDDIST-04', 'NLTP-04', 'C2ST_Z-05', 'MMD-05', 'MEDDIST-05',
       'NLTP-05', 'C2ST_Z-06', 'MMD-06', 'MEDDIST-06', 'NLTP-06', 'C2ST_Z-07',
       'MMD-07', 'MEDDIST-07', 'NLTP-07', 'C2ST_Z-08', 'MMD-08', 'MEDDIST-08',
       'NLTP-08', 'C2ST_Z-09', 'MMD-09', 'MEDDIST-09', 'NLTP-09', 'C2ST_Z-10',
       'MMD-10', 'MEDDIST-10', 'NLTP-10', 'NLTP', 'num_simulations_simulator',
       'path', 'folder'],
      dtype='object')

## measuring total compute time

In [21]:
a = df_joint["RT"].sum() + df_prior["RT"].sum() + df_bench["RT"].sum()
b = df_sbibm["RT"].sum()
print(a, b)
print(3 * a + b)

26822.216890462238 26806.435656738282
107273.08632812499
