In [1]:
import pandas as pd
import scipy.stats
import os

In [14]:
def preprocess_logs(name, logdir="logs"):
    dfs = []
    duration_files = [f for f in os.listdir(logdir) if f"duration_{name}_" in f]
    n = len(duration_files)
    for i, f in enumerate(duration_files):
        df = pd.read_csv(f"{logdir}/{f}", index_col=0)
        df.columns = [f"{name}_{i}_{x}" for x in df.columns]
        dfs.append(df)

    status_file = next(f for f in os.listdir(logdir) if f"status_{name}_" in f)
    df = pd.read_csv(f"{logdir}/{status_file}", index_col=0)
    df.columns = [f"{name}_{x}" for x in df.columns]
    dfs.append(df)

    df = pd.concat(dfs, axis=1)
    df[f"{name}_time"] = df[[f"{name}_{i}_time" for i in range(n)]].mean(axis=1)
    df[f"{name}_time_std"] = df[[f"{name}_{i}_time" for i in range(n)]].std(axis=1)

    tfrmt = lambda x: x if pd.isna(x) else f"{x // 3600:02.0f}:{x // 60 % 60:02.0f}:{x % 60:02.0f}"
    df[f"{name}_duration"] = df[f"{name}_time"].apply(tfrmt)

    return df[[f"{name}_status", f"{name}_time", f"{name}_time_std", f"{name}_duration"]]

In [3]:
pyro_comprehensive = preprocess_average("pyro_comprehensive", "new_logs")
numpyro_comprehensive = preprocess_average("numpyro_comprehensive", "new_logs")
numpyro_mixed = preprocess_average("numpyro_mixed", "new_logs")
numpyro_generative = preprocess_average("numpyro_generative", "new_logs")
stan = preprocess_average("stan", "new_logs")

In [4]:
mean_res = pd.concat([pyro_comprehensive, numpyro_mixed, numpyro_comprehensive, numpyro_generative, stan], axis=1)
mean_res['example'] = mean_res.index.map(lambda x: x.split("-")[1])
mean_res['data'] = mean_res.index.map(lambda x: x.split("-")[0])
mean_res = mean_res.sort_values(by='example')

mean_res['speedup'] = (mean_res.stan_time / mean_res.numpyro_comprehensive_time)
speedups = mean_res[mean_res.numpyro_comprehensive_status == "success"]['speedup']
mean_res['speedup'] = speedups

In [9]:
print(f"Total benchs: {len(mean_res)}")
print(f"Stan successes: {len(mean_res[mean_res.stan_status == 'success'])}")
print(f"High relative std: {mean_res[mean_res.stan_time_std / mean_res.stan_time > 1.0].index.tolist()}")
mean_res = mean_res[mean_res.stan_status == "success"]
mean_res = mean_res[mean_res.stan_time_std / mean_res.stan_time < 1.0]
mean_res = mean_res.drop("arma-arma11")
print(f"Valid benchs: {len(mean_res)}")
print(f"Valid speedup: {len(mean_res['speedup'].dropna())}")
print(f"Time Stan: {mean_res.stan_time.sum() / 60**2}")

Total benchs: 31
Stan successes: 31
High relative std: []
Valid benchs: 30
Valid speedup: 25
Time Stan: 1.6236440341496168


In [10]:
print(f"average speedup: {scipy.stats.gmean(mean_res.speedup.dropna())}")
print(f"Relative std numpyro: {mean_res.numpyro_comprehensive_time_std.mean() / mean_res.numpyro_comprehensive_time.mean()}")
print(f"Relative std pyro: {mean_res.pyro_comprehensive_time_std.mean() / mean_res.pyro_comprehensive_time.mean()}")
print(f"Relative std stan: {mean_res.stan_time_std.mean() / mean_res.stan_time.mean()}")

average speedup: 2.2361243655614644
Relative std numpyro: 0.00795806625779655
Relative std pyro: 0.08171841789310347
Relative std stan: 0.021312511486504107


In [11]:
print(mean_res[["stan_status", "numpyro_comprehensive_status", "speedup"]].to_markdown())

|                                             | stan_status   | numpyro_comprehensive_status   |    speedup |
|:--------------------------------------------|:--------------|:-------------------------------|-----------:|
| mcycle_gp-accel_gp                          | success       | error                          | nan        |
| arK-arK                                     | success       | success                        |   1.40263  |
| dogs-dogs                                   | success       | success                        |   0.174855 |
| dogs-dogs_log                               | success       | success                        |   0.127352 |
| earnings-earn_height                        | success       | success                        |   5.44849  |
| eight_schools-eight_schools_centered        | success       | success                        |   0.337915 |
| eight_schools-eight_schools_noncentered     | success       | success                        |   0.17813  |
| garch-ga

In [12]:
filter_res = mean_res.drop(["arK-arK", "dogs-dogs", "dogs-dogs_log", "bball_drive_event_0-hmm_drive_0", "hmm_example-hmm_example"])
print(f"average speedup: {scipy.stats.gmean(filter_res.speedup.dropna())}")
print(f"Valid benchs: {len(filter_res)}")
print(f"Valid speedup: {len(filter_res['speedup'].dropna())}")

average speedup: 3.7262132714305323
Valid benchs: 25
Valid speedup: 20


In [8]:
mean_res['speedup'] = mean_res['speedup'].apply(lambda x: f"{x:02.2f}")
print(
    mean_res[
        [
            "example",
            "data",
            "stan_status",
            "stan_duration",
            "pyro_comprehensive_status",
            "pyro_comprehensive_duration",
            "numpyro_comprehensive_status",
            "numpyro_comprehensive_duration",
            "numpyro_mixed_status",
            "numpyro_mixed_duration",
            "numpyro_generative_status",
            "numpyro_generative_duration",
            "speedup",
        ]
    ].to_latex(index=False)
    .replace("success", "\smark")
    .replace("error", "\emark")
    .replace("mismatch", "\mmark")
    .replace("NaN", "")
    .replace("nan", "")
)

\begin{tabular}{lllllllllllll}
\toprule
                  example &                 data & stan\_status & stan\_duration & pyro\_comprehensive\_status & pyro\_comprehensive\_duration & numpyro\_comprehensive\_status & numpyro\_comprehensive\_duration & numpyro\_mixed\_status & numpyro\_mixed\_duration & numpyro\_generative\_status & numpyro\_generative\_duration & speedup \\
\midrule
                      arK &                  arK &     \smark &      00:00:57 &                   \smark &                    39:23:54 &                      \smark &                       00:00:40 &              \smark &               00:00:35 &                   \smark &                    00:00:39 &    1.43 \\
                     dogs &                 dogs &     \smark &      00:01:05 &                   \smark &                    24:58:30 &                      \smark &                       00:06:20 &              \smark &               00:06:22 &                   \smark &                    00:06

TypeError: loop of ufunc does not support argument 0 of type str which has no callable log method