In [1]:
import json
import pandas as pd

# Load the raw experiment results. JSON is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open("../results.json", 'r', encoding='utf-8') as f:
    data = json.load(f)

In [2]:
from notebooks.util import get_benchmark_family, BENCHMARK_FAMILY_PRETTY
from nfmc.util import get_flow_family

# Build the results table restricted to autoregressive flows and annotate each
# row with flow-family, conditioner, transformer and benchmark-family labels.
df = pd.DataFrame(data['general_experiment'])

# Explicit copies after boolean filtering: the columns assigned below then
# belong to an independent frame (avoids pandas' SettingWithCopyWarning).
df = df[df['flow'] != 'None'].copy()

# Look every flow up once and reuse the returned tuple for all three labels.
flow_families = [get_flow_family(f) for f in df['flow']]
df['flow_family'] = [family[0] for family in flow_families]

is_autoregressive = df['flow_family'] == 'autoregressive'
df = df[is_autoregressive].copy()

# Entries [1] and [2] are only read for the rows that survive the filter,
# exactly as before; row order is preserved so the lists stay aligned with df.
autoregressive_families = [
    family for family, keep in zip(flow_families, is_autoregressive) if keep
]
df['flow_subfamily'] = [family[1] for family in autoregressive_families]
df['transformer'] = [family[2] for family in autoregressive_families]
df['benchmark_family'] = [get_benchmark_family(b) for b in df['benchmark']]

# Per-family row masks, computed on the raw family names (before the pretty
# renaming at the end of this cell).
# NOTE(review): not referenced by any later cell visible here — confirm whether
# it is still needed.
masks = [
    (df['benchmark_family'] == f) for f in
    ['gaussian', 'non-gaussian (curved)', 'multimodal', 'non-gaussian (hierarchical)']
]

# Short display labels used in the result tables.
df['conditioner'] = df['flow_subfamily'].map({'coupling': 'C', 'masked': 'MADE'})
df['transformer'] = df['transformer'].map({'affine': 'Affine', 'nn': 'NN', 'spline': 'Spline'})

# Transformer name shared between the coupling and masked variant of a flow, so
# that the two conditioners can be paired on it later. Flows missing from this
# mapping get NaN.
df['specific_transformer'] = df['flow'].map({
    'c-naf-deep-dense': 'naf-deep-dense',
    'c-lrsnsf': 'lrs',
    'c-naf-dense': 'naf-dense',
    'c-naf-deep': 'naf-deep',
    'c-rqnsf': 'rqs',
    'nice': 'shift',
    'ia-naf-dense': 'naf-dense',
    'realnvp': 'affine',
    'ia-naf-deep': 'naf-deep',
    'ia-rqnsf': 'rqs',
    'ia-lrsnsf': 'lrs',
    'ia-naf-deep-dense': 'naf-deep-dense',
    'iaf': 'affine'
})
df['benchmark_family'] = df['benchmark_family'].map(BENCHMARK_FAMILY_PRETTY)

# Joint comparison

In [3]:
from notebooks.util import standardized_rank_best_nf_kwargs

# Rows whose sampler is not one of the excluded ones (jump_hmc, jump_mh, imh).
neutra_mcmc_mask = ~df['sampler'].isin(['jump_hmc', 'jump_mh', 'imh'])

# One frame per benchmark family (in order of first appearance), followed by
# one frame that pools all families.
unprocessed_dfs = []
for family_name in df['benchmark_family'].unique():
    family_rows = df['benchmark_family'] == family_name
    unprocessed_dfs.append(df[family_rows & neutra_mcmc_mask])
unprocessed_dfs.append(df[neutra_mcmc_mask])


def _rounded_pair(rank_row):
    """Return the first two entries of a row as a tuple rounded to 2 decimals."""
    return round(rank_row.iloc[0], 2), round(rank_row.iloc[1], 2)


processed_dfs = []
for position, family_df in enumerate(unprocessed_dfs):
    ranks = standardized_rank_best_nf_kwargs(
        family_df,
        rank_what=['conditioner', 'transformer'],
    )
    pairs = ranks.apply(_rounded_pair, axis=1).reset_index()
    # The value column is labelled with the first family present in the frame;
    # for the pooled frame this label is replaced by 'All' further down.
    value_label = family_df['benchmark_family'].unique()[0]
    pairs = pairs.set_axis(['Conditioner', 'Transformer', value_label], axis=1)
    # Positional reorder of the six rows (swaps rows 1<->2 and 4<->5).
    pairs = pairs.iloc[[0, 2, 1, 3, 5, 4]]
    if position > 0:
        # Only the first frame keeps the key columns, so they appear once.
        pairs = pairs.drop(['Conditioner', 'Transformer'], axis=1)
    processed_dfs.append(pairs)

combined = pd.concat(processed_dfs, axis=1)
# The last column comes from the pooled frame.
combined.columns = [*combined.columns[:-1], 'All']

processed = (
    combined
    .assign(Combination=combined['Conditioner'] + '-' + combined['Transformer'])
    .drop(columns=['Conditioner', 'Transformer'])
    .set_index('Combination')
    .loc[:, ['Gaussian', 'Non-Gaussian', 'Multimodal', 'Real-world', 'All']]
)
processed

Unnamed: 0_level_0,Gaussian,Non-Gaussian,Multimodal,Real-world,All
Combination,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
C-Affine,"(-0.44, 0.28)","(0.88, 0.0)","(0.59, 0.17)","(-0.37, 0.32)","(0.46, 0.23)"
C-Spline,"(-0.59, 0.56)","(-0.59, 0.88)","(-0.44, 0.65)","(-0.15, 0.4)","(-0.49, 0.18)"
C-NN,"(1.17, 0.29)","(-0.88, 0.0)","(0.88, 0.41)","(0.37, 0.37)","(0.29, 0.26)"
MADE-Affine,"(0.44, 0.44)","(1.46, 0.0)","(-0.15, 0.5)","(0.22, 0.46)","(0.52, 0.2)"
MADE-Spline,"(-0.73, 0.37)","(0.0, 0.29)","(-0.59, 0.38)","(0.15, 0.24)","(-0.23, 0.22)"
MADE-NN,"(0.15, 0.55)","(-0.88, 0.59)","(-0.29, 0.63)","(-0.22, 0.36)","(-0.55, 0.22)"


In [4]:
from notebooks.util import to_booktabs_table, make_bold_mask

# Export the joint comparison as a LaTeX table; the call's return value is the
# cell output.
table_frame = processed.reset_index()
table_caption = r'$\overline{r} \pm \hat{\sigma}$ for all conditioner-transformer combinations in autoregressive NFs, estimated with default hyperparameters for each benchmark. NN denotes neural network transformers, C denotes coupling conditioners. The top 20\% combinations are shown in bold. Ranks computed separately for each target family.'
# The bold mask is built from the frame indexed by 'Combination'.
bold_cells = make_bold_mask(processed, top_quantile=0.8)

to_booktabs_table(
    table_frame,
    precision=2,
    label='tab:autoregressive-nf-comparison',
    caption=table_caption,
    bold_mask=bold_cells,
    save_to_file='tables/autoregressive-nf-comparison.tex',
)

\begin{table}

            \renewrobustcmd{\bfseries}{\fontseries{b}\selectfont}
            \renewrobustcmd{\boldmath}{}
            \sisetup{%
                table-align-uncertainty=true,
                detect-all,
                separate-uncertainty=true,
                mode=text,
                round-mode=uncertainty,
                round-precision=2,
                table-format = 2.2(2),
                table-column-width=2.1cm
            }
            
\begin{tabular}{l
S
S
S
S
S}
\toprule
{Combination} & {Gaussian} & {Non-Gaussian} & {Multimodal} & {Real-world} & {All} \\
\midrule
{C-Affine} & -0.44(0.28) & 0.88 & 0.59(0.17) & \bfseries -0.37(0.32) & 0.46(0.23) \\
{C-Spline} & \bfseries -0.59(0.56) & -0.59(0.88) & -0.44(0.65) & -0.15(0.40) & \bfseries -0.49(0.18) \\
{C-NN} & 1.17(0.29) & \bfseries -0.88 & 0.88(0.41) & 0.37(0.37) & 0.29(0.26) \\
{MADE-Affine} & 0.44(0.44) & 1.46 & -0.15(0.50) & 0.22(0.46) & 0.52(0.20) \\
{MADE-Spline} & \bfseries -0.73(0.37) & 0.00(0.29) 

# Conditioner comparison
For each combination of benchmark, transformer, and NeuTra sampler, check whether the MADE conditioner attains a lower second-moment squared bias ($b^2$) than the coupling conditioner.

In [5]:
# Paired comparison of the two conditioners under NeuTra sampling.
# For every (benchmark, specific_transformer, sampler) triple that was run with
# both conditioners, runs are paired on identical flow_kwargs and we count how
# often each conditioner reaches the strictly lower second_moment_squared_bias.
comparison_data = []

tmp = df[df['sampler'].isin(['neutra_mh', 'neutra_hmc'])][['benchmark', 'specific_transformer', 'sampler', 'conditioner', 'second_moment_squared_bias']]

for index, row in tmp[['benchmark', 'specific_transformer', 'sampler']].drop_duplicates().iterrows():
    # Taken from df rather than tmp because the pairing key flow_kwargs is not
    # among tmp's columns.
    subset = df[(df['benchmark'] == row['benchmark']) & (df['specific_transformer'] == row['specific_transformer']) & (
                df['sampler'] == row['sampler'])]
    if len(subset['conditioner'].unique()) < 2:
        # Only one conditioner was run for this triple: nothing to compare.
        continue
    # assign() returns a new frame, so the filtered slice is never written to.
    # flow_kwargs is stringified so it can serve as a merge key.
    subset = subset[['conditioner', 'second_moment_squared_bias', 'flow_kwargs']].assign(
        flow_kwargs=lambda frame: frame['flow_kwargs'].map(str)
    )
    made_subset = subset[subset['conditioner'] == 'MADE'].drop('conditioner', axis=1)
    c_subset = subset[subset['conditioner'] == 'C'].drop('conditioner', axis=1)
    # Inner join: keeps only hyperparameter settings present for both conditioners.
    merged = made_subset.merge(c_subset, on='flow_kwargs').drop('flow_kwargs', axis=1)
    merged.columns = ['b2_made', 'b2_c']
    # Strict comparisons on both sides: a tie (e.g. identical or both-infinite
    # bias) is a win for neither conditioner, matching "attains lower b^2".
    # Pairs containing NaN compare False both ways and are likewise not counted.
    made_wins = (merged['b2_made'] < merged['b2_c']).values.sum()
    c_wins = (merged['b2_c'] < merged['b2_made']).values.sum()

    if made_wins == 0 and c_wins == 0:
        # No decided pair; skipping also avoids a 0/0 ratio below.
        continue

    comparison_data.append({
        'sampler': row['sampler'],
        'specific_transformer': row['specific_transformer'],
        'made_wins': made_wins,
        'c_wins': c_wins,
        'c_win_ratio': c_wins / (made_wins + c_wins),
    })

In [6]:
# Collapse each specific transformer into its coarse family and average the
# coupling win ratio per family.
coarse_transformer = {
    'affine': 'affine',
    'lrs': 'spline',
    'rqs': 'spline',
    'naf-deep': 'nn',
    'naf-deep-dense': 'nn',
    'naf-dense': 'nn',
}

tmp = pd.DataFrame(comparison_data)
tmp['transformer'] = tmp['specific_transformer'].map(coarse_transformer)

win_ratio_by_transformer = tmp.groupby('transformer')['c_win_ratio']
transformer_groups = win_ratio_by_transformer.mean()

In [7]:
# Unweighted mean of the coupling win ratio over all rows of the comparison table.
tmp.loc[:, 'c_win_ratio'].mean()

0.4565040650406504

In [8]:
# Comparison table: render the per-transformer and overall coupling win ratios
# as a LaTeX table, print it, and save it next to the other generated tables.
# The tabular spec declares one label column plus four value columns, matching
# the five cells in every row.
# NOTE(review): the caption says "Percentage" while the values are written as
# fractions in [0, 1] — confirm the intended wording or scale.

tab = f"""\\begin{{table}}
\\renewrobustcmd{{\\bfseries}}{{\\fontseries{{b}}\\selectfont}}
\\renewrobustcmd{{\\boldmath}}{{}}
\\begin{{tabular}}{{lcccc}}
 & {{Affine}} & {{Spline}} & {{NN}} & {{All}} \\\\
\\midrule
{{Transformer}} & {transformer_groups['affine']:.2f} & {transformer_groups['spline']:.2f} & {transformer_groups['nn']:.2f} & {tmp['c_win_ratio'].mean():.2f} \\\\
\\bottomrule
\\end{{tabular}}
\\caption{{Percentage of experiments where NeuTra MCMC with coupling NFs attains lower $b^2$ than with MA or IA architectures.}}
\\label{{tab:neutra-conditioner-comparison}}
\\end{{table}}
"""
print(tab)

# Explicit encoding so the written file does not depend on the platform default.
with open('tables/neutra-conditioner-comparison.tex', 'w', encoding='utf-8') as f:
    f.write(tab)

\begin{table}
\renewrobustcmd{\bfseries}{\fontseries{b}\selectfont}
\renewrobustcmd{\boldmath}{}
\begin{tabular}{lccccc}
 & {Affine} & {Spline} & {NN} & {All} \\
\midrule
{Transformer} & 0.41 & 0.47 & 0.46 & 0.46 \\
\bottomrule
\end{tabular}
\caption{Percentage of experiments where NeuTra MCMC with coupling NFs attains lower $b^2$ than with MA or IA architectures.}
\label{tab:neutra-conditioner-comparison}
\end{table}

