# Overview

This notebook plots the main simulation figures for the CPI-DNN paper

- _Figure 1_: High-level comparison of CPI and Permfit
- _Figure 2_: Effect of sample size
- _Figure 3_: Extensive generative model benchmark, including state of the art.
- _Figure 4_: Application example

In [1]:
import pathlib
import numpy as np
import pandas as pd
import altair as alt
import warnings
# NOTE(review): with no category/module argument this silences every warning
# for the rest of the notebook, deprecation notices included -- consider
# narrowing the filter.
warnings.filterwarnings("ignore")

In [2]:
# Directory holding the pre-computed result tables read by the cells below.
res_path = pathlib.Path('../results/results_csv')
# glob() yields entries in arbitrary, filesystem-dependent order; sorting
# keeps the displayed listing stable across machines and re-runs.
sorted(res_path.glob('*.csv'))

[PosixPath('../results/results_csv/time_bars_blocks_100_UKBB_single.csv'),
 PosixPath('../results/results_csv/simulation_results_blocks_100_CPI_n_1000_p_50_cpi_depth_10.csv'),
 PosixPath('../results/results_csv/simulation_results_blocks_100_allMethods_pred_final.csv'),
 PosixPath('../results/results_csv/simulation_results_blocks_100_dnn_dnn_py_perm_100--1000.csv'),
 PosixPath('../results/results_csv/time_bars_blocks_100_Mi_dnn_dnn_py_300:100.csv'),
 PosixPath('../results/results_csv/type1error_blocks_100_CPI_LOCO_DNN.csv'),
 PosixPath('../results/results_csv/simulation_results_blocks_100_CPI_n_1000_p_50_cpi_depth_4.csv'),
 PosixPath('../results/results_csv/time_bars_blocks_100_n_10000_p_50_cpi_permfit.csv'),
 PosixPath('../results/results_csv/type1error_blocks_100_dnn_dnn_py_perm_100--1000.csv'),
 PosixPath('../results/results_csv/type1error_blocks_100_UKBB_single.csv'),
 PosixPath('../results/results_csv/AUC_blocks_100_n_10000_p_50_cpi_permfit.csv'),
 PosixPath('../results/results_csv

## Figure 1

In [3]:
# `res_path` is already defined by the first listing cell of this notebook,
# so it is not re-assigned here. Only the "Mi" result files are listed,
# sorted because glob() order is filesystem-dependent.
sorted(res_path.glob('*Mi*.csv'))

[PosixPath('../results/results_csv/time_bars_blocks_100_Mi_dnn_dnn_py_300:100.csv'),
 PosixPath('../results/results_csv/AUC_blocks_100_Mi_dnn_dnn_py_300:100.csv'),
 PosixPath('../results/results_csv/simulation_results_blocks_100_Mi_dnn_dnn_py_300:100.csv'),
 PosixPath('../results/results_csv/power_blocks_100_Mi_dnn_dnn_py_300:100.csv'),
 PosixPath('../results/results_csv/type1error_blocks_100_Mi_dnn_dnn_py_300:100.csv')]

In [4]:
# Read the AUC and type-I error tables of the "Mi" experiment in one pass.
df_auc, df_t1_error = (
    pd.read_csv(res_path / csv_name)
    for csv_name in (
        'AUC_blocks_100_Mi_dnn_dnn_py_300:100.csv',
        'type1error_blocks_100_Mi_dnn_dnn_py_300:100.csv',
    )
)

In [5]:
# Quick look at the AUC table (the notebook display elides the middle rows).
df_auc

Unnamed: 0.1,Unnamed: 0,Method,correlation,n_samples,prob_data,iteration,V1
0,1,Permfit-DNN,0.0,300,regression_perm,1,0.810526
1,2,Permfit-DNN,0.0,300,regression_perm,2,0.776842
2,3,Permfit-DNN,0.0,300,regression_perm,3,0.938947
3,4,Permfit-DNN,0.0,300,regression_perm,4,0.621053
4,5,Permfit-DNN,0.0,300,regression_perm,5,0.680000
...,...,...,...,...,...,...,...
795,796,CPI-DNN,0.8,300,regression_perm,96,0.621053
796,797,CPI-DNN,0.8,300,regression_perm,97,0.861053
797,798,CPI-DNN,0.8,300,regression_perm,98,0.905263
798,799,CPI-DNN,0.8,300,regression_perm,99,0.842105


In [6]:
## Figure 1: type-I error (panel A) beside AUC (panel B); one facet row per
## correlation value. Every chart option below mirrors the published figure.
height = 45
_panel_size = dict(height=height, width=300)
_box_mark = dict(size=20, outliers=False, ticks=True, opacity=0.5)
_point_mark = dict(size=40, opacity=0.3)
_n_points_shown = 70  # value handed to transform_sample for the point layers


def _t1_x():
    """Fresh x channel for the type-I error panel."""
    return alt.X('V1:Q', title='Type-I error', scale=alt.Scale(domain=(0, 0.4)))


def _t1_y():
    """Fresh y channel (method names, labelled) for the left panel."""
    return alt.Y('Method:N', title=None)


def _auc_x():
    """Fresh x channel for the AUC panel."""
    return alt.X('V1:Q', title='AUC', scale=alt.Scale(domain=(0.5, 1)))


def _auc_y():
    """Fresh y channel for the right panel; labels hidden because panel A shows them."""
    return alt.Y('Method:N', axis=alt.Axis(labels=False, title=None))


## Panel A (left): type-I error ##
box_t1 = (
    alt.Chart(df_t1_error)
    .mark_boxplot(**_box_mark)
    .encode(y=_t1_y(), x=_t1_x(), color='Method:N')
    .properties(**_panel_size)
)

point_t1 = (
    alt.Chart(df_t1_error)
    .mark_point(**_point_mark)
    .encode(y=_t1_y(), x=_t1_x(), color='Method:N')
    .transform_sample(_n_points_shown)
    .properties(**_panel_size)
)

# Dashed vertical reference line at 0.05.
rule1 = (
    alt.Chart(pd.DataFrame({'V1': [0.05]}))
    .mark_rule(size=1.5, strokeDash=[5, 5])
    .encode(
        x=alt.X('V1:Q', axis=alt.Axis(tickCount=5)),
        color=alt.value('black'),
    )
)

# Layer order (points, boxes, rule) is kept exactly as in the original cell.
fig_t1_Mi = (
    (point_t1 + box_t1 + rule1)
    .facet(
        row=alt.Row('correlation:O', title='Correlation strength', sort='descending')
    )
    .properties(title="A")
)

## Panel B (right): AUC ##
box_auc = (
    alt.Chart(df_auc)
    .mark_boxplot(**_box_mark)
    .encode(y=_auc_y(), x=_auc_x(), color='Method:N')
    .properties(**_panel_size)
)

point_auc = (
    alt.Chart(df_auc)
    .mark_point(**_point_mark)
    .encode(y=_auc_y(), x=_auc_x(), color='Method:N')
    .transform_sample(_n_points_shown)
    .properties(**_panel_size)
)

# Solid vertical reference line at AUC = 0.5; only the V1 column is encoded.
_rule2_data = {
    'V1': [0.5],
    'nominal_error': ['nominal Type-1 error'],
    'color': ['black'],
}
rule2 = (
    alt.Chart(pd.DataFrame(_rule2_data))
    .mark_rule(size=1.5, color='black')
    .encode(x='V1:Q')
)

fig_auc_Mi = (
    (point_auc + box_auc + rule2)
    .facet(
        row=alt.Row(
            'correlation:O',
            title=None,
            sort='descending',
            header=alt.Header(title=None, labels=False),
        )
    )
    .properties(title="B")
)

## Assemble both panels and apply the shared typography ##
my_font = 'Helvetica'
fig = (
    alt.hconcat(fig_t1_Mi, fig_auc_Mi)
    .configure_axis(
        grid=False,
        titleFont=my_font,
        titleFontWeight='normal',
        titleFontSize=20,
        labelFont=my_font,
        labelFontSize=16,
    )
    .configure_header(
        titleFont=my_font,
        titleFontWeight='normal',
        titleFontSize=20,
        labelFont=my_font,
        labelFontSize=16,
    )
    .configure_view(strokeWidth=0)
    .configure_title(font=my_font, fontSize=20)
    .configure_legend(
        titleFontSize=20,
        labelFontSize=20,
        labelLimit=0,
        orient='none',
        legendX=150,
        legendY=175,
    )
)

# Uncomment to export the figure:
# fig.save('figure_1.svg')
# fig.save('figure_1.png', scale_factor=3)
fig

## Figure 2

In [7]:
# Result files of the sample-size sweep; sorted because glob() order is
# filesystem-dependent.
sorted(res_path.glob('*100--1000*.csv'))

[PosixPath('../results/results_csv/simulation_results_blocks_100_dnn_dnn_py_perm_100--1000.csv'),
 PosixPath('../results/results_csv/type1error_blocks_100_dnn_dnn_py_perm_100--1000.csv'),
 PosixPath('../results/results_csv/AUC_blocks_100_dnn_dnn_py_perm_100--1000.csv')]

In [8]:
# Read the AUC and type-I error tables of the sample-size sweep.
df_auc_range, df_t1_error_range = (
    pd.read_csv(res_path / csv_name)
    for csv_name in (
        'AUC_blocks_100_dnn_dnn_py_perm_100--1000.csv',
        'type1error_blocks_100_dnn_dnn_py_perm_100--1000.csv',
    )
)

In [9]:
# Display order of the simulation scenarios.
scenarios = ['Classification', 'Plain linear', 'Regression with ReLu', 'Interactions only', 'Main effects and Interactions']

# Mean and standard deviation of column V2 per method / correlation /
# sample size / scenario.
# String aggregator names replace np.mean / np.std: pandas >= 2.1 deprecates
# numpy callables in agg() and will eventually call them directly, which for
# np.std means ddof=0 instead of the ddof=1 pandas uses today. The strings
# keep the current values and the 'mean' / 'std' column names.
df_t1_error_range_agg = (
    df_t1_error_range
    .groupby(['Method', 'correlation', 'n_samples', 'prob_data'])['V2']
    .agg(['mean', 'std'])
    .reset_index()
)
# Band of one standard deviation around the mean, drawn as the error band.
df_t1_error_range_agg['ymin'] = df_t1_error_range_agg['mean'] - df_t1_error_range_agg['std']
df_t1_error_range_agg['ymax'] = df_t1_error_range_agg['mean'] + df_t1_error_range_agg['std']
# Re-order the rows so scenarios appear in the order listed above.
df_t1_error_range_agg = df_t1_error_range_agg.set_index('prob_data').loc[scenarios].reset_index()
df_t1_error_range_agg.loc[::10]

Unnamed: 0,prob_data,Method,correlation,n_samples,mean,std,ymin,ymax
0,Classification,CPI-DNN,0.8,100,0.039,0.036716,0.002284,0.075716
10,Classification,Permfit-DNN,0.8,100,0.125333,0.06802,0.057313,0.193353
20,Plain linear,CPI-DNN,0.8,100,0.034667,0.030675,0.003992,0.065342
30,Plain linear,Permfit-DNN,0.8,100,0.243333,0.092539,0.150795,0.335872
40,Regression with ReLu,CPI-DNN,0.8,100,0.036,0.035353,0.000647,0.071353
50,Regression with ReLu,Permfit-DNN,0.8,100,0.170333,0.075789,0.094544,0.246122
60,Interactions only,CPI-DNN,0.8,100,0.043333,0.037754,0.005579,0.081087
70,Interactions only,Permfit-DNN,0.8,100,0.095333,0.067006,0.028327,0.162339
80,Main effects and Interactions,CPI-DNN,0.8,100,0.046667,0.037003,0.009663,0.08367
90,Main effects and Interactions,Permfit-DNN,0.8,100,0.103,0.066683,0.036317,0.169683


In [10]:
# Add the mean -/+ sd band limits, then order the rows by `scenarios`.
df_auc_range = (
    df_auc_range
    .assign(
        ymin=lambda frame: frame['mean'] - frame['sd'],
        ymax=lambda frame: frame['mean'] + frame['sd'],
    )
    .set_index('prob_data')
    .loc[scenarios]
    .reset_index()
)

In [11]:
def plot_fig2(df_data_t1, df_data_auc, title_x=None, title_facet=None, title_y=None,
              title_subfig1='', title_subfig2='', height=110, width=240):
    """Build one column of Figure 2: a type-I error panel stacked on an AUC panel.

    Each panel layers points, an error band (``ymin``..``ymax``), a line and a
    horizontal reference rule, and is faceted by ``prob_data``.

    Parameters
    ----------
    df_data_t1 : pandas.DataFrame
        Data for the top panel. The columns ``n_samples``, ``mean``, ``ymin``,
        ``ymax``, ``Method`` and ``prob_data`` are read.
    df_data_auc : pandas.DataFrame
        Data for the bottom panel; same columns as ``df_data_t1``.
    title_x : str or None
        x-axis title of the bottom panel (the top panel has no x title).
    title_facet : str or None
        Title of the facet header shown above the top panel.
    title_y : sequence of two (str or None), optional
        ``title_y[0]`` is the y title of the AUC (bottom) panel and
        ``title_y[1]`` the y title of the type-I error (top) panel.
        ``None`` (the default) means no y titles.
    title_subfig1, title_subfig2 : str
        Titles of the top and bottom panels.
    height, width : int
        Size of each panel's plotting area.

    Returns
    -------
    The vertical concatenation (``alt.vconcat``) of the two faceted panels.
    """
    # The default ``title_y=None`` used to raise TypeError on ``title_y[1]``;
    # treat it as "no y titles", which is what callers pass as (None, None).
    if title_y is None:
        title_y = (None, None)

    ### Type-I error ###
    # Dashed horizontal rule at 0.05; only the V1 column is encoded.
    rule1 = alt.Chart(pd.DataFrame({
      'V1': [0.05],
      'Method': ['Type-I error nominal rate'],
      'color': ['black']
    })).mark_rule(size=1.5, strokeDash=[5, 5]).encode(
      y='V1:Q',
      color = alt.value('black')
    )

    points = alt.Chart(
        df_data_t1
    ).mark_point(
    ).encode(
        x='n_samples',
        y='mean',
        color='Method'
    )

    err = alt.Chart(
        df_data_t1
    ).mark_errorband(
    ).encode(
        x='n_samples',
        y=alt.Y('ymin', title=''),
        y2=alt.Y2('ymax', title=None),
        color='Method'
    )

    lines = alt.Chart(
        df_data_t1
    ).mark_line(
    ).encode(
        x=alt.X('n_samples', title=None),
        y=alt.Y('mean', title=title_y[1]),
        color='Method'
    ).properties(
        height=height,
        width=width
    )

    # Keep the left-to-right `+` chain: the data layers come first and the
    # rule, which carries its own one-row frame, is appended last.
    fig_t1_err_range = (points + err + lines + rule1).facet(
        column=alt.Column('prob_data:N', title=title_facet)
    ).properties(title=title_subfig1)

    ### AUC ###
    # Solid horizontal rule at AUC = 0.5; only the V1 column is encoded.
    rule2 = alt.Chart(pd.DataFrame({
      'V1': [0.5],
      'Method': ['Type-I error nominal rate'],
      'color': ['black']
    })).mark_rule(size=1.5).encode(
      y='V1:Q',
      color = alt.value('black')
    )

    points_auc = alt.Chart(
        df_data_auc
    ).mark_point(
    ).encode(
        x=alt.X('n_samples', title=title_x),
        y=alt.Y('mean', scale=alt.Scale(domain=[0.4, 1.0])),
        color='Method'
    )

    err_auc = alt.Chart(
        df_data_auc
    ).mark_errorband(
    ).encode(
        x=alt.X('n_samples', title=title_x),
        y=alt.Y('ymin', title='', scale=alt.Scale(domain=[0.4, 1.0])),
        y2=alt.Y2('ymax', title=None),
        color='Method'
    )

    lines_auc = alt.Chart(
        df_data_auc
    ).mark_line(
    ).encode(
        x=alt.X('n_samples', title=title_x),
        y=alt.Y('mean', title=title_y[0], scale=alt.Scale(domain=[0.4, 1.0])),
        color='Method'
    ).properties(
        height=height,
        width=width
    )

    # Facet header labels are hidden here; the top panel already shows them.
    fig_auc_range = (points_auc + err_auc + lines_auc + rule2).facet(
        column=alt.Column('prob_data:N', title=None,
                          header=alt.Header(labels=False))).properties(
        title=title_subfig2
    )
    fig2 = alt.vconcat(fig_t1_err_range, fig_auc_range)
    return fig2

# One entry per scenario column of Figure 2:
# (scenario, x title, facet title, (AUC y title, type-I y title),
#  top-panel title, bottom-panel title)
prob_data = [('Classification', None, None, ('AUC', 'Type-I error'), 'A', 'B'),
             ('Plain linear', None, None, (None, None), ' ', ' '),
             ('Regression with ReLu', "Number of samples", "Scenario", (None, None), ' ', ' '),
             ('Interactions only', None, None, (None, None), ' ', ' '),
             ('Main effects and Interactions', None, None, (None, None), ' ', ' ')]

# Build one two-panel column per scenario from the matching rows.
list_figs = []
for scenario, x_title, facet_title, y_titles, top_title, bottom_title in prob_data:
    t1_rows = df_t1_error_range_agg.loc[df_t1_error_range_agg['prob_data'] == scenario]
    auc_rows = df_auc_range.loc[df_auc_range['prob_data'] == scenario]
    list_figs.append(
        plot_fig2(
            t1_rows,
            auc_rows,
            title_x=x_title,
            title_facet=facet_title,
            title_y=y_titles,
            title_subfig1=top_title,
            title_subfig2=bottom_title,
        )
    )

# Place the scenario columns side by side with tight spacing.
fig2 = alt.hconcat(*list_figs).configure(
    concat=alt.CompositionConfig(spacing=5)
)

# Shared typography and legend placement.
my_font = 'Helvetica'
fig2 = (
    fig2
    .configure_axis(
        grid=True,
        titleFont=my_font,
        titleFontWeight='normal',
        titleFontSize=28,
        titlePadding=20,
        labelFont=my_font,
        labelFontSize=24,
    )
    .configure_header(
        titleFont=my_font,
        titleFontWeight='normal',
        titleFontSize=28,
        labelFont=my_font,
        labelFontSize=24,
    )
    .configure_view(strokeWidth=0)
    .configure_title(font=my_font, fontSize=28)
    .configure_legend(
        titleFontSize=28,
        labelFontSize=24,
        labelLimit=0,
        orient='none',
        columns=2,
        legendX=900,
        legendY=-110,
    )
)

# Uncomment to export the figure:
# fig2.save('figure_2.svg')
# fig2.save('figure_2.png', scale_factor=3)
fig2

## Figure 3

In [12]:
# Result files of the all-methods benchmark; sorted because glob() order is
# filesystem-dependent.
sorted(res_path.glob('*all*.csv'))

[PosixPath('../results/results_csv/simulation_results_blocks_100_allMethods_pred_final.csv'),
 PosixPath('../results/results_csv/Result_single_FREQ_all_imp_outer_10_inner.csv'),
 PosixPath('../results/results_csv/power_blocks_100_allMethods_pred_imp_final.csv'),
 PosixPath('../results/results_csv/type1error_blocks_100_allMethods_pred_imp_final.csv'),
 PosixPath('../results/results_csv/AUC_blocks_100_allMethods_pred_imp_final_withoutPval.csv'),
 PosixPath('../results/results_csv/Result_single_10Fold_FREQ_all_imp.csv'),
 PosixPath('../results/results_csv/time_bars_blocks_100_allMethods_pred_imp_final.csv'),
 PosixPath('../results/results_csv/AUC_blocks_100_allMethods_pred_imp_final_withPval.csv')]

In [13]:
# Raw identifiers -> display labels used in Figure 3. Identifiers that already
# equal their display label (d0CRT, Permfit-DNN, CPI-DNN, CPI-RF) need no entry
# because `.replace` leaves unlisted values untouched.
_scenario_labels = {'classification': 'Classification',
                    'regression': 'Plain linear',
                    'regression_relu': 'Regression with ReLu',
                    'regression_product': 'Interactions only',
                    'regression_combine': 'Main effects and Interactions'}
_method_labels = {'Marg': 'Marginal',
                  'Strobl': 'Conditional-RF',
                  'lazyvi': 'Lazy VI',
                  'cpi_knockoff': 'cpi-knockoff',
                  'loco': 'LOCO'}


def _load_all_methods_table(filename):
    """Read one all-methods result CSV and normalise its column names and labels.

    `.replace` is used rather than `.map`: `.map` turns every value missing
    from the dict into NaN, which silently drops rows in later group-bys,
    e.g. when a file already stores the display labels. With `.replace`
    the relabelling is a no-op on such files, so the same loader is safe for
    both tables.
    """
    table = pd.read_csv(res_path / filename).rename(
        columns={'method': 'Method', 'prob_data': 'Problem Data'}
    )
    table['Problem Data'] = table['Problem Data'].replace(_scenario_labels)
    table['Method'] = table['Method'].replace(_method_labels)
    return table


df_auc_all = _load_all_methods_table('AUC_blocks_100_allMethods_pred_imp_final_withPval.csv')
df_t1_error_all = _load_all_methods_table('type1error_blocks_100_allMethods_pred_imp_final.csv')

In [14]:
# Fixed seed so the previewed rows are the same on every run.
df_auc_all.sample(10, random_state=0)

Unnamed: 0,Method,correlation,n_samples,Problem Data,iteration,prob_type,V1
3427,CPI-RF,0.8,1000,Regression with ReLu,28,regression,0.63
4103,LOCO,0.8,1000,Plain linear,4,regression,0.513333
3654,cpi-knockoff,0.8,1000,Plain linear,55,regression,0.646667
1789,Conditional-RF,0.8,1000,Main effects and Interactions,90,regression,0.745
4249,LOCO,0.8,1000,Main effects and Interactions,50,regression,0.483333
332,Permfit-DNN,0.8,1000,Interactions only,33,regression,0.983333
2041,d0CRT,0.8,1000,Classification,42,classification,0.87
99,Permfit-DNN,0.8,1000,Classification,100,classification,0.746667
1881,Conditional-RF,0.8,1000,Interactions only,82,regression,0.846667
4349,LOCO,0.8,1000,Interactions only,50,regression,0.848333


In [15]:
# Fixed seed so the previewed rows are the same on every run.
df_t1_error_all.sample(10, random_state=0)

Unnamed: 0,Method,correlation,n_samples,Problem Data,iteration,prob_type,V1
3155,CPI-RF,0.8,1000,Plain linear,56,regression,0.0
2923,Lazy VI,0.8,1000,Regression with ReLu,24,regression,0.0
743,CPI-DNN,0.8,1000,Main effects and Interactions,44,regression,0.033333
3701,cpi-knockoff,0.8,1000,Main effects and Interactions,2,regression,0.0
4026,LOCO,0.8,1000,Classification,27,classification,0.0
27,Permfit-DNN,0.8,1000,Classification,28,classification,0.3
693,CPI-DNN,0.8,1000,Plain linear,94,regression,0.0
2409,d0CRT,0.8,1000,Regression with ReLu,10,regression,0.066667
2661,Lazy VI,0.8,1000,Plain linear,62,regression,0.033333
398,Permfit-DNN,0.8,1000,Interactions only,99,regression,0.733333


In [16]:
def ymin(x):
    """Return the 0.25 quantile (first quartile) of ``x``.

    The function name doubles as the column label when passed to ``agg``.
    """
    lower_quartile = np.quantile(x, 0.25)
    return lower_quartile
def ymax(x):
    """Return the 0.75 quantile (third quartile) of ``x``.

    The function name doubles as the column label when passed to ``agg``.
    """
    upper_quartile = np.quantile(x, 0.75)
    return upper_quartile
# Display order of scenarios and methods in Figure 3.
scenarios = ['Classification', 'Plain linear', 'Regression with ReLu', 'Interactions only', 'Main effects and Interactions']
methods = ['Marginal', 'd0CRT', 'Conditional-RF', 'Lazy VI', 'LOCO', 'cpi-knockoff', 'CPI-RF', 'Permfit-DNN', 'CPI-DNN']

# Mean plus first/third quartile of V1 per method and scenario.
# 'mean' is passed as a string: pandas >= 2.1 deprecates numpy callables such
# as np.mean inside agg(); the string keeps the same values and column name.
df_auc_all_agg = (
    df_auc_all
    .groupby(['Method', 'Problem Data', 'prob_type'])['V1']
    .agg(['mean', ymin, ymax])
    .reset_index()
)
# Order rows by scenario first, then by method (outermost ordering).
df_auc_all_agg = df_auc_all_agg.set_index('Problem Data').loc[scenarios].reset_index()
df_auc_all_agg = df_auc_all_agg.set_index('Method').loc[methods].reset_index()
df_auc_all_agg.head(20)

Unnamed: 0,Method,Problem Data,prob_type,mean,ymin,ymax
0,Marginal,Classification,classification,0.6351,0.57625,0.692083
1,Marginal,Plain linear,regression,0.641517,0.587917,0.7075
2,Marginal,Regression with ReLu,regression,0.637183,0.575833,0.710417
3,Marginal,Interactions only,regression,0.525383,0.471667,0.582083
4,Marginal,Main effects and Interactions,regression,0.607833,0.546667,0.671667
5,d0CRT,Classification,classification,0.856883,0.8275,0.888333
6,d0CRT,Plain linear,regression,0.90835,0.873333,0.952083
7,d0CRT,Regression with ReLu,regression,0.882283,0.85,0.925417
8,d0CRT,Interactions only,regression,0.516917,0.456667,0.575417
9,d0CRT,Main effects and Interactions,regression,0.652783,0.598333,0.7075


In [17]:
# Mean plus first/third quartile of V1 per method and scenario.
# 'mean' is passed as a string: pandas >= 2.1 deprecates numpy callables such
# as np.mean inside agg(); the string keeps the same values and column name.
df_t1_error_all_agg = (
    df_t1_error_all
    .groupby(['Method', 'Problem Data', 'prob_type'])['V1']
    .agg(['mean', ymin, ymax])
    .reset_index()
)
# Order rows by scenario first, then by method (outermost ordering).
df_t1_error_all_agg = df_t1_error_all_agg.set_index('Problem Data').loc[scenarios].reset_index()
df_t1_error_all_agg = df_t1_error_all_agg.set_index('Method').loc[methods].reset_index()
df_t1_error_all_agg.head(20)

Unnamed: 0,Method,Problem Data,prob_type,mean,ymin,ymax
0,Marginal,Classification,classification,0.671,0.566667,0.8
1,Marginal,Plain linear,regression,0.709333,0.625,0.833333
2,Marginal,Regression with ReLu,regression,0.680333,0.566667,0.8
3,Marginal,Interactions only,regression,0.075,0.033333,0.1
4,Marginal,Main effects and Interactions,regression,0.367,0.266667,0.466667
5,d0CRT,Classification,classification,0.051333,0.033333,0.066667
6,d0CRT,Plain linear,regression,0.079667,0.033333,0.1
7,d0CRT,Regression with ReLu,regression,0.049333,0.033333,0.066667
8,d0CRT,Interactions only,regression,0.026,0.0,0.033333
9,d0CRT,Main effects and Interactions,regression,0.025667,0.0,0.033333


In [18]:
## Figure 3: per-method type-I error (panel A) above AUC (panel B), one facet
## column per method and one coloured marker per scenario.
marker_size = 200
err_size = 2
height = 175
width= 125
scheme = 'tableau10'
_panel_size = dict(height=height, width=width)
_point_mark = dict(size=marker_size, opacity=1, fill='white')


def _scenario_x(shorthand, hide_axis):
    """Fresh x channel over the scenarios; optionally hides labels and ticks."""
    options = dict(title=None, scale=alt.Scale(domain=scenarios[::-1]))
    if hide_axis:
        options['axis'] = alt.Axis(labels=False, title=None, tickSize=0)
    return alt.X(shorthand, **options)


def _metric_y(shorthand, metric_title, domain):
    """Fresh y channel with a fixed domain and the metric name as title."""
    return alt.Y(shorthand, title=metric_title, scale=alt.Scale(domain=domain))


def _scenario_color():
    """Fresh colour channel mapping scenarios onto the chosen scheme."""
    return alt.Color('Problem Data:O', title='Scenario',
                     scale=alt.Scale(scheme=scheme, domain=scenarios[::-1]))


## Panel A: type-I error ##
points_a = (
    alt.Chart(df_t1_error_all_agg)
    .mark_point(**_point_mark)
    .encode(
        x=_scenario_x('Problem Data:O', hide_axis=True),
        y=_metric_y('mean:Q', 'Type-I error', (0, 1.0)),
        color=_scenario_color(),
    )
    .properties(**_panel_size)
)

error_a = (
    alt.Chart(df_t1_error_all_agg)
    .mark_errorbar(opacity=1)
    .encode(
        x=_scenario_x('Problem Data:O', hide_axis=False),
        y=_metric_y('ymin:Q', 'Type-I error', (0, 1.0)),
        y2=alt.Y2('ymax:Q', title='Type-I error'),
        color=_scenario_color(),
        strokeWidth=alt.value(err_size),
    )
    .properties(**_panel_size)
)

# Dashed horizontal rule at 0.05; only the V1 column is encoded.
_rule1_data = {
    'V1': [0.05],
    'Problem Data': ['Type-I error nominal rate'],
    'color': ['black'],
}
rule1 = (
    alt.Chart(pd.DataFrame(_rule1_data))
    .mark_rule(size=1.5, color='black', strokeDash=[5, 5])
    .encode(y='V1:Q', color=alt.value('black'))
)

fig_t1_all = (
    (error_a + points_a + rule1)
    .facet(column=alt.Column('Method:O', sort=methods[::-1]))
    .properties(title='A')
)

## Panel B: AUC ##
# NOTE(review): unlike panel A, the hidden axis sits on the error-bar layer,
# that layer uses ':N' instead of ':O', and it passes size= to mark_errorbar.
# Reproduced as-is; confirm whether the asymmetry is intended.
points_b = (
    alt.Chart(df_auc_all_agg)
    .mark_point(**_point_mark)
    .encode(
        x=_scenario_x('Problem Data:O', hide_axis=False),
        y=_metric_y('mean:Q', 'AUC', (0.4, 1.0)),
        color=_scenario_color(),
    )
    .properties(**_panel_size)
)

error_b = (
    alt.Chart(df_auc_all_agg)
    .mark_errorbar(size=err_size, opacity=1)
    .encode(
        x=_scenario_x('Problem Data:N', hide_axis=True),
        y=_metric_y('ymin:Q', 'AUC', (0.4, 1.0)),
        y2=alt.Y2('ymax:Q', title='AUC'),
        color=_scenario_color(),
        strokeWidth=alt.value(err_size),
    )
    .properties(**_panel_size)
)

# Solid horizontal rule at AUC = 0.5; only the V1 column is encoded.
_rule2_data = {
    'V1': [0.5],
    'Problem Data': ['Type-I error nominal rate'],
    'color': ['black'],
}
rule2 = (
    alt.Chart(pd.DataFrame(_rule2_data))
    .mark_rule(size=1.5, color='black')
    .encode(y='V1:Q')
)

fig_auc_all = (
    (error_b + points_b + rule2)
    .facet(
        column=alt.Column(
            'Method:O',
            title=None,
            sort=methods[::-1],
            header=alt.Header(title=None, labels=False),
        )
    )
    .properties(title='B')
)

## Stack the panels and apply the shared typography ##
my_font = 'Helvetica'
fig3 = (
    alt.vconcat(fig_t1_all, fig_auc_all)
    .configure_axis(
        grid=True,
        titleFont=my_font,
        titleFontWeight='normal',
        titleFontSize=32,
        titlePadding=20,
        labelFont=my_font,
        labelFontSize=28,
        labelLimit=0,
    )
    .configure_header(
        titleFont=my_font,
        titleFontWeight='normal',
        titleFontSize=32,
        labelFont=my_font,
        labelFontSize=28,
        labelPadding=35,
    )
    .configure_view(strokeWidth=0)
    .configure_title(font=my_font, fontSize=28)
    .configure_legend(
        titleFontSize=28,
        labelFontSize=24,
        labelLimit=0,
        orient='top',
        columnPadding=45,
        symbolSize=150,
    )
)

# Uncomment to export the figure:
# fig3.save('figure_3.svg')
# fig3.save('figure_3.png', scale_factor=3)
fig3