# Simulation 2: Partially linear regression model


Model description: 
$$
    Y^{(j)} = D^{(j)} \beta + \gamma_j(X^{(j)}) + U^{(j)}, \quad E[U^{(j)} | X^{(j)}, D^{(j)}] = 0, \\
    D^{(j)} = \mu_j(X^{(j)}) + V^{(j)}, \quad E[V^{(j)} | X^{(j)}] = 0.
$$
    

In [2]:
import pandas as pd
import holoviews as hv
from holoviews import dim
hv.extension('bokeh')

## Scenario 1
Basic parameter setting: $K = 5$, $n = 100$, $c_j = 0.25$, $100$ replications.

In [19]:
# Scenario 1 baseline (K = 5, n = 100): load the estimates and index them by
# `rnd` so that only the remaining (estimator) columns are melted below.
out = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_iter100.csv'
).set_index('rnd')

# Wide -> long: one row per (rnd, Method) with the estimate stored in `EST`.
out_long = out.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# One box per method, coloured by the method name.
boxwhisker = hv.BoxWhisker(out_long, kdims='Method', vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=300,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='orange')

# The overlay is the cell's displayed output.
(boxwhisker * plt_hline).opts(default_tools=[])

Parameter setting: $n = 100 \rightarrow 500$.

In [2]:
# Same layout as the n = 100 cell, but reading the n = 500 results file.
out = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n500_iter100.csv'
).set_index('rnd')

# Wide -> long: one row per (rnd, Method) with the estimate stored in `EST`.
out_long = out.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# One box per method, coloured by the method name.
boxwhisker = hv.BoxWhisker(out_long, kdims='Method', vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=300,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline

Comparison: sample size $100$ and $500$ in each site.

In [3]:
# Load both sample-size runs and label each with its per-site sample size.
out100 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_iter100.csv'
).assign(n=100)

out500 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n500_iter100.csv'
).assign(n=500)

# Stack the runs and move the identifying columns into the index so that
# only estimator columns are melted.
out_comp = pd.concat([out100, out500], axis=0).set_index(['rnd', 'n'])

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by sample size, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['n', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=500,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

(boxwhisker * plt_hline).opts(default_tools=[])

Comparison: random seeds `128`-`227` vs. `2023`-`2122`.

In [9]:
# Two runs of the same setting; `rnd_seed` labels each run with the first
# seed of its range as given in the heading above.
out128 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_iter100.csv'
).assign(rnd_seed=128)

out2023 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_iter100_v2.csv'
).assign(rnd_seed=2023)

# Stack the runs and index by the identifying columns before melting.
out_comp = pd.concat([out128, out2023], axis=0).set_index(['rnd', 'rnd_seed'])

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by seed range, then by method.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['rnd_seed', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=500,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

(boxwhisker * plt_hline).opts(default_tools=[])

## Scenario 2: random data splitting

Setting: a single random data split.

In [None]:

# Keep only the rows with rnd_ds == 128, then drop that column and index by
# `rnd_np` so that only estimator columns are melted.
out = (
    pd.read_csv(
        '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_rds10.csv',
    )
    .loc[lambda frame: frame['rnd_ds'] == 128]
    .drop(columns='rnd_ds')
    .set_index('rnd_np')
)

# Wide -> long: one row per (rnd_np, Method) with the estimate in `EST`.
out_long = out.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

boxwhisker = hv.BoxWhisker(out_long, kdims='Method', vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=300,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline


Setting: $10$ random data splittings.

In [5]:
out = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_rds10.csv',
)

# print(out.describe())

# Aggregate the rows sharing an `rnd_np` value in two ways and tag each
# result with the aggregation used.
out_mean = out.groupby('rnd_np').mean().assign(type='Mean')
out_median = out.groupby('rnd_np').median().assign(type='Median')

# Stack both summaries, discard the aggregated `rnd_ds` column, and index by
# (rnd_np, type) so that only estimator columns are melted.
out_comp = (
    pd.concat([out_mean, out_median])
    .drop(columns='rnd_ds')
    .reset_index()
    .set_index(['rnd_np', 'type'])
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by aggregation type, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['type', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=500,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline

Sanity check: non-random data splitting

In [6]:
out = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_nds.csv',
)

# Keep only the rows with rnd_ds == 128. The filtered frame used to be
# computed and thrown away, so the filter never took effect; assign it, as
# the "single random data split" cell does.
# NOTE(review): assumes the `_nds` file contains rows with rnd_ds == 128 --
# confirm, otherwise the plot will be empty.
out = out[out['rnd_ds'] == 128]

# Drop the split identifier and index by `rnd_np` so that only estimator
# columns are melted.
out = out.drop('rnd_ds', axis=1)

out.set_index('rnd_np', inplace=True)


# Wide -> long: one row per (rnd_np, Method) with the estimate in `EST`.
out_long = out.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

boxwhisker = hv.BoxWhisker(out_long, 'Method', 'EST')
boxwhisker.opts(
    show_legend=False, 
    width=300, 
    box_fill_color=dim('Method').str(), 
    cmap='Set1', 
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2)
plt_hline.opts(
    color='black',
)

boxwhisker * plt_hline

Sanity check: a single data split

In [10]:
out = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_rds10.csv',
)

# Restrict to rows with rnd_ds <= 130 and average within each
# (rnd_np, rnd_ds) pair; both keys end up in the index.
out_filter = (
    out.loc[out['rnd_ds'] <= 130]
    .groupby(['rnd_np', 'rnd_ds'])
    .mean()
)

out_long = out_filter.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by rnd_ds, then by method; the y-range is pinned to
# (1.5, 2.5).
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['rnd_ds', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=600,
    ylim=(1.5, 2.5),
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline

Conclusion: a single data split is not stable.

Comparison of $10$-$20$-$50$ random data splittings

In [None]:
out = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_M5_rnp100_rds50.csv',
)

# Average per `rnd_np` over three nested subsets of rows: rnd_ds < 138,
# rnd_ds < 148, and all rows. `iter` carries the 10 / 20 / 50 label used in
# the heading (assumes rnd_ds starts at 128 -- TODO confirm).
out10 = out.loc[out['rnd_ds'] < 138].groupby('rnd_np').mean().assign(iter=10)
out20 = out.loc[out['rnd_ds'] < 148].groupby('rnd_np').mean().assign(iter=20)
out50 = out.groupby('rnd_np').mean().assign(iter=50)

# Stack, discard the averaged `rnd_ds` column, and index by (rnd_np, iter)
# so that only estimator columns are melted.
out_comp = (
    pd.concat([out10, out20, out50], axis=0)
    .drop(columns='rnd_ds')
    .reset_index()
    .set_index(['rnd_np', 'iter'])
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by number of splittings, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['iter', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=800,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline

Comparison of $n=100$-$200$ sample size of each block

In [14]:
# Load the n = 100 and n = 200 runs and label each with its sample size.
out100 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_rds10.csv',
).assign(n=100)

out200 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n200_rnp100_rds10.csv',
).assign(n=200)

# Average within each (rnd_np, n) group and discard the averaged `rnd_ds`
# column so that it is not melted as a method.
out_comp = (
    pd.concat([out100, out200], axis=0)
    .groupby(['rnd_np', 'n'])
    .mean()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by sample size, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['n', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=500,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline


Comparison: data replications `100` vs. `200` with `n=200` and `n_rds=10`.
    

In [19]:
# Load the rnp100 and rnp200 runs (both n = 200) and label each with its
# number of data replications.
out100 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n200_rnp100_rds10.csv',
).assign(n_rnp=100)

out200 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n200_rnp200_rds10.csv',
).assign(n_rnp=200)

# Average within each (rnd_np, n_rnp) group and discard the averaged
# `rnd_ds` column so that it is not melted as a method.
out_comp = (
    pd.concat([out100, out200], axis=0)
    .groupby(['rnd_np', 'n_rnp'])
    .mean()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by replication count, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['n_rnp', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=500,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline

Conclusion: the results with `n = 200`, `n_rnp = 100` are stable.

Comparison: uncorrelated confounders and correlated confounders.

In [13]:
# Load the baseline run and the `corX` run; `cor_coef` labels them 0 and 0.7.
out_uncor = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_rds10.csv',
).assign(cor_coef=0)

out_cor = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_rds10_corX.csv',
).assign(cor_coef=0.7)

# Average within each (rnd_np, cor_coef) group and discard the averaged
# `rnd_ds` column so that it is not melted as a method.
out_comp = (
    pd.concat([out_uncor, out_cor], axis=0)
    .groupby(['rnd_np', 'cor_coef'])
    .mean()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by correlation label, then by method.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['cor_coef', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=500,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline


Conclusion: No significant difference.

Comparison: single-, double-, oracle-double- density estimation

In [61]:
label_denest = ['single', 'double', 'ora-dou']
label_method = ['Average', 'M1', 'M2']

def hook_change_order(plot, element):
    """Plot hook that fixes the order of the (DenEst, Method) x-axis factors.

    Every density-estimation label is paired with every method label, in the
    order of `label_denest` then `label_method`; the pairs ('ora-dou', 'M3')
    through ('ora-dou', 'M10') are appended afterwards. The resulting list is
    written to `plot.state.x_range.factors`. `element` is unused.
    """
    ordered = []
    for den_est in label_denest:
        for method in label_method:
            ordered.append((den_est, method))
    # Extra methods listed only for the oracle-double variant.
    for k in range(3, 11):
        ordered.append(('ora-dou', 'M%d' % k))
    plot.state.x_range.factors = ordered

# Load the three runs and label each with its density-estimation variant.
out_sd = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_rds10.csv',
).assign(DenEst='single')

out_dd = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_ad_K5_n100_rnp100_rds10.csv',
).assign(DenEst='double')

out_odd = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_odd_K5_n100_rnp100_rds10_M10.csv',
).assign(DenEst='ora-dou')

# Median within each (rnd_np, DenEst) group; discard the aggregated
# `rnd_ds` column so that it is not melted as a method.
out_comp = (
    pd.concat([out_sd, out_dd, out_odd], axis=0)
    .groupby(['rnd_np', 'DenEst'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by variant, then by method; the hook sets the factor order.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['DenEst', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=800,
    hooks=[hook_change_order],
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: single- double- and oracle-double-density estimation with sample size $100$ or $200$.

In [62]:
label_denest = ['single', 'double', 'ora-dou']
label_method = ['Average', 'M1', 'M2']
label_n = ['100', '200']

def hook_change_order(plot, element):
    """Plot hook that fixes the order of the (n, DenEst, Method) x-axis factors.

    The (DenEst, Method) pairs are every `label_denest` x `label_method`
    combination followed by ('ora-dou', 'M3'..'M5'); that whole sequence is
    repeated once per entry of `label_n`, and the resulting triples are
    written to `plot.state.x_range.factors`. `element` is unused.
    """
    pairs = [(den_est, method) for den_est in label_denest for method in label_method]
    # Extra methods listed only for the oracle-double variant.
    pairs += [('ora-dou', 'M%d' % k) for k in (3, 4, 5)]
    plot.state.x_range.factors = [
        (size, den_est, method)
        for size in label_n
        for (den_est, method) in pairs
    ]

# n = 100 runs, one per density-estimation variant.
out_sd_100 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n100_rnp100_rds10.csv',
).assign(DenEst='single')

out_dd_100 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_ad_K5_n100_rnp100_rds10.csv',
).assign(DenEst='double')

out_odd_100 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_odd_K5_n100_rnp100_rds10_M5.csv',
).assign(DenEst='ora-dou')

# n = 200 runs, one per density-estimation variant.
out_sd_200 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n200_rnp100_rds10.csv',
).assign(DenEst='single')

out_dd_200 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_ad_K5_n200_rnp100_rds10.csv',
).assign(DenEst='double')

out_odd_200 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_odd_K5_n200_rnp100_rds10_M5.csv',
).assign(DenEst='ora-dou')

# Stack per sample size and label each stack with its `n`.
out_100 = pd.concat([out_sd_100, out_dd_100, out_odd_100], axis=0).assign(n=100)
out_200 = pd.concat([out_sd_200, out_dd_200, out_odd_200], axis=0).assign(n=200)

# Median within each (rnd_np, DenEst, n) group; discard the aggregated
# `rnd_ds` column so that it is not melted as a method.
out_comp = (
    pd.concat([out_100, out_200], axis=0)
    .groupby(['rnd_np', 'DenEst', 'n'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes nested by n, variant and method; the hook sets the factor order.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['n', 'DenEst', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=1000,
    hooks=[hook_change_order],
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Conclusion: 
- the difference between single-equation and double-equation density estimation is minor, 
- there is some fluctuation for oracle-double density estimation.

Comparison: number of data sites $5$, $10$, ~~20 and 50~~.

- sample size $n=1000$, $\text{dim}(X) = 60$.

In [3]:
# Four runs (n = 1000, p = 60): K in {5, 10} crossed with n_rft in {200, 500}.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n1000_p60_rnp100_rds10_rft200.csv',
).assign(K=5, n_rft=200)

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n1000_p60_rnp100_rds10_rft500.csv',
).assign(K=5, n_rft=500)

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K10_n1000_p60_rnp100_rds10_rft200.csv',
).assign(K=10, n_rft=200)

out_4 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K10_n1000_p60_rnp100_rds10_rft500.csv',
).assign(K=10, n_rft=500)

# print("n1000_rtf200: ", out_3.shape[0] / 10, "%")
# print("n1000_rtf500: ", out_4.shape[0] / 10, "%")

# Median within each (rnd_np, K, n_rft) group; discard the aggregated
# `rnd_ds` column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2, out_3, out_4], axis=0)
    .groupby(['rnd_np', 'K', 'n_rft'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes nested by K, n_rft and method.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['K', 'n_rft', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=700,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline


In [None]:
# Eight runs (K = 5, p = 60): n in {200, 500, 1000, 2000} crossed with
# n_rft in {200, 500}.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n200_p60_rnp100_rds10_rft200.csv',
).assign(n=200, n_rft=200)

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n200_p60_rnp100_rds10_rft500.csv',
).assign(n=200, n_rft=500)

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n500_p60_rnp100_rds10_rft200.csv',
).assign(n=500, n_rft=200)

out_4 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n500_p60_rnp100_rds10_rft500.csv',
).assign(n=500, n_rft=500)

out_5 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n1000_p60_rnp100_rds10_rft200.csv',
).assign(n=1000, n_rft=200)

out_6 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n1000_p60_rnp100_rds10_rft500.csv',
).assign(n=1000, n_rft=500)

out_7 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n2000_p60_rnp100_rds10_rft200.csv',
).assign(n=2000, n_rft=200)

out_8 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n2000_p60_rnp100_rds10_rft500.csv',
).assign(n=2000, n_rft=500)

# print(out_6)

# Median within each (rnd_np, n, n_rft) group; discard the aggregated
# `rnd_ds` column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8], axis=0)
    .groupby(['rnd_np', 'n', 'n_rft'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes nested by n, n_rft and method.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['n', 'n_rft', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=1200,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline




Setting: 
- $K \in \{5, 10\}$,
- $p = 60$, 
- the number of trees in random forest $\in \{200, 500\}$, 
- true value of density ratio, not estimation.

In [25]:
# Four `odd` runs (n = 1000, p = 60): K in {5, 10} crossed with
# n_rft in {200, 500}.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_odd_K5_n1000_p60_rnp100_rds10_rft200.csv',
).assign(K=5, n_rft=200)

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_odd_K5_n1000_p60_rnp100_rds10_rft500.csv',
).assign(K=5, n_rft=500)

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_odd_K10_n1000_p60_rnp100_rds10_rft200.csv',
).assign(K=10, n_rft=200)

out_4 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_odd_K10_n1000_p60_rnp100_rds10_rft500.csv',
).assign(K=10, n_rft=500)

# print("K5_rtf200: ", out_1.shape[0] / 10, "%")
# print("K5_rtf500: ", out_2.shape[0] / 10, "%")
# print("K10_rtf200: ", out_3.shape[0] / 10, "%")
# print("K10_rtf500: ", out_4.shape[0] / 10, "%")

# Median within each (rnd_np, K, n_rft) group; discard the aggregated
# `rnd_ds` column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2, out_3, out_4], axis=0)
    .groupby(['rnd_np', 'K', 'n_rft'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes nested by K, n_rft and method.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['K', 'n_rft', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=700,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline


Setting: 
- $K \in \{5, 10\}$,
- $p = 20$, 
- the number of trees in random forest = $200$ or $500$.

In [None]:
# Four runs (n = 1000, p = 20): K in {5, 10} crossed with n_rft in {200, 500}.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n1000_p20_rnp100_rds10_rft200.csv',
).assign(K=5, n_rft=200)

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K5_n1000_p20_rnp100_rds10_rft500.csv',
).assign(K=5, n_rft=500)

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K10_n1000_p20_rnp100_rds10_rft200.csv',
).assign(K=10, n_rft=200)

out_4 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2_org_ini_K10_n1000_p20_rnp100_rds10_rft500.csv',
).assign(K=10, n_rft=500)

# print("K5_rtf200: ", out_1.shape[0] / 10, "%")
# print("K5_rtf500: ", out_2.shape[0] / 10, "%")
# print("K10_rtf200: ", out_3.shape[0] / 10, "%")
# print("K10_rtf500: ", out_4.shape[0] / 10, "%")

# Median within each (rnd_np, K, n_rft) group; discard the aggregated
# `rnd_ds` column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2, out_3, out_4], axis=0)
    .groupby(['rnd_np', 'K', 'n_rft'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes nested by K, n_rft and method.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['K', 'n_rft', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=700,
    show_legend=False,
)

# Horizontal reference line at EST = 2 (presumably the true beta -- confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline




Setting: 
- $K \in \{5, 10\}$, and $p \in \{20, 50\}$, 
- correlated $X$ with covariance matrix $\Sigma$ with matrix entry $\Sigma_{ij}=\rho^{|i-j|}$ and $\rho = 0.7$,

In [4]:
# Two `sim2s` runs (p = 20, n = 1000), labelled by the number of sites K.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K5_p20_n1000_rnp100_rds10_rft200.csv',
).assign(K=5)

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p20_n1000_rnp100_rds10_rft200.csv',
).assign(K=10)

# Median within each (rnd_np, K) group; discard the aggregated `rnd_ds`
# column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2], axis=0)
    .groupby(['rnd_np', 'K'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by K, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['K', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=700,
    show_legend=False,
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5).opts(color='black', active_tools=[])

boxwhisker * plt_hline


FileNotFoundError: [Errno 2] No such file or directory: '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K5_p20_n1000_rnp100_rds10_rft200.csv'

In [None]:
# label_method = ['Average', 'M1', 'M2']
# label_K = ['10', '5']
# label_p = ['20', '50']

# def hook_change_order(plot, element):
#     factors =  ((x1, x2, x3) for x1 in label_p for x2 in label_K for x3 in label_method)
#     plot.state.x_range.factors = [*factors]


# Four `sim2s` runs (n = 1000, rnp100): K in {5, 10} crossed with
# p in {20, 50}.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K5_p20_n1000_rnp100_rds10_rft200.csv',
).assign(K=5, p=20)

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K5_p50_n1000_rnp100_rds10_rft200.csv',
).assign(K=5, p=50)

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p20_n1000_rnp100_rds10_rft200.csv',
).assign(K=10, p=20)

out_4 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p50_n1000_rnp100_rds10_rft200.csv',
).assign(K=10, p=50)

# print("K5_p20: ", out_1.shape[0] / 10, "%")
# print("K5_p50: ", out_2.shape[0] / 10, "%")
# print("K10_p20: ", out_3.shape[0] / 10, "%")
# print("K10_p50: ", out_4.shape[0] / 10, "%")

# Median within each (rnd_np, K, p) group; discard the aggregated `rnd_ds`
# column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2, out_3, out_4], axis=0)
    .groupby(['rnd_np', 'K', 'p'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes nested by p, K and method.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['p', 'K', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=700,
    show_legend=False,
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5).opts(color='black', active_tools=[])

boxwhisker * plt_hline




auxiliary: rnp $= 100 \rightarrow 500$,

In [47]:
# label_method = ['Average', 'M1', 'M2']
# label_K = ['10', '5']
# label_p = ['20', '50']

# def hook_change_order(plot, element):
#     factors =  ((x1, x2, x3) for x1 in label_p for x2 in label_K for x3 in label_method)
#     plot.state.x_range.factors = [*factors]


# Four `sim2s` runs (n = 1000, rnp500): K in {5, 10} crossed with
# p in {20, 50}.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K5_p20_n1000_rnp500_rds10_rft200.csv',
).assign(K=5, p=20)

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K5_p50_n1000_rnp500_rds10_rft200.csv',
).assign(K=5, p=50)

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p20_n1000_rnp500_rds10_rft200.csv',
).assign(K=10, p=20)

out_4 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p50_n1000_rnp500_rds10_rft200.csv',
).assign(K=10, p=50)

# print("K5_p20: ", out_1.shape[0] / 50, "%")
# print("K5_p50: ", out_2.shape[0] / 50, "%")
# print("K10_p20: ", out_3.shape[0] / 50, "%")
# print("K10_p50: ", out_4.shape[0] / 50, "%")

# Median within each (rnd_np, K, p) group; discard the aggregated `rnd_ds`
# column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2, out_3, out_4], axis=0)
    .groupby(['rnd_np', 'K', 'p'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes nested by p, K and method.
boxwhisker = hv.BoxWhisker(
    out_long, kdims=['p', 'K', 'Method'], vdims='EST'
).opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=700,
    show_legend=False,
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5).opts(color='black', active_tools=[])

boxwhisker * plt_hline


K5_p20:  100.0 %
K5_p50:  100.0 %
K10_p20:  100.0 %
K10_p50:  94.74 %


In [46]:
# K = 10, p = 50 baseline and three single-parameter variations; the tag
# prefix (0-, 1-, ...) is part of the label shown on the axis.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p50_n1000_rnp100_rds10_rft200.csv',
).assign(tag="0-org")

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p50_n1000_rnp500_rds10_rft200.csv',
).assign(tag="1-rnp500")

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p50_n1000_rnp100_rds10_rft500.csv',
).assign(tag="2-rft500")

out_4 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_K10_p50_n2000_rnp100_rds10_rft200.csv',
).assign(tag="3-n2000")

# print("0-org:    ", out_1.shape[0] / 10, "%")
# print("1-rnp500: ", out_2.shape[0] / 50, "%")
# print("2-rft500: ", out_3.shape[0] / 10, "%")
# print("3-n2000:  ", out_4.shape[0] / 10, "%")

# Median within each (rnd_np, tag) group; discard the aggregated `rnd_ds`
# column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2, out_3, out_4], axis=0)
    .groupby(['rnd_np', 'tag'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by tag, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['tag', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=700,
    show_legend=False,
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5).opts(color='black', active_tools=[])

boxwhisker * plt_hline


0-org:     100.0 %
1-rnp500:  94.72 %
2-rft500:  100.0 %
3-n2000:   100.0 %


### Acceleration

In [3]:
# Two runs tagged "switch" (`_x_` file) and "no-switch" (`_xx_` file).
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_x_K5_n1000_p20.csv',
).assign(tag="switch")

# print("progress out_1: ", out_1.shape[0] / 10, "%")

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2s_xx_K5_n1000_p20.csv',
).assign(tag="no-switch")

# print("progress out_2: ", out_2.shape[0] / 10, "%")

# Median within each (rnd_np, tag) group; discard the aggregated `rnd_ds`
# column so that it is not melted as a method.
out_comp = (
    pd.concat([out_1, out_2], axis=0)
    .groupby(['rnd_np', 'tag'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by tag, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['tag', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=700,
    show_legend=False,
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5).opts(color='black', active_tools=[])

boxwhisker * plt_hline


progress out_1:  100.0 %
progress out_2:  100.0 %


## Simulation 2s: Covariate Shift

In [3]:
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p20.csv',
)

# Row count divided by 10, reported as a percentage.
print("progress out_1: ", len(out_1) / 10, "%")

# Mean within each `rnd_gen` group (the median variant is kept commented
# out); discard the aggregated `rnd_ds` column so it is not melted.
# out_comp = out_1.groupby(['rnd_gen']).median()
out_comp = out_1.groupby(['rnd_gen']).mean().drop(columns='rnd_ds')

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# One box per method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=400,
    show_legend=False,
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5).opts(color='black', active_tools=[])

boxwhisker * plt_hline


progress out_1:  100.0 %


In [14]:
# Baseline (p20 file) plus the p30 and p10 files; the numeric tag prefix
# is part of the label shown on the axis.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p20.csv',
).assign(tag="0 - original")

# print("progress out_1: ", out_1.shape[0] / 10, "%")

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p30.csv'
).assign(tag="1 - p30")

# print("progress out_2: ", out_2.shape[0] / 10, "%")

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p10.csv'
).assign(tag="2 - p10")

# Row count divided by 10, reported as a percentage.
print("progress out_3: ", len(out_3) / 10, "%")

# Median within each (rnd_gen, tag) group (the mean variant is kept
# commented out); discard the aggregated `rnd_ds` column.
# out_comp = out_comp.groupby(['rnd_gen', 'tag']).mean()
out_comp = (
    pd.concat([out_1, out_2, out_3], axis=0)
    .groupby(['rnd_gen', 'tag'])
    .median()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by tag, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['tag', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=800,
    show_legend=False,
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5).opts(color='black', active_tools=[])

boxwhisker * plt_hline


progress out_3:  100.0 %


In [16]:
# Baseline plus four files whose names carry different mu / sd suffixes;
# the numeric tag prefix is part of the label shown on the axis.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p20.csv',
).assign(tag="0 - original")

# print("progress out_1: ", out_1.shape[0] / 10, "%")

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p20_mu10_sd2.csv'
).assign(tag="1 - mu10")

# print("progress out_2: ", out_2.shape[0] / 10, "%")

out_3 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p20_mu1_sd10.csv'
).assign(tag="2 - sd10")

# print("progress out_3: ", out_3.shape[0] / 10, "%")

out_4 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p20_sd05.csv'
).assign(tag="3 - sd05")

# print("progress out_4: ", out_4.shape[0] / 10, "%")

out_5 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p20_mu0_sd5.csv'
).assign(tag="4 - mu0_sd5")

# print("progress out_5: ", out_5.shape[0] / 10, "%")

# Mean within each (rnd_gen, tag) group (the median variant is kept
# commented out); discard the aggregated `rnd_ds` column.
# out_comp = out_comp.groupby(['rnd_gen', 'tag']).median()
out_comp = (
    pd.concat([out_1, out_2, out_3, out_4, out_5], axis=0)
    .groupby(['rnd_gen', 'tag'])
    .mean()
    .drop(columns='rnd_ds')
)

out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by tag, then by method.
boxwhisker = hv.BoxWhisker(out_long, kdims=['tag', 'Method'], vdims='EST').opts(
    cmap='Set1',
    box_fill_color=dim('Method').str(),
    width=1000,
    show_legend=False,
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5).opts(color='black', active_tools=[])

boxwhisker * plt_hline


In [17]:
# Baseline run and the `tsf` run, tagged for the comparison plot.
out_1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_K5_n1000_p20.csv',
)
out_1['tag'] = "0 - original"

# print("progress out_1: ", out_1.shape[0] / 10, "%")

out_2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_ora_x_tsf_K5_n1000_p20.csv'
)
out_2['tag'] = "1 - tsf"

# print("progress out_2: ", out_2.shape[0] / 10, "%")

# A third frame (`out_3`, tag "2 - ") used to be loaded here from the same
# file as `out_2`, but it was never concatenated below. The redundant read
# and unused variable have been removed; add a third entry back once a
# distinct results file exists.

out_comp = pd.concat([out_1, out_2], axis=0)

# Mean within each (rnd_gen, tag) group (the median variant is kept
# commented out).
# out_comp = out_comp.groupby(['rnd_gen', 'tag']).median()
out_comp = out_comp.groupby(['rnd_gen', 'tag']).mean()

# Discard the aggregated `rnd_ds` column so it is not melted as a method.
out_comp = out_comp.drop('rnd_ds', axis=1)

out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

# Boxes grouped by tag, then by method.
boxwhisker = hv.BoxWhisker(out_long, ['tag', 'Method'], 'EST')
boxwhisker.opts(
    show_legend=False, 
    width=600, 
    box_fill_color=dim('Method').str(), 
    cmap='Set1', 
    # hooks=[hook_change_order]
)

# Horizontal reference line at EST = 0.5 (presumably the true value in this
# design -- confirm).
plt_hline = hv.HLine(0.5)
plt_hline.opts(
    color='black',
    active_tools=[]
)


boxwhisker * plt_hline


## Simulation 2s: Covariate Shift - exponential tilting

oracle covariate density ratio

In [3]:
# 'Mso1' matches the name the 'M2' column is renamed to later in this cell;
# the previous label 'M1so' did not correspond to any plotted method.
label_method = ['Average', 'M1', 'Mso1']
label_sdcs = ['01', '02', '05']

def hook_change_order(plot, element):
    """Plot hook that fixes the order of the (sd_cs, Method) x-axis factors.

    Every entry of `label_sdcs` is paired with every entry of `label_method`
    (outer loop over `label_sdcs`), and the resulting list is written to
    `plot.state.x_range.factors`. `element` is unused.
    """
    plot.state.x_range.factors = [
        (sd_cs, method) for sd_cs in label_sdcs for method in label_method
    ]

# Oracle density-ratio results: read one result file per covariate-shift
# level in `label_sdcs`, take the median within each (rnd_gen, sd_cs) group,
# and draw one box per (sd_cs, Method).
frames = []
for sd_cs in label_sdcs:
    file_name = (
        '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_expt_ora'
        + '_sdcs' + sd_cs
        + '.csv'
    )

    out = pd.read_csv(file_name)
    out['sd_cs'] = sd_cs  # remember which shift level each row came from
    frames.append(out)

# Concatenate once instead of growing a frame inside the loop: no repeated
# copying and no empty seed DataFrame taking part in the concat.
out_comp = pd.concat(frames, axis=0)

out_comp = out_comp.rename(columns={'M2': 'Mso1'})

# Group median per (rnd_gen, sd_cs); rnd_ds is an identifier column, not an
# estimate, so it is removed after aggregation.
out_comp = out_comp.groupby(['rnd_gen', 'sd_cs']).median()
out_comp = out_comp.drop('rnd_ds', axis=1)

# Long format: one row per (rnd_gen, sd_cs, Method) with the estimate in EST.
out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

boxwhisker = hv.BoxWhisker(out_long, ['sd_cs', 'Method'], 'EST')
boxwhisker.opts(
    show_legend=False,
    width=1000,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at 0.5 (presumably the true parameter value --
# TODO confirm against the simulation design).
plt_hline = hv.HLine(.5)
plt_hline.opts(
    color='black',
    active_tools=[]
)

boxwhisker * plt_hline

Results using the oracle covariate density ratio, with $n = 600$.

In [4]:
# Oracle density-ratio results for the n600 runs: read one result file per
# covariate-shift level, take the median within each (rnd_gen, sd_cs) group,
# and draw one box per (sd_cs, Method).

# NOTE(review): label_method is not read in this cell, and it still lists
# 'M2' although that column is renamed to 'Mso1' before plotting -- confirm
# which label is intended before using these lists to order the axis.
label_method = ['Average', 'M1', 'M2']
label_sdcs = ['01', '02', '05']

frames = []
for sd_cs in label_sdcs:
    file_name = (
        '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_expt_ora'
        + '_n600'
        + '_sdcs' + sd_cs
        + '.csv'
    )

    out = pd.read_csv(file_name)
    out['sd_cs'] = sd_cs  # remember which shift level each row came from
    frames.append(out)

# Concatenate once instead of growing a frame inside the loop: no repeated
# copying and no empty seed DataFrame taking part in the concat.
out_comp = pd.concat(frames, axis=0)

out_comp = out_comp.rename(columns={'M2': 'Mso1'})

# Group median per (rnd_gen, sd_cs); rnd_ds is an identifier column, not an
# estimate, so it is removed after aggregation.
out_comp = out_comp.groupby(['rnd_gen', 'sd_cs']).median()
out_comp = out_comp.drop('rnd_ds', axis=1)

# Long format: one row per (rnd_gen, sd_cs, Method) with the estimate in EST.
out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

boxwhisker = hv.BoxWhisker(out_long, ['sd_cs', 'Method'], 'EST')
boxwhisker.opts(
    show_legend=False,
    width=900,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at 0.5 (presumably the true parameter value --
# TODO confirm against the simulation design).
plt_hline = hv.HLine(.5)
plt_hline.opts(
    color='black',
    active_tools=[]
)

boxwhisker * plt_hline

Results using an estimated covariate density ratio.

In [15]:
# Estimated density-ratio results: read one result file per covariate-shift
# level, take the median within each (rnd_gen, sd_cs) group, and draw one box
# per (sd_cs, Method).
label_method = ['Average', 'M1', 'M2']
label_sdcs = ['01', '02', '03']

frames = []
for sd_cs in label_sdcs:
    file_name = (
        '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_expt'
        + '_sdcs' + sd_cs
        + '.csv'
    )

    out = pd.read_csv(file_name)
    out['sd_cs'] = sd_cs  # remember which shift level each row came from
    frames.append(out)

# Concatenate once instead of growing a frame inside the loop: no repeated
# copying and no empty seed DataFrame taking part in the concat.
out_comp = pd.concat(frames, axis=0)

# Group median per (rnd_gen, sd_cs); rnd_ds is an identifier column, not an
# estimate, so it is removed after aggregation.
out_comp = out_comp.groupby(['rnd_gen', 'sd_cs']).median()
out_comp = out_comp.drop('rnd_ds', axis=1)

# Long format: one row per (rnd_gen, sd_cs, Method) with the estimate in EST.
out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

boxwhisker = hv.BoxWhisker(out_long, ['sd_cs', 'Method'], 'EST')
boxwhisker.opts(
    show_legend=False,
    width=900,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at 0.5 (presumably the true parameter value --
# TODO confirm against the simulation design).
plt_hline = hv.HLine(.5)
plt_hline.opts(
    color='black',
    active_tools=[]
)

boxwhisker * plt_hline

Results using an estimated covariate density ratio, with $n = 600$.

In [8]:
# Estimated density-ratio results for the n600 runs: read one result file per
# covariate-shift level, take the median within each (rnd_gen, sd_cs) group,
# and draw one box per (sd_cs, Method).
label_method = ['Average', 'M1', 'M2']
label_sdcs = ['01', '02', '03']

frames = []
for sd_cs in label_sdcs:
    file_name = (
        '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_expt'
        + '_n600'
        + '_sdcs' + sd_cs
        + '.csv'
    )

    out = pd.read_csv(file_name)
    out['sd_cs'] = sd_cs  # remember which shift level each row came from
    frames.append(out)

# Concatenate once instead of growing a frame inside the loop: no repeated
# copying and no empty seed DataFrame taking part in the concat.
out_comp = pd.concat(frames, axis=0)

# Group median per (rnd_gen, sd_cs); rnd_ds is an identifier column, not an
# estimate, so it is removed after aggregation.
out_comp = out_comp.groupby(['rnd_gen', 'sd_cs']).median()
out_comp = out_comp.drop('rnd_ds', axis=1)

# Long format: one row per (rnd_gen, sd_cs, Method) with the estimate in EST.
out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

boxwhisker = hv.BoxWhisker(out_long, ['sd_cs', 'Method'], 'EST')
boxwhisker.opts(
    show_legend=False,
    width=900,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at 0.5 (presumably the true parameter value --
# TODO confirm against the simulation design).
plt_hline = hv.HLine(.5)
plt_hline.opts(
    color='black',
    active_tools=[]
)

boxwhisker * plt_hline

sd_cs 01 :  100.0 %
sd_cs 02 :  100.0 %
sd_cs 03 :  100.0 %


In [57]:
# Results from the `expt2` runs: read one result file per covariate-shift
# level, report how complete each file is, take the median within each
# (rnd_gen, sd_cs) group, and draw one box per (sd_cs, Method).
label_method = ['Average', 'M1', 'M2']
label_sdcs = ['01', '03', '05', '08', '1']

frames = []
for sd_cs in label_sdcs:
    file_name = (
        '/project/Stat/s1155168529/programs/DDML/output/out_sim2cs_expt2'
        + '_sdcs' + sd_cs
        + '.csv'
    )

    out = pd.read_csv(file_name)
    out['sd_cs'] = sd_cs  # remember which shift level each row came from

    # Progress report: row count / 10 shown as a percentage (presumably 1000
    # rows are expected per file -- TODO confirm).
    print("sd_cs", '{0: <2}'.format(sd_cs), ": ", out.shape[0] / 10, "%")

    frames.append(out)

# Concatenate once instead of growing a frame inside the loop: no repeated
# copying and no empty seed DataFrame taking part in the concat.
out_comp = pd.concat(frames, axis=0)

# Group median per (rnd_gen, sd_cs); rnd_ds is an identifier column, not an
# estimate, so it is removed after aggregation.
out_comp = out_comp.groupby(['rnd_gen', 'sd_cs']).median()
out_comp = out_comp.drop('rnd_ds', axis=1)

# Long format: one row per (rnd_gen, sd_cs, Method) with the estimate in EST.
out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

boxwhisker = hv.BoxWhisker(out_long, ['sd_cs', 'Method'], 'EST')
boxwhisker.opts(
    show_legend=False,
    width=900,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at 0.5 (presumably the true parameter value --
# TODO confirm against the simulation design).
plt_hline = hv.HLine(.5)
plt_hline.opts(
    color='black',
    active_tools=[]
)

boxwhisker * plt_hline

sd_cs 01 :  100.0 %
sd_cs 03 :  100.0 %
sd_cs 05 :  100.0 %
sd_cs 08 :  100.0 %
sd_cs 1  :  100.0 %
