# Simulation 3: Partial linear IV regression

In [4]:
import pandas as pd
import holoviews as hv
from holoviews import dim
hv.extension('bokeh')

Model description: 
$$
    \begin{aligned}
    Y^{(j)} - D^{(j)} \beta &= \gamma_j(X^{(j)}) + U^{(j)}, \quad E[U^{(j)} \mid X^{(j)}, Z^{(j)}] = 0, \\
    Z^{(j)} &= \mu_j(X^{(j)}) + V^{(j)}, \quad E[V^{(j)} \mid X^{(j)}] = 0.
    \end{aligned}
$$

## Scenario 1

Relationship between instrumental variable $Z$ and treatment $D$: 
$$
    D \sim \text{Normal}(Z, \psi_d)
$$

Setting: $\psi_d = 0.1$ with correlation $\text{Corr}(D, Z) = 0.9945$.

In [None]:
# Scenario 1, psi_d = 0.1: load the estimates, discard the `rnd_ds` column and
# index the rows by `rnd_np` (presumably the replication id -- TODO confirm).
out = (
    pd.read_csv(
        '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_iter100_psid01.csv'
    )
    .drop(columns='rnd_ds')
    .set_index('rnd_np')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# One box per method; the y-range is left automatic for this setting.
boxwhisker = hv.BoxWhisker(out_long, 'Method', 'EST').opts(
    show_legend=False,
    width=300,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline


Setting: $\psi_d = 9$ with correlation $\text{Corr}(D, Z) = 0.7071$.

In [None]:
# Scenario 1, psi_d = 9: load the estimates, discard the `rnd_ds` column and
# index the rows by `rnd_np` (presumably the replication id -- TODO confirm).
out = (
    pd.read_csv(
        '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_iter100_psid9.csv'
    )
    .drop(columns='rnd_ds')
    .set_index('rnd_np')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# One box per method, with the y-axis fixed to [1.5, 2.5].
boxwhisker = hv.BoxWhisker(out_long, 'Method', 'EST').opts(
    show_legend=False,
    width=300,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(1.5, 2.5),
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline

Setting: $\psi_d = 36$ with correlation $\text{Corr}(D, Z) = 0.4472$.

In [None]:
# Half-width of the y-axis window drawn around the reference value 2.
bdw = 1

# Scenario 1, psi_d = 36: load the estimates, discard the `rnd_ds` column and
# index the rows by `rnd_np` (presumably the replication id -- TODO confirm).
out = (
    pd.read_csv(
        '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_iter100_psid36.csv'
    )
    .drop(columns='rnd_ds')
    .set_index('rnd_np')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

boxwhisker = hv.BoxWhisker(out_long, 'Method', 'EST').opts(
    show_legend=False,
    width=300,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black')

boxwhisker * plt_hline

Comparison: $\psi_d = 0.1, 9, 36$ with $100$ replications and non-random data splitting.

In [6]:
# Load the three Scenario 1 result files and tag each with its psi_d value.
out01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_iter100_psid01.csv'
).assign(psid=0.1)
out9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_iter100_psid9.csv'
).assign(psid=9)
out36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_iter100_psid36.csv'
).assign(psid=36)

# Stack the files, discard `rnd_ds`, and index by (rnd_np, psid).
out_comp = (
    pd.concat([out01, out9, out36])
    .drop(columns='rnd_ds')
    .set_index(['rnd_np', 'psid'])
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Half-width of the y-axis window drawn around the reference value 2.
bdw = 0.5

# Boxes grouped by psid, then by method.
boxwhisker = hv.BoxWhisker(out_long, ['psid', 'Method'], 'EST').opts(
    show_legend=False,
    width=600,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: replications $100$ vs. $500$ for $\psi_d=9$.

In [13]:
# psi_d = 9 results from the iter100 and iter500 runs, each tagged with `rep`.
out_100 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_iter100_psid9.csv',
).assign(rep=100)
out_500 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_iter500_psid9.csv',
).assign(rep=500)

# Average the rows sharing (rnd_np, rep), then discard the averaged `rnd_ds` column.
out_comp = (
    pd.concat([out_100, out_500], axis=0)
    .groupby(['rnd_np', 'rep'])
    .mean()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Half-width of the y-axis window drawn around the reference value 2.
bdw = 0.8

# Boxes grouped by rep, then by method.
boxwhisker = hv.BoxWhisker(out_long, ['rep', 'Method'], 'EST').opts(
    show_legend=False,
    width=500,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline


Conclusion: The results with non-random data splitting and $100$ replications are not stable.

Comparison: $\psi_d = 0.1, 9, 36$ with $10$ random data splits and $100$ data replications.

In [36]:
# Scenario 1, n = 100, rnp100/rds10 runs: one file per psi_d, tagged on load.
out01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Take the median over the rows sharing (rnd_np, psi_d) -- presumably the random
# data splits of one replication -- then discard `rnd_ds`. The grouping keys
# remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out01, out9, out36], axis=0)
    .groupby(['rnd_np', 'psi_d'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by psi_d, then by method; the y-range is left automatic.
boxwhisker = hv.BoxWhisker(out_long, ['psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=600,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: $\psi_d = 0.1, 9, 36$ with sample size $200$, $10$ random data splits and $100$ data replications.

In [68]:
# Scenario 1, n = 200, rnp100/rds10 runs: one file per psi_d, tagged on load.
out01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n200_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n200_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n200_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Take the median over the rows sharing (rnd_np, psi_d) -- presumably the random
# data splits of one replication -- then discard `rnd_ds`. The grouping keys
# remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out01, out9, out36], axis=0)
    .groupby(['rnd_np', 'psi_d'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by psi_d, then by method; the y-range is left automatic.
boxwhisker = hv.BoxWhisker(out_long, ['psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=600,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: sample size $100$ vs. $200$.

In [3]:
# Scenario 1, n = 100 runs: one file per psi_d, tagged on load.
out100_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out100_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out100_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n100_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Scenario 1, n = 200 runs: same layout.
out200_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n200_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out200_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n200_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out200_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_K5_n200_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Stack per sample size and record the sample size in column `n`.
out_comp_100 = pd.concat([out100_01, out100_9, out100_36], axis=0).assign(n=100)
out_comp_200 = pd.concat([out200_01, out200_9, out200_36], axis=0).assign(n=200)

# Take the median over the rows sharing (rnd_np, psi_d, n) -- presumably the
# random data splits of one replication -- then discard `rnd_ds`. The grouping
# keys remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out_comp_100, out_comp_200], axis=0)
    .groupby(['rnd_np', 'psi_d', 'n'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by n, then psi_d, then method; the y-range is left automatic.
boxwhisker = hv.BoxWhisker(out_long, ['n', 'psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=1000,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

## Scenario 2

Relationship between treatment and instrumental variable: 
$$
    D^{(j)} = Z^{(j)} + \upsilon_j(X^{(j)}) + \epsilon^{(j)}, \quad \epsilon^{(j)} \sim \text{Normal}(0, \psi_d), 
$$
with 
$$
    \upsilon_j(x_i^{(j)}) = \frac{\exp(x_{i, j+1}^{(j)})}{1 + \exp(x_{i, j+1}^{(j)})}.
$$

Setting: $100$ replications with $\psi_d=9$.

In [117]:
# Scenario 2 ("std" output file), n100 / iter100 / psi_d = 9 run.
out = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_iter100_psid9.csv'
)

# Axis label sets. Only the method order matters for the single-panel plot in
# this cell; the hook below carries its own copy of that order.
label_psid = ['0.1', '9', '36']
label_Method = ['Average', 'M2', 'M1']
label_DenEst = ['single', 'double', 'ora-tri']


def hook_change_order(plot, element):
    """Plot hook: put the categorical x-axis in the order Average, M2, M1.

    Overwrites ``plot.state.x_range.factors`` with a fresh list on every call;
    ``element`` is accepted but not used.
    """
    plot.state.x_range.factors = ['Average', 'M2', 'M1']

# Discard `rnd_ds` and index the rows by `rnd_np`.
out = out.drop(columns='rnd_ds').set_index('rnd_np')

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# One box per method; the hook fixes the left-to-right order of the boxes.
boxwhisker = hv.BoxWhisker(out_long, 'Method', 'EST').opts(
    show_legend=False,
    width=300,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(1.5, 2.5),
    hooks=[hook_change_order],
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: replications $100$ vs. $500$ for $\psi_d=9$.

In [17]:
# Scenario 2, psi_d = 9 results from the iter100 and iter500 runs, tagged with `rep`.
out_100 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_iter100_psid9.csv',
).assign(rep=100)
out_500 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_iter500_psid9.csv',
).assign(rep=500)

# Average the rows sharing (rnd_np, rep), then discard the averaged `rnd_ds` column.
out_comp = (
    pd.concat([out_100, out_500], axis=0)
    .groupby(['rnd_np', 'rep'])
    .mean()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Half-width of the y-axis window drawn around the reference value 2.
bdw = 0.8

# Boxes grouped by rep, then by method.
boxwhisker = hv.BoxWhisker(out_long, ['rep', 'Method'], 'EST').opts(
    show_legend=False,
    width=500,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline


Conclusion: the result with `n=100`, `n_rnp=100` and non-random data splitting is not stable.

Comparison: $\psi_d = 0.1, 9, 36$ with $10$ random data splits and $100$ data replications.

In [61]:
# Scenario 2, n = 100, rnp100/rds10 runs: one file per psi_d, tagged on load.
out01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Take the median over the rows sharing (rnd_np, psi_d) -- presumably the random
# data splits of one replication -- then discard `rnd_ds`. The grouping keys
# remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out01, out9, out36], axis=0)
    .groupby(['rnd_np', 'psi_d'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by psi_d, then by method; the y-range is left automatic.
boxwhisker = hv.BoxWhisker(out_long, ['psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=600,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: $\psi_d = 0.1, 9, 36$ with 
- $200$ sample size,
- $10$ random data splits,
- $100$ data replications.

In [59]:
# Scenario 2, n = 200, rnp100/rds10 runs: one file per psi_d, tagged on load.
out01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Take the median over the rows sharing (rnd_np, psi_d) -- presumably the random
# data splits of one replication -- then discard `rnd_ds`. The grouping keys
# remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out01, out9, out36], axis=0)
    .groupby(['rnd_np', 'psi_d'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by psi_d, then by method; the y-range is left automatic.
boxwhisker = hv.BoxWhisker(out_long, ['psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=600,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: sample size $100$ vs. $200$.

In [111]:
# Scenario 2, n = 100 runs: one file per psi_d, tagged on load.
out100_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out100_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out100_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Scenario 2, n = 200 runs: same layout.
out200_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out200_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out200_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Stack per sample size and record the sample size in column `n`.
out_comp_100 = pd.concat([out100_01, out100_9, out100_36], axis=0).assign(n=100)
out_comp_200 = pd.concat([out200_01, out200_9, out200_36], axis=0).assign(n=200)

# Take the median over the rows sharing (rnd_np, psi_d, n) -- presumably the
# random data splits of one replication -- then discard `rnd_ds`. The grouping
# keys remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out_comp_100, out_comp_200], axis=0)
    .groupby(['rnd_np', 'psi_d', 'n'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by n, then psi_d, then method; the y-range is left automatic.
boxwhisker = hv.BoxWhisker(out_long, ['n', 'psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=1000,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: density estimation single vs. double vs. oracle triple

In [121]:
# Axis label sets, in the left-to-right order wanted on the plot.
label_psid = ['0.1', '9', '36']
label_Method = ['Average', 'M1', 'M2']
label_DenEst = ['single', 'double', 'ora-tri']


def hook_change_order(
    plot,
    element,
    psid_levels=tuple(label_psid),
    den_est_levels=tuple(label_DenEst),
    method_levels=tuple(label_Method),
):
    """Plot hook: impose a fixed order on the nested categorical x-axis.

    Sets ``plot.state.x_range.factors`` to every ``(psid, den_est, method)``
    tuple, with psid varying slowest and method fastest. ``element`` is
    accepted but not used.

    The level sequences are captured as parameter defaults when the function
    is defined. The module-level ``label_*`` names are rebound by other cells,
    so looking them up at call time would make the order of an already built
    plot depend on whichever cell ran last.
    """
    plot.state.x_range.factors = [
        (psid, den_est, method)
        for psid in psid_levels
        for den_est in den_est_levels
        for method in method_levels
    ]

# "std" files: labelled with label_DenEst[0] below.
out_sd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_sd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_sd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# "std_dd" files: labelled with label_DenEst[1] below.
out_dd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_dd_K5_n100_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_dd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_dd_K5_n100_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_dd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_dd_K5_n100_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# "std_otd" files: labelled with label_DenEst[2] below.
out_otd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_otd_K5_n100_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_otd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_otd_K5_n100_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_otd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_otd_K5_n100_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# Stack per density-estimation variant and record the variant in `DenEst`.
out_comp_sd = pd.concat([out_sd_01, out_sd_9, out_sd_36], axis=0).assign(
    DenEst=label_DenEst[0]
)
out_comp_dd = pd.concat([out_dd_01, out_dd_9, out_dd_36], axis=0).assign(
    DenEst=label_DenEst[1]
)
out_comp_otd = pd.concat([out_otd_01, out_otd_9, out_otd_36], axis=0).assign(
    DenEst=label_DenEst[2]
)

# Take the median over the rows sharing (rnd_np, Psi_d, DenEst) -- presumably
# the random data splits of one replication -- then discard `rnd_ds`. The
# grouping keys remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out_comp_sd, out_comp_dd, out_comp_otd], axis=0)
    .groupby(['rnd_np', 'Psi_d', 'DenEst'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by Psi_d, DenEst, then method; the hook fixes the factor order.
boxwhisker = hv.BoxWhisker(
    out_long, ['Psi_d', 'DenEst', 'Method'], 'EST'
).opts(
    show_legend=False,
    width=1200,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    hooks=[hook_change_order],
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Conclusion: improvement is not significant here.

Comparison: single estimation vs. oracle triple estimation for sample size $200$

In [6]:
# Axis label sets, in the left-to-right order wanted on the plot.
label_psid = ['0.1', '9', '36']
label_Method = ['Average', 'M1', 'M2']
label_DenEst = ['single', 'ora-tri']


def hook_change_order(
    plot,
    element,
    psid_levels=tuple(label_psid),
    den_est_levels=tuple(label_DenEst),
    method_levels=tuple(label_Method),
):
    """Plot hook: impose a fixed order on the nested categorical x-axis.

    Sets ``plot.state.x_range.factors`` to every ``(psid, den_est, method)``
    tuple, with psid varying slowest and method fastest. ``element`` is
    accepted but not used.

    The level sequences are captured as parameter defaults when the function
    is defined. The module-level ``label_*`` names are rebound by other cells,
    so looking them up at call time would make the order of an already built
    plot depend on whichever cell ran last.
    """
    plot.state.x_range.factors = [
        (psid, den_est, method)
        for psid in psid_levels
        for den_est in den_est_levels
        for method in method_levels
    ]

# "std" files (n = 200): labelled with label_DenEst[0] below.
out_sd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_sd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_sd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n200_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# "std_otd" files (n = 200): labelled with label_DenEst[1] below.
out_otd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_otd_K5_n200_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_otd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_otd_K5_n200_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_otd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_otd_K5_n200_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# Stack per density-estimation variant and record the variant in `DenEst`.
out_comp_sd = pd.concat([out_sd_01, out_sd_9, out_sd_36], axis=0).assign(
    DenEst=label_DenEst[0]
)
out_comp_otd = pd.concat([out_otd_01, out_otd_9, out_otd_36], axis=0).assign(
    DenEst=label_DenEst[1]
)

# Take the median over the rows sharing (rnd_np, Psi_d, DenEst) -- presumably
# the random data splits of one replication -- then discard `rnd_ds`. The
# grouping keys remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out_comp_sd, out_comp_otd], axis=0)
    .groupby(['rnd_np', 'Psi_d', 'DenEst'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Boxes grouped by Psi_d, DenEst, then method; the hook fixes the factor order.
boxwhisker = hv.BoxWhisker(
    out_long, ['Psi_d', 'DenEst', 'Method'], 'EST'
).opts(
    show_legend=False,
    width=1000,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    hooks=[hook_change_order],
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

## Scenario 3: endogenous

In [4]:
# Scenario 3 ("edg" files), n = 100: one file per psi_d, tagged on load.
out01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid01.csv',
).assign(psi_d=0.1)
out9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid9.csv',
).assign(psi_d=9)
out36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid36.csv',
).assign(psi_d=36)

# Take the median over the rows sharing (rnd_np, psi_d) -- presumably the random
# data splits of one replication -- then discard `rnd_ds`. The grouping keys
# remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out01, out9, out36], axis=0)
    .groupby(['rnd_np', 'psi_d'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Half-width of the y-axis window drawn around the reference value 2.
bdw = 0.5

# Boxes grouped by psi_d, then by method.
boxwhisker = hv.BoxWhisker(out_long, ['psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=600,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

In [7]:
# First set of "edg" runs: labelled Rsd = '128' below (presumably the random
# seed -- TODO confirm).
out01_v1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out9_v1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out36_v1 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# "_v2" files of the same configuration: labelled Rsd = '2023' below.
out01_v2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid01_v2.csv',
).assign(Psi_d=0.1)
out9_v2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid9_v2.csv',
).assign(Psi_d=9)
out36_v2 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid36_v2.csv',
).assign(Psi_d=36)

# Stack per run version and record the label in `Rsd`.
out_comp_v1 = pd.concat([out01_v1, out9_v1, out36_v1], axis=0).assign(Rsd='128')
out_comp_v2 = pd.concat([out01_v2, out9_v2, out36_v2], axis=0).assign(Rsd='2023')

# Take the median over the rows sharing (rnd_np, Psi_d, Rsd) -- presumably the
# random data splits of one replication -- then discard `rnd_ds`. The grouping
# keys remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out_comp_v1, out_comp_v2], axis=0)
    .groupby(['rnd_np', 'Psi_d', 'Rsd'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Half-width of the y-axis window drawn around the reference value 2.
bdw = 0.5

# Boxes grouped by Rsd, then Psi_d, then method.
boxwhisker = hv.BoxWhisker(out_long, ['Rsd', 'Psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=1000,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Conclusion: The results with $100$ replications are not very stable.

Comparison: non-endogenous vs. endogenous

In [11]:
# Axis label sets, in the left-to-right order wanted on the plot.
label_psid = ['0.1', '9', '36']
label_method = ['Average', 'M1', 'M2']
label_type = ['Non-Endogenous', 'Endogenous']


def hook_change_order(
    plot,
    element,
    type_levels=tuple(label_type),
    psid_levels=tuple(label_psid),
    method_levels=tuple(label_method),
):
    """Plot hook: impose a fixed order on the nested categorical x-axis.

    Sets ``plot.state.x_range.factors`` to every ``(type, psid, method)``
    tuple, with type varying slowest and method fastest. ``element`` is
    accepted but not used.

    The level sequences are captured as parameter defaults when the function
    is defined. The module-level ``label_*`` names are rebound by other cells,
    so looking them up at call time would make the order of an already built
    plot depend on whichever cell ran last.
    """
    plot.state.x_range.factors = [
        (kind, psid, method)
        for kind in type_levels
        for psid in psid_levels
        for method in method_levels
    ]

# "std" files: labelled with label_type[0] below.
out_nedg_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_nedg_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_nedg_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_std_K5_n100_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# "edg" files: labelled with label_type[1] below.
out_edg_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_edg_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_edg_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# Stack per design and record the design label in `Type`.
out_comp_nedg = pd.concat([out_nedg_01, out_nedg_9, out_nedg_36], axis=0).assign(
    Type=label_type[0]
)
out_comp_edg = pd.concat([out_edg_01, out_edg_9, out_edg_36], axis=0).assign(
    Type=label_type[1]
)

# Take the median over the rows sharing (rnd_np, Psi_d, Type) -- presumably the
# random data splits of one replication -- then discard `rnd_ds`. The grouping
# keys remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out_comp_nedg, out_comp_edg], axis=0)
    .groupby(['rnd_np', 'Psi_d', 'Type'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Half-width of the y-axis window drawn around the reference value 2.
bdw = 0.5

# Boxes grouped by Type, Psi_d, then method; the hook fixes the factor order.
boxwhisker = hv.BoxWhisker(out_long, ['Type', 'Psi_d', 'Method'], 'EST').opts(
    show_legend=False,
    width=1000,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
    hooks=[hook_change_order],
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Conclusion: 
- The variance in the endogenous case is larger than in the non-endogenous case.

Comparison: single density estimation vs. joint density estimation with sample size $n=100$

In [5]:
# Axis label sets, in the left-to-right order wanted on the plot.
label_method = ['Average', 'M1', 'M2']
label_type = ['single', 'joint']
label_psid = ['0.1', '9', '36']


def hook_change_order(
    plot,
    element,
    psid_levels=tuple(label_psid),
    type_levels=tuple(label_type),
    method_levels=tuple(label_method),
):
    """Plot hook: impose a fixed order on the nested categorical x-axis.

    Sets ``plot.state.x_range.factors`` to every ``(psid, type, method)``
    tuple, with psid varying slowest and method fastest. ``element`` is
    accepted but not used.

    The level sequences are captured as parameter defaults when the function
    is defined. The module-level ``label_*`` names are rebound by other cells,
    so looking them up at call time would make the order of an already built
    plot depend on whichever cell ran last.
    """
    plot.state.x_range.factors = [
        (psid, kind, method)
        for psid in psid_levels
        for kind in type_levels
        for method in method_levels
    ]

# "edg" files (n = 100): labelled with label_type[0] below.
out_sd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_sd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_sd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# "edg_jd" files (n = 100): labelled with label_type[1] below.
out_jd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n100_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_jd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n100_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_jd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n100_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# Stack per density-estimation variant and record the variant in `DenEst`.
out_comp_sd = pd.concat([out_sd_01, out_sd_9, out_sd_36], axis=0).assign(
    DenEst=label_type[0]
)
out_comp_jd = pd.concat([out_jd_01, out_jd_9, out_jd_36], axis=0).assign(
    DenEst=label_type[1]
)

# Take the median over the rows sharing (rnd_np, Psi_d, DenEst) -- presumably
# the random data splits of one replication -- then discard `rnd_ds`. The
# grouping keys remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out_comp_sd, out_comp_jd], axis=0)
    .groupby(['rnd_np', 'Psi_d', 'DenEst'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Half-width of the y-axis window drawn around the reference value 2.
bdw = 0.5

# Boxes grouped by Psi_d, DenEst, then method; the hook fixes the factor order.
boxwhisker = hv.BoxWhisker(out_long, ['Psi_d', 'DenEst', 'Method'], 'EST').opts(
    show_legend=False,
    width=1000,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
    hooks=[hook_change_order],
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

In [6]:
# Axis label sets, in the left-to-right order wanted on the plot.
label_method = ['Average', 'M1', 'M2']
label_type = ['single', 'joint']
label_psid = ['0.1', '9', '36']


def hook_change_order(
    plot,
    element,
    psid_levels=tuple(label_psid),
    type_levels=tuple(label_type),
    method_levels=tuple(label_method),
):
    """Plot hook: impose a fixed order on the nested categorical x-axis.

    Sets ``plot.state.x_range.factors`` to every ``(psid, type, method)``
    tuple, with psid varying slowest and method fastest. ``element`` is
    accepted but not used.

    The level sequences are captured as parameter defaults when the function
    is defined. The module-level ``label_*`` names are rebound by other cells,
    so looking them up at call time would make the order of an already built
    plot depend on whichever cell ran last.
    """
    plot.state.x_range.factors = [
        (psid, kind, method)
        for psid in psid_levels
        for kind in type_levels
        for method in method_levels
    ]

# "edg" files (n = 200): labelled with label_type[0] below.
out_sd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n200_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_sd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n200_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_sd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n200_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# "edg_jd" files (n = 200): labelled with label_type[1] below.
out_jd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n200_rnp100_rds10_psid01.csv',
).assign(Psi_d=0.1)
out_jd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n200_rnp100_rds10_psid9.csv',
).assign(Psi_d=9)
out_jd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n200_rnp100_rds10_psid36.csv',
).assign(Psi_d=36)

# Stack per density-estimation variant and record the variant in `DenEst`.
out_comp_sd = pd.concat([out_sd_01, out_sd_9, out_sd_36], axis=0).assign(
    DenEst=label_type[0]
)
out_comp_jd = pd.concat([out_jd_01, out_jd_9, out_jd_36], axis=0).assign(
    DenEst=label_type[1]
)

# Take the median over the rows sharing (rnd_np, Psi_d, DenEst) -- presumably
# the random data splits of one replication -- then discard `rnd_ds`. The
# grouping keys remain as the index, which is what the melt below relies on.
out_comp = (
    pd.concat([out_comp_sd, out_comp_jd], axis=0)
    .groupby(['rnd_np', 'Psi_d', 'DenEst'])
    .median()
    .drop(columns='rnd_ds')
)

# Long format: every remaining column becomes a level of `Method`, its values go to `EST`.
out_long = out_comp.melt(
    var_name='Method', value_name='EST', ignore_index=False
).reset_index()

# Half-width of the y-axis window drawn around the reference value 2.
bdw = 0.5

# Boxes grouped by Psi_d, DenEst, then method; the hook fixes the factor order.
boxwhisker = hv.BoxWhisker(out_long, ['Psi_d', 'DenEst', 'Method'], 'EST').opts(
    show_legend=False,
    width=1000,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
    ylim=(2 - bdw, 2 + bdw),
    hooks=[hook_change_order],
)

# Horizontal reference line at EST = 2 (presumably the true beta -- TODO confirm).
plt_hline = hv.HLine(2).opts(color='black', active_tools=[])

boxwhisker * plt_hline

Comparison: single density estimation vs. joint density estimation with sample size $n=100, 200$.

In [23]:
# Axis label sets, in the left-to-right order wanted on the plot.
label_method = ['Average', 'M1', 'M2']
label_type = ['single', 'joint']
label_psid = ['0.1', '9', '36']


def hook_change_order(
    plot,
    element,
    psid_levels=tuple(label_psid),
    type_levels=tuple(label_type),
    method_levels=tuple(label_method),
):
    """Plot hook: impose a fixed order on the nested categorical x-axis.

    Sets ``plot.state.x_range.factors`` to every ``(psid, type, method)``
    tuple, with psid varying slowest and method fastest. ``element`` is
    accepted but not used.

    The level sequences are captured as parameter defaults when the function
    is defined. The module-level ``label_*`` names are rebound by other cells,
    so looking them up at call time would make the order of an already built
    plot depend on whichever cell ran last.
    """
    plot.state.x_range.factors = [
        (psid, kind, method)
        for psid in psid_levels
        for kind in type_levels
        for method in method_levels
    ]

out_sd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid01.csv',
)
out_sd_01['Psi_d'] = 0.1

out_sd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid9.csv',
)
out_sd_9['Psi_d'] = 9

out_sd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n100_rnp100_rds10_psid36.csv',
)
out_sd_36['Psi_d'] = 36


out_jd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n100_rnp100_rds10_psid01.csv',
)
out_jd_01['Psi_d'] = 0.1

out_jd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n100_rnp100_rds10_psid9.csv',
)
out_jd_9['Psi_d'] = 9

out_jd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n100_rnp100_rds10_psid36.csv',
)
out_jd_36['Psi_d'] = 36

out_comp_sd = pd.concat([out_sd_01, out_sd_9, out_sd_36], axis=0)
out_comp_sd['DenEst'] = label_type[0]
out_comp_jd = pd.concat([out_jd_01, out_jd_9, out_jd_36], axis=0)
out_comp_jd['DenEst'] = label_type[1]

out_comp = pd.concat([out_comp_sd, out_comp_jd], axis=0)

out_comp = out_comp.groupby(['rnd_np', 'Psi_d', 'DenEst']).median()
# out_comp = out_comp.groupby(['rnd_np', 'Psi_d']).mean()

out_comp = out_comp.drop('rnd_ds', axis=1).reset_index()

out_comp.set_index(['rnd_np', 'Psi_d', 'DenEst'], inplace=True)

out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

out_long

bdw = .5

boxwhisker = hv.BoxWhisker(
    out_long, ['Psi_d', 'DenEst', 'Method'], 'EST', 
    label = 'n = 100'
)
boxwhisker.opts(
    show_legend=False, 
    width=1000, 
    box_fill_color=dim('Method').str(), 
    cmap='Set1', 
    ylim=(2 - bdw, 2 + bdw), 
    hooks=[hook_change_order]
)

plt_hline = hv.HLine(2)
plt_hline.opts(
    color='black', 
    active_tools=[]
)

fig100 = boxwhisker * plt_hline


label_method = ['Average', 'M1', 'M2']
label_type = ['single', 'joint']
label_psid = ['0.1', '9', '36']

def hook_change_order(plot, element):
    factors =  ((x1, x2, x3) for x1 in label_psid for x2 in label_type for x3 in label_method)
    plot.state.x_range.factors = [*factors]

out_sd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n200_rnp100_rds10_psid01.csv',
)
out_sd_01['Psi_d'] = 0.1

out_sd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n200_rnp100_rds10_psid9.csv',
)
out_sd_9['Psi_d'] = 9

out_sd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_K5_n200_rnp100_rds10_psid36.csv',
)
out_sd_36['Psi_d'] = 36


out_jd_01 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n200_rnp100_rds10_psid01.csv',
)
out_jd_01['Psi_d'] = 0.1

out_jd_9 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n200_rnp100_rds10_psid9.csv',
)
out_jd_9['Psi_d'] = 9

out_jd_36 = pd.read_csv(
    '/project/Stat/s1155168529/programs/DDML/output/out_sim3_edg_jd_K5_n200_rnp100_rds10_psid36.csv',
)
out_jd_36['Psi_d'] = 36

out_comp_sd = pd.concat([out_sd_01, out_sd_9, out_sd_36], axis=0)
out_comp_sd['DenEst'] = label_type[0]
out_comp_jd = pd.concat([out_jd_01, out_jd_9, out_jd_36], axis=0)
out_comp_jd['DenEst'] = label_type[1]

out_comp = pd.concat([out_comp_sd, out_comp_jd], axis=0)

out_comp = out_comp.groupby(['rnd_np', 'Psi_d', 'DenEst']).median()
# out_comp = out_comp.groupby(['rnd_np', 'Psi_d']).mean()

out_comp = out_comp.drop('rnd_ds', axis=1).reset_index()

out_comp.set_index(['rnd_np', 'Psi_d', 'DenEst'], inplace=True)

out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

out_long

bdw = .5

boxwhisker = hv.BoxWhisker(
    out_long, ['Psi_d', 'DenEst', 'Method'], 'EST', 
    label = 'n = 200'
)
boxwhisker.opts(
    show_legend=False, 
    width=1000, 
    box_fill_color=dim('Method').str(), 
    cmap='Set1', 
    ylim=(2 - bdw, 2 + bdw), 
    hooks=[hook_change_order]
)

plt_hline = hv.HLine(2)
plt_hline.opts(
    color='black', 
    active_tools=[]
)

fig200 = boxwhisker * plt_hline

(fig100 + fig200).cols(1)

Conclusion: 
- debiasing is significant when the site-specific sample size is $n=200$,
- joint density estimation is no worse than single-equation density estimation.

In [5]:
# Result files for n=1000, one per Psi_d setting; '{}' is the psid file tag.
sim3_n1000_csv = (
    '/project/Stat/s1155168529/programs/DDML/output/'
    'out_sim3_edg_K5_n1000_rnp100_rds10_psid{}.csv'
)

# Tag each frame with the numeric psi_d value that its file name encodes.
out01 = pd.read_csv(sim3_n1000_csv.format('01')).assign(psi_d=0.1)
out9 = pd.read_csv(sim3_n1000_csv.format('9')).assign(psi_d=9)
out36 = pd.read_csv(sim3_n1000_csv.format('36')).assign(psi_d=36)

# Collapse all rows sharing (rnd_np, psi_d) to their column-wise median
# (the mean is the obvious alternative aggregate), then drop 'rnd_ds' so it is
# not melted into the long table as a "Method". groupby already leaves the
# keys as the index, which is what melt(ignore_index=False) needs.
out_comp = (
    pd.concat([out01, out9, out36], axis=0)
    .groupby(['rnd_np', 'psi_d'])
    .median()
    .drop('rnd_ds', axis=1)
)

# Wide -> long: one row per (rnd_np, psi_d, Method) with the value in 'EST'.
out_long = out_comp.melt(
    ignore_index=False, var_name='Method', value_name='EST'
).reset_index()

# One box per (psi_d, Method), coloured by Method; no ylim is passed, so the
# y-range is left to auto-scale.
boxwhisker = hv.BoxWhisker(out_long, ['psi_d', 'Method'], 'EST')
boxwhisker.opts(
    show_legend=False,
    width=600,
    box_fill_color=dim('Method').str(),
    cmap='Set1',
)

# Horizontal reference line at 2.
# NOTE(review): presumably the true value of beta in the simulation -- confirm.
plt_hline = hv.HLine(2)
plt_hline.opts(
    color='black',
    active_tools=[]
)

boxwhisker * plt_hline