In [None]:
import warnings
warnings.filterwarnings("ignore")

import json
import time
from time import perf_counter

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.integrate import quad
from scipy.interpolate import interp1d

from SyntheticDataModule import *
from estimators import *
from utils import *

In [None]:
# --- Simulation configuration ---
rct_size = 500  # size passed to the first data module; the second one receives rct_size * m
m = 2
CD = 8  # presumably the covariate dimension -- TODO confirm against SyntheticDataModule
UC = 0  # NOTE(review): meaning not visible here; forwarded to read_json only

jD = read_json('complete-IC/diffPO.json', CD, UC, ["IPCW", "CDR"])
test_signals = jD["test_signals"]

# Two synthetic studies built from the same config; the 4th positional argument is 0 for
# the "RCT" entry and 1 for the "OS" entry (presumably the study indicator S -- verify).
RCTData = SyntheticDataModule(jD['save_df'], CD, rct_size, 0, jD['RCT']['px_dist'], jD['RCT']['px_args'], jD['RCT']['prop_fn'], jD['RCT']['prop_args'], jD['RCT']['tte_params'])
OSData = SyntheticDataModule(jD['save_df'], CD, rct_size * m, 1, jD['OS']['px_dist'], jD['OS']['px_args'], jD['OS']['prop_fn'], jD['OS']['prop_args'], jD['OS']['tte_params'])

df_rct_oracle, df_rct = RCTData.get_df()
df_os_oracle, df_os = OSData.get_df()

df_combined = pd.concat([df_rct, df_os], axis=0, ignore_index=True)  # merge the dataframes into one
df_comb_drop = df_combined.query('Delta == 1').reset_index(drop=True).copy()  # drop the censored observations

# --- Nuisance parameters for the combined dataframe ---

df_combined['P(S=1|X)'] = prop_score_est(df_combined.copy(), 'S', jD['cov_list'], 'logistic')

# The treatment propensity is fitted separately within each study.
for study in (0, 1):
    df_combined.loc[df_combined.S == study, 'P(A=1|X,S)'] = prop_score_est(
        df_combined.query(f'S=={study}').copy(), 'A', jD['cov_list'], 'logistic'
    )

Gb_C, Fb_Y = est_surv(df_combined, jD['cov_list'], tte_model='coxph')
fill_barG(df_combined, jD['cov_list'], Gb_C)

if any("IPCW" in key for key in test_signals.keys()):
    for study in (0, 1):
        ipcw_est(df_combined, S=study)

if any("IPW-Impute" in key for key in test_signals.keys()):
    for study in (0, 1):
        ipw_est(df_combined, S=study, baseline='impute')  # censored observations are IMPUTED

# Time the CDR step. perf_counter() is used because the bare name `time` is bound to the
# module by `import time`, so calling `time()` only works if a star import happens to
# shadow it with a function.
t1 = perf_counter()

if any("CDR" in key for key in test_signals.keys()):
    for study in (0, 1):
        cdr_est(df_combined, jD['cov_list'], Gb_C, Fb_Y, S=study)

t2 = perf_counter()
print(f"Time:{t2 - t1}")

# --- Nuisance parameters for the combined dataframe with censored observations dropped ---

if any("IPW-Drop" in key for key in test_signals.keys()):
    df_comb_drop['P(S=1|X)'] = prop_score_est(df_comb_drop.copy(), 'S', jD['cov_list'], 'logistic')

    for study in (0, 1):
        df_comb_drop.loc[df_comb_drop.S == study, 'P(A=1|X,S)'] = prop_score_est(
            df_comb_drop.query(f'S=={study}').copy(), 'A', jD['cov_list'], 'logistic'
        )

    for study in (0, 1):
        ipw_est(df_comb_drop, S=study, baseline='drop')  # censored observations are DROPPED

# Stack the per-study summaries (each call is asked to plot as well) and display the result.
summary_df = pd.concat([RCTData.summary(plot=True), OSData.summary(plot=True)], axis=0, ignore_index=True)
summary_df

In [None]:
# Working aliases for the reporting cells. These are plain references, not copies,
# so they point at the very same DataFrame objects.
df_new, df_new_drop = df_combined, df_comb_drop

In [None]:
# Report the column means of the per-row estimates stored in `df_new`.
def _mean_of(frame, column):
    """Return the mean of `column` in `frame`."""
    return frame[column].mean()


print('CDR-S0-ATE: {:.2f}\n'.format(_mean_of(df_new, 'S0_cdr_est_CATE')))
print('CDR-S1-ATE: {:.2f}\n'.format(_mean_of(df_new, 'S1_cdr_est_CATE')))

print('IPCW-S0-Y0: {:.2f}'.format(_mean_of(df_new, 'S0_ipcw_est_Y0')))
print('IPCW-S0-Y1: {:.2f}'.format(_mean_of(df_new, 'S0_ipcw_est_Y1')))
print('IPCW-S0-ATE: {:.2f}\n'.format(_mean_of(df_new, 'S0_ipcw_est_CATE')))

print('IPCW-S1-Y0: {:.2f}'.format(_mean_of(df_new, 'S1_ipcw_est_Y0')))
print('IPCW-S1-Y1: {:.2f}'.format(_mean_of(df_new, 'S1_ipcw_est_Y1')))
print('IPCW-S1-ATE: {:.2f}\n'.format(_mean_of(df_new, 'S1_ipcw_est_CATE')))

# The IPW reports below are disabled; presumably their columns are only present when the
# corresponding "IPW-Impute" / "IPW-Drop" signals are requested -- verify before re-enabling.

# print('IPW-Impute-S0-Y0: {:.2f}'.format(df_new['S0_impute_ipw_est_Y0'].mean()))
# print('IPW-Impute-S0-Y1: {:.2f}'.format(df_new['S0_impute_ipw_est_Y1'].mean()))
# print('IPW-Impute-S0-ATE: {:.2f}\n'.format(df_new['S0_impute_ipw_est_CATE'].mean()))

# print('IPW-Impute-S1-Y0: {:.2f}'.format(df_new['S1_impute_ipw_est_Y0'].mean()))
# print('IPW-Impute-S1-Y1: {:.2f}'.format(df_new['S1_impute_ipw_est_Y1'].mean()))
# print('IPW-Impute-S1-ATE: {:.2f}\n'.format(df_new['S1_impute_ipw_est_CATE'].mean()))

# print('IPW-Drop-S0-Y0: {:.2f}'.format(df_new_drop['S0_drop_ipw_est_Y0'].mean()))
# print('IPW-Drop-S0-Y1: {:.2f}'.format(df_new_drop['S0_drop_ipw_est_Y1'].mean()))
# print('IPW-Drop-S0-ATE: {:.2f}\n'.format(df_new_drop['S0_drop_ipw_est_CATE'].mean()))

# print('IPW-Drop-S1-Y0: {:.2f}'.format(df_new_drop['S1_drop_ipw_est_Y0'].mean()))
# print('IPW-Drop-S1-Y1: {:.2f}'.format(df_new_drop['S1_drop_ipw_est_Y1'].mean()))
# print('IPW-Drop-S1-ATE: {:.2f}'.format(df_new_drop['S1_drop_ipw_est_CATE'].mean()))

summary_df

In [None]:
# Pull one observation out of the combined frame so the helper functions in the
# following cells can be evaluated on it by hand.
# The requested label is clamped to the last row: with rct_size=500 and m=2 the
# concatenated frame (built with ignore_index=True) presumably has 1500 rows, in which
# case a hard-coded 1743 would raise KeyError.
row = min(1743, len(df_combined) - 1)

s = df_combined.loc[row, 'S']
a = df_combined.loc[row, 'A']
x = np.array(df_combined.loc[row, jD['cov_list']])  # covariate values of that row as an array
T = df_combined.loc[row, 'T']
Delta = df_combined.loc[row, 'Delta']

# Display the full record.
df_combined.loc[row]

In [None]:
# Look up the time grid and curve values stored in Gb_C for the current (s, a) pair,
# then evaluate that curve at T.
gb_times = Gb_C[f't_S{s}_A{a}']
gb_values = Gb_C[f'St_S{s}_A{a}']
eval_surv_(gb_times, gb_values, T)

In [None]:
# Evaluate eval_int_term_ on whatever s, a, x, T currently hold (set by the row-inspection
# cell when the notebook is run top to bottom), using the fitted Gb_C and Fb_Y objects.
eval_int_term_(s, a, x, T, Gb_C, Fb_Y)

In [None]:
# Evaluate eval_Ystar_ on the current s, a, x, Delta, T values with the fitted Gb_C and
# Fb_Y objects; the result is displayed as the cell output.
eval_Ystar_(s, a, x, Delta, T, Gb_C, Fb_Y)

In [None]:
# Evaluate eval_Qfunc_ on the current s, a, x, T values using Fb_Y, passing thresh=1e-6.
# NOTE(review): the role of `thresh` is defined in the helper, not visible here.
eval_Qfunc_(s, a, x, T, Fb_Y, thresh=1e-6)

In [None]:
# Show the 'Gb(T|X,S,A)' column in ascending order (smallest values first).
gb_column = df_combined['Gb(T|X,S,A)']
gb_column.sort_values()

In [None]:
# Side-by-side view of the two S1 CATE columns of df_combined, ordered by the CDR column.
s1_cate_cols = ['S1_cdr_est_CATE', 'S1_ipcw_est_CATE']
df_combined[s1_cate_cols].sort_values(by='S1_cdr_est_CATE')

In [None]:
# Same side-by-side view of the two S1 CATE columns, taken from df_new.
s1_cate_cols_new = ['S1_cdr_est_CATE', 'S1_ipcw_est_CATE']
df_new[s1_cate_cols_new].sort_values(by='S1_cdr_est_CATE')

In [None]:
# Keep only rows whose estimated scores stay away from the boundaries:
# both propensity columns strictly inside (p_thr, 1 - p_thr) and Gb strictly above p_thr.
p_thr = 0.05

p_study = df_combined['P(S=1|X)']
p_treat = df_combined['P(A=1|X,S)']
g_bar = df_combined['Gb(T|X,S,A)']

keep_rows = (
    (p_thr < p_study) & (p_study < 1 - p_thr)
    & (p_thr < p_treat) & (p_treat < 1 - p_thr)
    & (p_thr < g_bar)
)
df_new = df_combined[keep_rows].copy().reset_index(drop=True)

# Equivalent trimming for the censored-dropped frame (currently disabled):
# df_new_drop = df_comb_drop[(p_thr < df_comb_drop['P(S=1|X)']) & (df_comb_drop['P(S=1|X)'] < 1 - p_thr) &\
#             (p_thr < df_comb_drop['P(A=1|X,S)']) & (df_comb_drop['P(A=1|X,S)'] < 1 - p_thr)].copy().reset_index(drop=True)

In [None]:
# Number of rows currently held in df_new.
df_new.shape[0]

In [None]:
# Display df_combined ordered by the 'P(S=1|X)' column, smallest first.
df_combined.sort_values('P(S=1|X)', ascending=True)

In [None]:
# Interpolate the Gb_C curve for (S=1, A=1), integrate it with quad, time the step,
# and plot the stored points against the interpolant.
s, a = 1, 1
ty, sty = Fb_Y[f't_S{s}_A{a}'], Fb_Y[f'St_S{s}_A{a}']
tc, stc = Gb_C[f't_S{s}_A{a}'], Gb_C[f'St_S{s}_A{a}']

# Curve under inspection in this cell (the Gb_C one).
t_arr = tc
st_arr = stc

# perf_counter() replaces the original `time()` call: `import time` binds the module,
# so `time()` only works if a star import happens to shadow the name with a function.
t1 = perf_counter()
func = interp1d(t_arr, st_arr, kind='nearest', fill_value='extrapolate')
# NOTE(review): limit=5 caps quad at five subintervals, and warnings are silenced at the
# top of the notebook, so a poorly converged integral will not be reported -- check `error`.
result, error = quad(func, 0, t_arr.max() + 10, limit=5)

elapsed = perf_counter() - t1
print(f"Time: {elapsed:.2f} s.")
print(f"Result of integration: {result}, error: {error}")

xnew = np.arange(0, t_arr.max() + 10, 0.1)
ynew = func(xnew)   # use interpolation function returned by `interp1d`
plt.plot(t_arr, st_arr, 'o', xnew, ynew, '--')
plt.show()

In [None]:
# Interpolate the Fb_Y curve for (S=1, A=1), integrate it with quad, time the step,
# and plot the stored points against the interpolant.
s, a = 1, 1
x = np.zeros(CD + 1)  # all-zero vector of length CD + 1; reused by the Q-function cell below
ty, sty = Fb_Y[f't_S{s}_A{a}'], Fb_Y[f'St_S{s}_A{a}']
tc, stc = Gb_C[f't_S{s}_A{a}'], Gb_C[f'St_S{s}_A{a}']

# Curve under inspection in this cell (the Fb_Y one).
t_arr = ty
st_arr = sty

# perf_counter() replaces the original `time()` call: `import time` binds the module,
# so `time()` only works if a star import happens to shadow the name with a function.
t1 = perf_counter()
func = interp1d(t_arr, st_arr, kind='nearest', fill_value='extrapolate')
# NOTE(review): limit=5 caps quad at five subintervals, and warnings are silenced at the
# top of the notebook, so a poorly converged integral will not be reported -- check `error`.
result, error = quad(func, 0, t_arr.max(), limit=5)

elapsed = perf_counter() - t1
print(f"Time: {elapsed:.2f} s.")
print(f"Result of integration: {result}, error: {error}")

xnew = np.arange(0, t_arr.max(), 0.1)
ynew = func(xnew)   # use interpolation function returned by `interp1d`
plt.plot(t_arr, st_arr, 'o', xnew, ynew, '--')
plt.show()

In [None]:
# Compare the point-by-point helper (eval_Qfunc_) with the array helper (eval_Qfunc_arr_)
# on a grid of 51 time points in [0, 10]; the two lines are drawn on the same axes.
Trange = np.linspace(0, 10, 51)
q_arr_normal = [eval_Qfunc_(s, a, x, t_val, Fb_Y) for t_val in Trange]
q_arr_batch = eval_Qfunc_arr_(s, a, x, Trange, Fb_Y)

fig, ax = plt.subplots()
ax.plot(Trange, q_arr_normal, label='q_normal')
ax.plot(Trange, q_arr_batch, label='q_batch')
ax.legend()
plt.show()

In [None]:
# Figure 1: value returned by OSData.calc_oracle_prop over a grid of one covariate.
x_space = np.linspace(-10, 10, 401)
cov_name = 'X1'
os_oracle_prop = OSData.calc_oracle_prop(x_space, cov_name)

fig_prop, ax_prop = plt.subplots()
ax_prop.plot(x_space, os_oracle_prop)
ax_prop.set_xlabel(cov_name)
ax_prop.set_ylabel(f'P(A=1|{cov_name},S=1)')
ax_prop.set_title(f'Oracle propensity score in study S=1 wrt covariate {cov_name}')
plt.show()

# Figure 2: curves returned by RCTData.get_oracle_surv_curve for the four labels
# 'Y0', 'Y1', 'C0', 'C1' at a fixed covariate vector.
t = np.linspace(0, 20, 101)
cov_vals = [0, 0, 0, 0]  # NOTE(review): four values while CD = 8 -- confirm the expected length
tbs_Y0 = RCTData.get_oracle_surv_curve(t, cov_vals, 'Y0')
tbs_Y1 = RCTData.get_oracle_surv_curve(t, cov_vals, 'Y1')
tbs_C0 = RCTData.get_oracle_surv_curve(t, cov_vals, 'C0')
tbs_C1 = RCTData.get_oracle_surv_curve(t, cov_vals, 'C1')

fig_surv, ax_surv = plt.subplots()
ax_surv.plot(t, tbs_Y0, label='Y0', alpha=0.4, ls='--')
ax_surv.plot(t, tbs_Y1, label='Y1', alpha=1, ls='-.')
ax_surv.plot(t, tbs_C0, label='C0', alpha=0.4)
ax_surv.plot(t, tbs_C1, label='C1', alpha=0.4)
ax_surv.set_xlabel('t')
ax_surv.set_ylabel(r'$S(t)$')
ax_surv.set_title(f'True survival curves in study S=0 with X={cov_vals}')
ax_surv.legend()
plt.show()