In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json

from scipy.integrate import quad
from scipy.interpolate import interp1d

from SyntheticDataModule import *
from estimators import *
from utils import *

In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
rct_size = 500   # sample size passed to the S=0 data module
m = 1            # the S=1 data module is built with rct_size * m samples
CD = 1
UC = 0

jD = read_json('complete-IC/samePO.json', CD, UC, ["IPCW", "CDR"])
test_signals = jD["test_signals"]
covariates = jD['cov_list']


def signal_requested(tag):
    """Return True when at least one key of `test_signals` contains `tag`."""
    return any(tag in key for key in test_signals.keys())


# ---------------------------------------------------------------------------
# Synthetic data: one module per study (4th positional argument is 0 / 1)
# ---------------------------------------------------------------------------
rct_cfg, os_cfg = jD['RCT'], jD['OS']

RCTData = SyntheticDataModule(
    jD['save_df'], CD, rct_size, 0,
    rct_cfg['px_dist'], rct_cfg['px_args'],
    rct_cfg['prop_fn'], rct_cfg['prop_args'],
    rct_cfg['tte_params'],
)
OSData = SyntheticDataModule(
    jD['save_df'], CD, rct_size * m, 1,
    os_cfg['px_dist'], os_cfg['px_args'],
    os_cfg['prop_fn'], os_cfg['prop_args'],
    os_cfg['tte_params'],
)

df_rct_oracle, df_rct = RCTData.get_df()
df_os_oracle, df_os = OSData.get_df()

# Stack both studies into a single frame; also keep a variant restricted to
# rows with Delta == 1 (taken before any estimate columns are added).
df_combined = pd.concat([df_rct, df_os], axis=0, ignore_index=True)
df_comb_drop = df_combined.query('Delta == 1').reset_index(drop=True).copy()

# ---------------------------------------------------------------------------
# Nuisance parameters on the combined frame
# ---------------------------------------------------------------------------
df_combined['P(S=1|X)'] = prop_score_est(df_combined.copy(), 'S', covariates, 'logistic')

# Treatment propensity is fitted separately within each study.
for study_idx, study_query in ((0, 'S==0'), (1, 'S==1')):
    df_combined.loc[df_combined.S == study_idx, 'P(A=1|X,S)'] = prop_score_est(
        df_combined.query(study_query).copy(), 'A', covariates, 'logistic'
    )

cbse, ybse = est_surv(df_combined, covariates, tte_model='coxph')
fill_barG(df_combined, covariates, cbse)

# Each estimator runs only if a matching key was requested in test_signals.
if signal_requested("IPCW"):
    for study_idx in (0, 1):
        ipcw_est(df_combined, S=study_idx)

if signal_requested("IPW-Impute"):
    for study_idx in (0, 1):
        ipw_est(df_combined, S=study_idx, baseline='impute')  # censored observations are IMPUTED

if signal_requested("CDR"):
    for study_idx in (0, 1):
        cdr_est(df_combined, covariates, cbse, ybse, S=study_idx)

# ---------------------------------------------------------------------------
# Nuisance parameters on the frame with censored observations dropped
# ---------------------------------------------------------------------------
if signal_requested("IPW-Drop"):
    df_comb_drop['P(S=1|X)'] = prop_score_est(df_comb_drop.copy(), 'S', covariates, 'logistic')

    for study_idx, study_query in ((0, 'S==0'), (1, 'S==1')):
        df_comb_drop.loc[df_comb_drop.S == study_idx, 'P(A=1|X,S)'] = prop_score_est(
            df_comb_drop.query(study_query).copy(), 'A', covariates, 'logistic'
        )

    for study_idx in (0, 1):
        ipw_est(df_comb_drop, S=study_idx, baseline='drop')  # censored observations are DROPPED

# Per-study summaries (plots are produced as a side effect of plot=True).
summary_df = pd.concat(
    [module.summary(plot=True) for module in (RCTData, OSData)],
    axis=0,
    ignore_index=True,
)
summary_df

In [None]:
# Scratch check: elementwise product of two equal-length vectors.
a = np.multiply(np.array([3, 4, 5]), np.array([1, 2, 3]))

In [None]:
# Scratch check: running total of a short integer vector.
np.array([1, 3, 7]).cumsum()

In [None]:
# Sample increasing sequence used by the next cell to test consecutive differences.
orig_list = [2, 4, 7, 11, 16, 22, 29]

In [None]:
# Differences between each element of orig_list and its predecessor.
[nxt - cur for cur, nxt in zip(orig_list[:-1], orig_list[1:])]

In [None]:
# Display the combined frame (both studies) together with whatever nuisance /
# estimator columns the setup cell has added to it.
df_combined

In [None]:
# Largest estimated study-membership propensity in the combined frame.
df_combined.loc[:, 'P(S=1|X)'].max()

In [None]:
# Untrimmed baseline: both names are aliases (not copies) of the full frames.
df_new, df_new_drop = df_combined, df_comb_drop

In [None]:
# Print the mean of the Y0 / Y1 / CATE columns (the last is labelled "ATE") for
# each estimator and study, then display the data summary table.
#
# The estimator columns are only created by the setup cell when the matching
# key is present in `test_signals`, so reading them unconditionally raises a
# KeyError whenever an estimator was not requested. Each estimator/study pair
# is therefore checked first and reported as skipped when its columns are
# absent. When all columns exist, the printed text is identical to the
# unguarded version (groups separated by one blank line).
report_specs = [
    # (label used in the printout, column-name stub, frame holding the columns)
    ('IPCW', 'ipcw_est', df_new),
    ('IPW-Impute', 'impute_ipw_est', df_new),
    ('IPW-Drop', 'drop_ipw_est', df_new_drop),
]

report_blocks = []
for est_label, col_stub, est_frame in report_specs:
    for study_idx in (0, 1):
        col_prefix = f'S{study_idx}_{col_stub}'
        tag = f'{est_label}-S{study_idx}'
        col_y0, col_y1, col_cate = (f'{col_prefix}_{suffix}' for suffix in ('Y0', 'Y1', 'CATE'))

        if any(col not in est_frame.columns for col in (col_y0, col_y1, col_cate)):
            report_blocks.append(f'{tag}: skipped (columns {col_prefix}_* not found)')
            continue

        report_blocks.append('\n'.join([
            '{}-Y0: {:.2f}'.format(tag, est_frame[col_y0].mean()),
            '{}-Y1: {:.2f}'.format(tag, est_frame[col_y1].mean()),
            '{}-ATE: {:.2f}'.format(tag, est_frame[col_cate].mean()),
        ]))

print('\n\n'.join(report_blocks))

summary_df

In [None]:
# Keep only rows whose study propensity AND treatment propensity both lie
# strictly inside (p_thr, 1 - p_thr); the same rule is applied to both frames.
p_thr = 0.05

keep_full = (
    (df_combined['P(S=1|X)'] > p_thr)
    & (df_combined['P(S=1|X)'] < 1 - p_thr)
    & (df_combined['P(A=1|X,S)'] > p_thr)
    & (df_combined['P(A=1|X,S)'] < 1 - p_thr)
)
df_new = df_combined[keep_full].copy().reset_index(drop=True)

keep_drop = (
    (df_comb_drop['P(S=1|X)'] > p_thr)
    & (df_comb_drop['P(S=1|X)'] < 1 - p_thr)
    & (df_comb_drop['P(A=1|X,S)'] > p_thr)
    & (df_comb_drop['P(A=1|X,S)'] < 1 - p_thr)
)
df_new_drop = df_comb_drop[keep_drop].copy().reset_index(drop=True)

In [None]:
# Number of rows currently in df_new.
df_new.shape[0]

In [None]:
# Combined frame ordered by increasing study propensity (extremes at the ends).
df_combined.sort_values('P(S=1|X)')

In [None]:
# Study propensities of df_new in increasing order (index labels preserved).
df_new['P(S=1|X)'].sort_values()

In [None]:
# --- Oracle treatment propensity in the S=1 module as a function of one covariate ---
cov_name = 'X1'
x_space = np.linspace(-10, 10, 401)
os_oracle_prop = OSData.calc_oracle_prop(x_space, cov_name)

plt.figure()
plt.plot(x_space, os_oracle_prop)
plt.title(f'Oracle propensity score in study S=1 wrt covariate {cov_name}')
plt.ylabel(f'P(A=1|{cov_name},S=1)')
plt.xlabel(cov_name)
plt.show()

# --- Oracle survival curves from the S=0 module at a fixed covariate vector ---
t = np.linspace(0, 20, 101)
cov_vals = [0, 0, 0, 0]
tbs_Y0, tbs_Y1, tbs_C0, tbs_C1 = (
    RCTData.get_oracle_surv_curve(t, cov_vals, curve_name)
    for curve_name in ('Y0', 'Y1', 'C0', 'C1')
)

# One entry per curve: (values, keyword styling handed to plt.plot).
curve_styles = [
    (tbs_Y0, dict(label='Y0', alpha=0.4, ls='--')),
    (tbs_Y1, dict(label='Y1', alpha=1, ls='-.')),
    (tbs_C0, dict(label='C0', alpha=0.4)),
    (tbs_C1, dict(label='C1', alpha=0.4)),
]

plt.figure()
for curve, style in curve_styles:
    plt.plot(t, curve, **style)
plt.title(f'True survival curves in study S=0 with X={cov_vals}')
plt.ylabel(r'$S(t)$')
plt.xlabel('t')
plt.legend()
plt.show()

In [None]:
# Overlay the oracle 'Y0' curve from the S=0 module with the curve stored in
# ybse under the S0/A0 keys. Only the oracle curve carries a legend label.
# NOTE(review): cov_vals has 11 entries here but 4 in the neighbouring cells —
# confirm which length get_oracle_surv_curve expects.
cov_vals = np.zeros(11)
t = np.linspace(0, 10, 101)
tbs_Y0 = RCTData.get_oracle_surv_curve(t, cov_vals, 'Y0')

plt.figure()
plt.plot(t, tbs_Y0, linestyle='--', alpha=0.8, label='Y0')
plt.plot(ybse['t_S0_A0'], ybse['St_S0_A0'])
plt.title(f'True survival curves in study S=0 with X={cov_vals}')
plt.ylabel(r'$S(t)$')
plt.xlabel('t')
plt.legend()
plt.show()

In [None]:
# Overlay the oracle 'C1' curve from the S=0 module with the curve stored in
# cbse under the S0/C1 keys. Only the oracle curve carries a legend label.
cov_vals = [0, 0, 0, 0]
t = np.linspace(0, 10, 101)
tbs_C1 = RCTData.get_oracle_surv_curve(t, cov_vals, 'C1')

plt.figure()
plt.plot(t, tbs_C1, linestyle='--', alpha=0.8, label='C1')
plt.plot(cbse['t_S0_C1'], cbse['St_S0_C1'])
plt.title(f'True survival curves in study S=0 with X={cov_vals}')
plt.ylabel(r'$S(t)$')
plt.xlabel('t')
plt.legend()
plt.show()

In [None]:
# Overlay the oracle 'C0' curve from the S=1 module (OSData) with the curve
# stored in cbse under the S1/C0 keys.
# The title now reads "study S=1": both the oracle curve (OSData) and the
# estimated curve (S1 keys) belong to study 1, but the title had been copied
# from the S=0 cells.
t = np.linspace(0,10,101)
cov_vals = [0, 0, 0, 0]
tbs_C0 = OSData.get_oracle_surv_curve(t, cov_vals, 'C0')

plt.figure()
plt.plot(t, tbs_C0, label='C0', alpha=0.8, ls ='--')
plt.plot(cbse['t_S1_C0'], cbse['St_S1_C0'])
plt.xlabel('t')
plt.ylabel(r'$S(t)$')
plt.title(f'True survival curves in study S=1 with X={cov_vals}')
plt.legend()
plt.show()

In [None]:
# Overlay the oracle 'C1' curve from the S=1 module (OSData) with the curve
# stored in cbse under the S1/C1 keys.
# The title now reads "study S=1": both the oracle curve (OSData) and the
# estimated curve (S1 keys) belong to study 1, but the title had been copied
# from the S=0 cells.
t = np.linspace(0,10,101)
cov_vals = [0, 0, 0, 0]
tbs_C1 = OSData.get_oracle_surv_curve(t, cov_vals, 'C1')

plt.figure()
plt.plot(t, tbs_C1, label='C1', alpha=0.8, ls ='--')
plt.plot(cbse['t_S1_C1'], cbse['St_S1_C1'])
plt.xlabel('t')
plt.ylabel(r'$S(t)$')
plt.title(f'True survival curves in study S=1 with X={cov_vals}')
plt.legend()
plt.show()

In [None]:
# Pick the time grid / survival values for study s=0, arm a=0 from both
# estimate dictionaries.
# NOTE(review): the plotting cells index cbse with keys of the form
# 't_S0_C1' / 'St_S1_C0' (a 'C' before the arm), whereas this cell uses the
# '..._A{a}' form for cbse as well — confirm cbse actually contains these keys.
s, a = 0, 0
ty, sty = ybse[f't_S{s}_A{a}'], ybse[f'St_S{s}_A{a}']
tc, stc = cbse[f't_S{s}_A{a}'], cbse[f'St_S{s}_A{a}']

In [None]:
# Select the curves for study s=0, arm a=1, then interpolate and integrate the
# curve taken from cbse.
s, a = 0, 1
ty = ybse[f't_S{s}_A{a}']
sty = ybse[f'St_S{s}_A{a}']
tc = cbse[f't_S{s}_A{a}']
stc = cbse[f'St_S{s}_A{a}']

# Curve under inspection (swap in ty / sty to look at the other dictionary).
t_arr, st_arr = tc, stc

# Nearest-neighbour interpolant, extended beyond the observed grid.
func = interp1d(t_arr, st_arr, kind='nearest-up', fill_value='extrapolate')

# Integrate from 0 to 10 time units past the last grid point.
result, _ = quad(func, 0, t_arr.max() + 10, limit=100)
print("Result of integration:", result)

# Visual check: raw grid points against the interpolant on a finer grid.
xnew = np.arange(0, t_arr.max() + 10, 0.1)
ynew = func(xnew)
plt.plot(t_arr, st_arr, 'o')
plt.plot(xnew, ynew, '--')
plt.show()

In [None]:
# Scratch check: "sum of this element and everything after it" for each position,
# computed as total minus the running sum of the preceding elements.
original_array = np.array([1, 3, 7])

running_total = np.cumsum(original_array)
shift_cumsum = np.roll(running_total, 1)   # move each running total one slot to the right
shift_cumsum[0] = 0                        # nothing precedes the first element
new_array = original_array.sum() - shift_cumsum

In [None]:
# Display the result of the total-minus-shifted-cumsum computation.
new_array

In [None]:
# Preview of appending 1 to `aaa` (np.append returns a new array; `aaa` itself is unchanged).
# NOTE(review): `aaa` is not defined anywhere in the visible notebook, so this
# raises NameError on a fresh kernel — define it or remove this cell.
np.append(aaa, 1)

In [None]:
# Rebind `aaa` to a copy with 1 appended; re-running this cell appends again.
# NOTE(review): `aaa` is not defined anywhere in the visible notebook, so this
# raises NameError on a fresh kernel — define it or remove this cell.
aaa = np.append(aaa, 1)

In [None]:
# Display `aaa`.
# NOTE(review): `aaa` has no visible definition that works on a fresh kernel.
aaa

In [None]:
# Largest time point on the grid currently under inspection.
np.max(t_arr)

In [None]:
# Sorted distinct values taken by the curve `stc`.
np.unique(stc)

In [None]:
# Number of points on the time grid `tc`.
len(tc)

In [None]:
# Differences between consecutive entries of stc (one fewer element than stc).
stcder = [stc[k] - stc[k - 1] for k in range(1, len(stc))]

In [None]:
# Sorted distinct step sizes found between consecutive entries of stc.
np.unique(stcder)

In [None]:
# Scratch check of numpy.gradient on a short sequence: one-sided differences
# at the two ends, central differences in the interior.
x = np.array((1, 2, 4, 7, 11))
dx = np.gradient(x)

print(dx)

In [None]:
# Length of the numerical gradient of stc (compare with len(stc) in the next cell).
len(np.gradient(stc))

In [None]:
# Number of points on the curve stc.
len(stc)

In [None]:
# Numerical gradient of 1 - stc, computed with unit spacing between samples
# (the time grid is not passed to np.gradient).
np.gradient(1 - stc)

In [None]:
# Grid time closest (in absolute distance) to target_val.
target_val = 5
t_arr[np.abs(t_arr - target_val).argmin()]

In [None]:
# Position (within the sub-array of grid times strictly below target_val) of
# the largest such time.
np.argmax(t_arr[np.where(t_arr < target_val)[0]])

In [None]:
# Index of the last grid time that is strictly below target_val.
np.flatnonzero(t_arr < target_val)[-1]

In [None]:
# Scratch check: thin an array by keeping every k-th element.
# For arrays shorter than 100 elements the integer division yields 0, and a
# slice step of 0 raises ValueError, so the stride is clamped to at least 1
# (i.e. short arrays are kept whole).
a = np.zeros(80)
k = max(1, len(a) // 100)
len(a[::k])

In [None]:
import numpy as np

In [None]:
# Estimator label -> [S=0 column name, S=1 column name] of its CATE estimates.
my_dict = dict([
    ("IPCW", ["S0_ipcw_est_CATE", "S1_ipcw_est_CATE"]),
    ("CDR", ["S0_cdr_est_CATE", "S1_cdr_est_CATE"]),
    ("IPW-Impute", ["S0_impute_ipw_est_CATE", "S1_impute_ipw_est_CATE"]),
    ("IPW-Drop", ["S0_drop_ipw_est_CATE", "S1_drop_ipw_est_CATE"]),
])

In [None]:
# Scratch check: does any key of my_dict contain the given substring?
any("Impute-IPWW" in key for key in my_dict)

In [None]:
# Restrict my_dict to the listed estimator labels, keeping my_dict's own key order.
{key: my_dict[key] for key in my_dict if key in ("IPCW", "CDR")}