In [20]:
# Make the project-local `nc_csf` package importable from this notebook by
# putting its containing directory on the import path before the import below.
import sys
from pathlib import Path

# Parent of the kernel's current working directory. Per the original author's
# note this is where the nc_csf folder is located; it depends on where the
# kernel was started, so verify if the import below fails.
parent_dir = Path.cwd().parent
if str(parent_dir) not in sys.path:
    # Prepend so this directory is searched first. The membership guard keeps
    # re-runs of this cell from adding duplicate entries (and from re-printing).
    sys.path.insert(0, str(parent_dir))
    print(f"Added to path: {parent_dir}")

# Keep this import after the sys.path setup above: unless nc_csf is installed
# in the environment, it is only resolvable through the entry added there.
from nc_csf.data_generation import SynthConfig, generate_synthetic_nc_cox

In [21]:
# Case 1
# satisfies Ci ⊥ Ti | (Xi, Ai), Ci ⊥ Ui | (Xi, Ai)
#
# Every setting below matches the Case 2 cell except beta_u_in_c, which is 0
# here (presumably: U does not enter the censoring model -- confirm against
# SynthConfig). Group labels are inferred from parameter names only.

cfg = SynthConfig(
    # sample size, number of X covariates, RNG seed
    n=5000,
    p_x=10,
    seed=123,
    # treatment assignment
    a_prevalence=0.5,
    gamma_u_in_a=0.9,
    # event time
    k_t=1.5,
    lam_t=0.4,
    tau_log_hr=-0.7,
    beta_u_in_t=1.1,
    # censoring time; lam_c is left as None while a target censoring rate and
    # calibration bounds are supplied
    k_c=1.2,
    lam_c=None,
    beta_u_in_c=0,
    target_censor_rate=0.4,
    max_censor_calib_iter=60,
    censor_lam_lo=1e-8,
    censor_lam_hi=1e6,
    admin_censor_time=None,
    # Z and W columns (negative-control proxies, going by the module name -- TODO confirm)
    aZ=1.5,
    sigma_z=1.125,
    aW=1.5,
    sigma_w=1.53,
    # functional form switches
    linear_treatment=True,
    linear_outcome=True,
)

# The generator returns three objects: the observed frame, the frame that also
# carries latent/counterfactual columns, and a params object.
observed_df_1, truth_df_1, params_1 = generate_synthetic_nc_cox(cfg)

In [22]:
# Preview the first five rows of the Case 1 observed data (5 is the pandas default).
observed_df_1.head(n=5)

Unnamed: 0,time,event,A,W,Z,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9
0,0.233964,1,0,0.473513,4.22587,-0.989121,-0.367787,1.287925,0.193974,0.920231,0.577104,-0.636464,0.541952,-0.316595,-0.322389
1,0.417625,1,0,0.528272,-0.07562,0.097167,-1.52593,1.192166,-0.67109,1.000269,0.136321,1.532033,-0.659969,-0.311795,0.337769
2,0.16191,1,0,4.207707,1.564615,-2.207471,0.827921,1.54163,1.126807,0.75477,-0.145978,1.281902,1.074031,0.392621,0.005114
3,0.13745,1,1,-3.648982,-1.748814,-0.361767,-1.230232,1.226229,-2.172044,-0.370147,0.16438,0.859881,1.761661,0.993324,-0.291521
4,0.57132,1,0,-4.637285,-2.183443,0.728128,-1.2616,1.429939,-0.156475,-0.673759,-0.63906,-0.061361,-0.392785,2.28991,-0.718181


In [23]:
# Preview the first five rows of the Case 1 truth frame (5 is the pandas default).
truth_df_1.head(n=5)

Unnamed: 0,U,time,event,A,W,Z,X0,X1,X2,X3,...,X8,X9,T0,T1,C0,C1,T,C,eta_t0,eta_t1
0,0.208422,0.233964,1,0,0.473513,4.22587,-0.989121,-0.367787,1.287925,0.193974,...,-0.316595,-0.322389,0.233964,0.373096,1.343025,1.343025,0.233964,1.343025,0.182225,-0.517775
1,0.497019,0.417625,1,0,0.528272,-0.07562,0.097167,-1.52593,1.192166,-0.67109,...,-0.311795,0.337769,0.417625,0.665975,0.479252,0.479252,0.417625,0.479252,0.501035,-0.198965
2,1.058766,0.16191,1,0,4.207707,1.564615,-2.207471,0.827921,1.54163,1.126807,...,0.392621,0.005114,0.16191,0.258193,2.727646,2.727646,0.16191,2.727646,1.151343,0.451343
3,0.026861,0.13745,1,1,-3.648982,-1.748814,-0.361767,-1.230232,1.226229,-2.172044,...,0.993324,-0.291521,0.086193,0.13745,0.451038,0.451038,0.13745,0.451038,1.890915,1.190915
4,-1.584612,0.57132,1,0,-4.637285,-2.183443,0.728128,-1.2616,1.429939,-0.156475,...,2.28991,-0.718181,0.57132,0.911066,3.333963,3.333963,0.57132,3.333963,-0.676507,-1.376507


In [24]:
# Case 2
# Ui affects censoring, satisfies Ci ⊥ Ti | (Xi, Ai, Ui)
#
# Every setting below matches the Case 1 cell except beta_u_in_c, which is 0.6
# here instead of 0. Group labels are inferred from parameter names only.
# Note: this rebinds `cfg`, so the Case 1 configuration object is no longer
# reachable under that name after this cell runs.

cfg = SynthConfig(
    # sample size, number of X covariates, RNG seed
    n=5000,
    p_x=10,
    seed=123,
    # treatment assignment
    a_prevalence=0.5,
    gamma_u_in_a=0.9,
    # event time
    k_t=1.5,
    lam_t=0.4,
    tau_log_hr=-0.7,
    beta_u_in_t=1.1,
    # censoring time; lam_c is left as None while a target censoring rate and
    # calibration bounds are supplied
    k_c=1.2,
    lam_c=None,
    beta_u_in_c=0.6,
    target_censor_rate=0.4,
    max_censor_calib_iter=60,
    censor_lam_lo=1e-8,
    censor_lam_hi=1e6,
    admin_censor_time=None,
    # Z and W columns (negative-control proxies, going by the module name -- TODO confirm)
    aZ=1.5,
    sigma_z=1.125,
    aW=1.5,
    sigma_w=1.53,
    # functional form switches
    linear_treatment=True,
    linear_outcome=True,
)

# The generator returns three objects: the observed frame, the frame that also
# carries latent/counterfactual columns, and a params object.
observed_df_2, truth_df_2, params_2 = generate_synthetic_nc_cox(cfg)

In [25]:
# Preview the first five rows of the Case 2 observed data (5 is the pandas default).
observed_df_2.head(n=5)

Unnamed: 0,time,event,A,W,Z,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9
0,0.233964,1,0,0.473513,4.22587,-0.989121,-0.367787,1.287925,0.193974,0.920231,0.577104,-0.636464,0.541952,-0.316595,-0.322389
1,0.365584,0,0,0.528272,-0.07562,0.097167,-1.52593,1.192166,-0.67109,1.000269,0.136321,1.532033,-0.659969,-0.311795,0.337769
2,0.16191,1,0,4.207707,1.564615,-2.207471,0.827921,1.54163,1.126807,0.75477,-0.145978,1.281902,1.074031,0.392621,0.005114
3,0.13745,1,1,-3.648982,-1.748814,-0.361767,-1.230232,1.226229,-2.172044,-0.370147,0.16438,0.859881,1.761661,0.993324,-0.291521
4,0.57132,1,0,-4.637285,-2.183443,0.728128,-1.2616,1.429939,-0.156475,-0.673759,-0.63906,-0.061361,-0.392785,2.28991,-0.718181


In [26]:
# Preview the first five rows of the Case 2 truth frame (5 is the pandas default).
truth_df_2.head(n=5)

Unnamed: 0,U,time,event,A,W,Z,X0,X1,X2,X3,...,X8,X9,T0,T1,C0,C1,T,C,eta_t0,eta_t1
0,0.208422,0.233964,1,0,0.473513,4.22587,-0.989121,-0.367787,1.287925,0.193974,...,-0.316595,-0.322389,0.233964,0.373096,1.183518,1.183518,0.233964,1.183518,0.182225,-0.517775
1,0.497019,0.365584,0,0,0.528272,-0.07562,0.097167,-1.52593,1.192166,-0.67109,...,-0.311795,0.337769,0.417625,0.665975,0.365584,0.365584,0.417625,0.365584,0.501035,-0.198965
2,1.058766,0.16191,1,0,4.207707,1.564615,-2.207471,0.827921,1.54163,1.126807,...,0.392621,0.005114,0.16191,0.258193,1.571192,1.571192,0.16191,1.571192,1.151343,0.451343
3,0.026861,0.13745,1,1,-3.648982,-1.748814,-0.361767,-1.230232,1.226229,-2.172044,...,0.993324,-0.291521,0.086193,0.13745,0.435241,0.435241,0.13745,0.435241,1.890915,1.190915
4,-1.584612,0.57132,1,0,-4.637285,-2.183443,0.728128,-1.2616,1.429939,-0.156475,...,2.28991,-0.718181,0.57132,0.911066,7.201192,7.201192,0.57132,7.201192,-0.676507,-1.376507


In [34]:
# Write the observed and truth frames for both cases to CSV files in the
# current working directory. index=False leaves the row index out of the files.
for _frame, _csv_name in (
    (observed_df_1, 'observed_df_case1.csv'),
    (truth_df_1, 'truth_df_case1.csv'),
    (observed_df_2, 'observed_df_case2.csv'),
    (truth_df_2, 'truth_df_case2.csv'),
):
    _frame.to_csv(_csv_name, index=False)