In [1]:
import copy
import os
import sys

# Add the `simulations` directory (two levels above the working directory) to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../../simulations')))

import matplotlib.pyplot as plt

from nnpiv.rkhs import RKHSIV, ApproxRKHSIVCV, RKHSIVCV, RKHSIVL2CV, RKHSIVL2, RKHS2IV, RKHS2IVL2

import numpy as np
import simulations.dgps_mediated as dgps


# Now you can import the module
from nnpiv.semiparametrics import DML_mediated

In [37]:

# --- Simulate data ---------------------------------------------------------
# Pick the transformation pair (tau_fn and its inverse) by index and draw a
# sample of 2000 observations from the mediated DGP.
# NOTE(review): no random seed is set before the draws below, so re-running
# this cell yields a different V (and presumably different data) — consider
# seeding in a config cell if reproducible outputs are required.
fn_number = 0
tau_fn = dgps.get_tau_fn(fn_number)
tauinv_fn = dgps.get_tauinv_fn(fn_number)
W, Z, X, M, D, Y, tau_fn = dgps.get_data(2000, tau_fn)

# One uniform draw per observation, as a column vector (not used in the
# cells shown here).
V = np.random.rand(Y.shape[0])
V = V.reshape(-1, 1)

print(np.column_stack((W,X,Z)).shape)

# --- Evaluation grid on the D == 0 subsample --------------------------------
# Every column is held at its D == 0 median; only the W column (stacked last,
# after the X columns) is swept from its 5th to its 95th percentile.
ind = np.where(D==0)[0]
W0 = W[ind]
X0 = X[ind,:]
n_test = 1000  # number of grid points; must match the linspace length below
W0_test = np.zeros((n_test, 1+X.shape[1]))
W0_test += np.median(np.column_stack((X0,W0)), axis=0, keepdims=True)
# Index the W column as "last" rather than a literal 2 so the grid stays
# correct if the number of X columns changes.
W0_test[:, -1] = np.linspace(np.percentile(W0[:, 0], 5),
                             np.percentile(W0[:, 0], 95), n_test)

# --- True parameters ---------------------------------------------------------
# presumably these mirror the coefficients used inside dgps.get_data — TODO confirm
# Outcome equation coefficients.
b_yd = 2.0
b_ym = 1.0
b_yx = np.array([[-1.0],[-1.0]])
b_yu = -1.0
b_yw = 2.0
b_y0 = 2.0
# W equation coefficients.
b_wx = np.array([[0.2],[0.2]])
b_wu = -0.6
b_w0 = 0.3
# Mediator equation coefficients.
b_md = -0.3
b_mx = np.array([[-0.5],[-0.5]])
b_mu = 0.4
b_m0 = 0.0

# First set of derived coefficients (gamma_1*).
gamma_1w = (b_yw*b_wu + b_yu)/b_wu
gamma_1x = b_yw*b_wx + b_yx - gamma_1w*b_wx
gamma_1m = b_ym
gamma_10 = b_y0 + b_yd + b_yw*b_w0 - gamma_1w*b_w0

# Second set of derived coefficients (gamma_0*), built from the gamma_1* values.
gamma_0w = (gamma_1m*b_mu + gamma_1w*b_wu)/b_wu
gamma_0x = gamma_1m*b_mx + gamma_1w*b_wx + gamma_1x - gamma_0w*b_wx
gamma_00 = gamma_10 + gamma_1m*b_m0 + gamma_1w*b_w0 - gamma_0w*b_w0

# True nuisance function evaluated on the grid. np.vstack replaces the
# deprecated np.row_stack alias (same result: the X coefficients stacked on
# top of the W coefficient, matching the (X, W) column order of W0_test).
expected_te = gamma_00 + tauinv_fn(W0_test)@np.vstack((gamma_0x, gamma_0w))
D_ = D.copy()


(2000, 4)


In [31]:
# RKHS IV learner with an RBF kernel; this single instance is reused by the
# following cells for every nuisance/bridge fit.
rkhs_model = RKHSIVL2(
    kernel='rbf',
    gamma=.1,
    delta_scale='auto',
    delta_exp=.4,
)

In [32]:

# Estimate E[Y(1,M(0))] with the 'OR' estimator, using 5 folds and a single
# repetition.
# NOTE(review): the same rkhs_model instance is passed for all four nuisance
# slots; this is only safe if DML_mediated copies the models before fitting —
# confirm against the nnpiv implementation.
dml_rkhs = DML_mediated(
    Y, D, M, W, Z, X,
    estimator='OR',
    estimand='E[Y(1,M(0))]',
    model1=rkhs_model,
    model2=rkhs_model,
    modelq1=rkhs_model,
    modelq2=rkhs_model,
    n_folds=5,
    n_rep=1,
)
# The printed result is a 3-tuple (scalar, scalar, length-2 array) —
# presumably point estimate, variance and confidence interval; TODO confirm.
print(dml_rkhs.dml())


Rep: 1


100%|██████████| 5/5 [00:07<00:00,  1.54s/it]

(4.06033182990191, 21.587023870157502, array([3.85670759, 4.26395607]))





In [33]:

# Manual two-stage bridge estimation, for comparison with the other estimates.
#
# Each stage fits its OWN deep copy of `rkhs_model`. Calling `.fit` on the
# shared instance twice would (assuming `fit` returns the estimator itself —
# TODO confirm) make `bridge_1` and `bridge_2` the same object, so `bridge_1`
# would silently turn into the stage-2 fit once stage 2 runs.

# --- Stage 1: fit on the D == 1 subsample ------------------------------------
ind = np.where(D==1)[0]
M1 = M[ind]
W1 = W[ind]
X1 = X[ind,:]
Z1 = Z[ind]
Y1 = Y[ind]

# First fit argument stacks (M, X, Z), second stacks (M, X, W); target is Y.
A2 = np.column_stack((M1,X1,Z1))
A1 = np.column_stack((M1,X1,W1))

bridge_1 = copy.deepcopy(rkhs_model).fit(A2, A1, Y1)
# Predict the stage-1 bridge for the FULL sample (A1 is deliberately rebuilt
# from all rows here, replacing the D == 1 training matrix).
A1 = np.column_stack((M,X,W))
bridge_1_hat = bridge_1.predict(A1)
print(np.mean(bridge_1_hat))
print(np.var(bridge_1_hat))
# Give the predictions the same trailing shape as Y.
bridge_1_hat = bridge_1_hat.reshape(A1.shape[:1] + Y.shape[1:])

# --- Stage 2: regress the stage-1 predictions on the D == 0 subsample --------
ind = np.where(D==0)[0]
W0 = W[ind]
X0 = X[ind,:]
Z0 = Z[ind]
bridge_1_hat = bridge_1_hat[ind]

# First fit argument stacks (Z, X), second stacks (X, W).
B2 = np.column_stack((Z0,X0))
B1 = np.column_stack((X0,W0))

bridge_2 = copy.deepcopy(rkhs_model).fit(B2, B1, bridge_1_hat)

# Evaluate the stage-2 bridge on every observation, as a column vector.
gamma_0_hat = bridge_2.predict(np.column_stack((X, W))).reshape(-1, 1)

print(np.mean(gamma_0_hat))
print(np.var(gamma_0_hat))

4.150497972408939
18.745097763692986
4.109066158149168
16.215668430035784


In [34]:
# RKHS2IVL2 learner configured with the same kernel and regularisation
# settings as rkhs_model (RBF kernel, gamma=0.1, delta_scale='auto',
# delta_exp=0.4).
rkhs2_model = RKHS2IVL2(
    kernel='rbf',
    gamma=.1,
    delta_scale='auto',
    delta_exp=.4,
)

In [35]:
# Full-sample design matrices for the single-call RKHS2IVL2 fit.
A = np.column_stack((M,X,W))
E = np.column_stack((M,X,Z))
B = np.column_stack((X,W))
C = np.column_stack((X,Z))

# Fit once with D supplied as the subset indicator, then predict on (X, W).
# NOTE(review): the recorded mean of this prediction (~1.44) is far from the
# manual two-stage estimate (~4.11) — verify the expected argument order and
# the subset_ind1 convention of RKHS2IVL2.fit.
rkhs2_fitted = rkhs2_model.fit(A, B, C, E, Y, subsetted=True, subset_ind1=D)
rkhs2_pred = rkhs2_fitted.predict(B)
print(np.mean(rkhs2_pred))
print(np.var(rkhs2_pred))

1.4366579052234956
4.2913319081201
