In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import Lasso, LassoCV, LogisticRegression, LogisticRegressionCV, LinearRegression,\
    ElasticNet, ElasticNetCV, MultiTaskElasticNet, MultiTaskElasticNetCV
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
import mliv.dgps_mediated as dgps
from mliv.ensemble import EnsembleIV, EnsembleIVStar
from mliv.rkhs import ApproxRKHSIVCV, RKHSIVCV
from mliv.shape import LipschitzShapeIV, ShapeIV
from mliv.linear import OptimisticHedgeVsOptimisticHedge, StochasticOptimisticHedgeVsOptimisticHedge
from mliv.linear import L2OptimisticHedgeVsOGD, L2ProxGradient
from sklearn.pipeline import Pipeline

from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
from sklearn.cluster import KMeans
#from mliv.neuralnet.deepiv_fit import deep_iv_fit
from mliv.neuralnet.rbflayer import gaussian, inverse_multiquadric
from mliv.neuralnet import AGMM, KernelLayerMMDGMM, CentroidMMDGMM, KernelLossAGMM, MMDGMM
from mliv.tsls import tsls, regtsls

# --- Network architecture (shared by every network built in this notebook) ---
p = 0.1         # dropout probability of the dropout layers
n_hidden = 100  # width of the hidden layers

# Number of features g(z) for methods that project z into a feature space
g_features = 100

# Kernel function in use (inverse_multiquadric is the imported alternative)
kernel_fn = gaussian

# --- Training parameters ---
learner_lr = adversary_lr = 1e-4
learner_l2, adversary_l2 = 1e-3, 1e-4
adversary_norm_reg = 1e-3
n_epochs, bs = 300, 100
sigma = 2.0 / g_features
n_centers = 100

# Index of the current CUDA device when CUDA is available, otherwise None
if torch.cuda.is_available():
    device = torch.cuda.current_device()
else:
    device = None


In [None]:
from dml_mediated import DML_mediated

In [None]:
import pandas as pd

# Load the space-delimited data file; its first row holds the column names
df = pd.read_csv('../data/JCdata.csv', delimiter=' ', header=0)

# Every array below keeps only the rows where e12missdum == 0
keep_rows = df["e12missdum"] == 0

# Covariates
covariate_cols = [
    "female", "age", "race_white", "race_black", "race_hispanic", "educ_geddiploma",
    "educ_hsdiploma", "ntv_engl", "marstat_divorced", "marstat_separated",
    "marstat_livetogunm", "marstat_married", "haschldY0", "everwkd", "mwearn",
    "hohhd0", "nonres", "g10", "g10missdum", "work_dad_didnotwork", "g2", "g5",
    "g7", "welfare_child", "welfare_childmissdum", "h1_fair_poor", "h2", "h29",
    "h5", "h5missdum", "h7", "h7missdum", "i1", "i10",
]
X = df.loc[keep_rows, covariate_cols].values

# Binary treatment indicator: 1 when d > 0, else 0. It is computed on the kept
# rows only, so after index alignment the remaining rows of `a` are NaN.
df['a'] = df.loc[keep_rows, 'd'].apply(lambda x: 1 if x > 0 else 0)

# Proxies
Z = df.loc[keep_rows, ["e12", "e37"]].values
W = df.loc[keep_rows, ["e32", "e8_recruitersoffice"]].values

# Outcome, mediator and treatment, each kept as a single-column 2-D array
Y = df.loc[keep_rows, ["y"]].values
M = df.loc[keep_rows, ["m"]].values
D = df.loc[keep_rows, ["a"]].values

W.shape

In [None]:
# Settings of the RKHS IV estimator that later cells reuse as `rkhs_model`
rkhs_settings = dict(
    kernel_approx='nystrom',
    n_components=100,
    kernel='rbf',
    gamma=.1,
    delta_scale='auto',
    delta_exp=.4,
    alpha_scales=np.geomspace(1, 10000, 10),
    cv=5,
)
rkhs_model = ApproxRKHSIVCV(**rkhs_settings)

def _get_learner(n_t):
    """Build the learner network for an input of width `n_t`.

    Layers, in order: dropout, linear (n_t -> n_hidden), LeakyReLU, dropout,
    linear (n_hidden -> 1). `p` and `n_hidden` are the notebook-level settings.
    """
    layers = [
        nn.Dropout(p=p),
        nn.Linear(n_t, n_hidden),
        nn.LeakyReLU(),
        nn.Dropout(p=p),
        nn.Linear(n_hidden, 1),
    ]
    return nn.Sequential(*layers)


def _get_adversary(n_z):
    """Build the adversary network for an input of width `n_z`.

    Layers, in order: dropout, linear (n_z -> n_hidden), LeakyReLU, dropout,
    linear (n_hidden -> 1). `p` and `n_hidden` are the notebook-level settings.
    """
    layers = [
        nn.Dropout(p=p),
        nn.Linear(n_z, n_hidden),
        nn.LeakyReLU(),
        nn.Dropout(p=p),
        nn.Linear(n_hidden, 1),
    ]
    return nn.Sequential(*layers)


def _get_adversary_g(n_z):
    """Build a feature-map adversary for an input of width `n_z`.

    Layers, in order: dropout, linear (n_z -> n_hidden), LeakyReLU, dropout,
    linear (n_hidden -> g_features), ReLU. The final ReLU makes the
    `g_features` outputs non-negative.
    """
    layers = [
        nn.Dropout(p=p),
        nn.Linear(n_z, n_hidden),
        nn.LeakyReLU(),
        nn.Dropout(p=p),
        nn.Linear(n_hidden, g_features),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)


# NOTE(review): the input widths 37 and 36 are hard-coded; presumably they match
# the number of columns DML_mediated feeds to each nuisance model - confirm.
agmm_width_1, agmm_width_2 = 37, 36

agmm_1 = AGMM(_get_learner(agmm_width_1), _get_adversary(agmm_width_1))
agmm_2 = AGMM(_get_learner(agmm_width_2), _get_adversary(agmm_width_2))

In [None]:
from copy import deepcopy


def _make_rkhs(n_components=100, cv=5):
    """Return a new, unfitted ApproxRKHSIVCV with this notebook's kernel settings.

    Only the number of Nystrom components and the number of CV folds vary
    between the estimators below; every other argument is shared.
    """
    return ApproxRKHSIVCV(kernel_approx='nystrom', n_components=n_components,
                          kernel='rbf', gamma=.1, delta_scale='auto',
                          delta_exp=.4, alpha_scales=np.geomspace(1, 10000, 10), cv=cv)


def _rkhs_nuisances():
    """Return one independent deep copy of `rkhs_model` per nuisance role.

    Passing the same estimator object for all four roles lets an in-place fit
    for one role overwrite the state of another; separate copies rule that out
    whether or not DML_mediated copies its models internally.
    """
    return {role: deepcopy(rkhs_model)
            for role in ('model1', 'model2', 'modelq1', 'modelq2')}


# RKHS nuisances, default estimator, CHIM off.
# NOTE(review): model1 uses n_components=1000 and cv=10 while the other three
# use 100 and 5 - kept exactly as before; confirm the asymmetry is intended.
dml_rkhs = DML_mediated(Y, D, M, W, X, Z,
                        model1=_make_rkhs(n_components=1000, cv=10),
                        model2=_make_rkhs(),
                        modelq1=_make_rkhs(),
                        modelq2=_make_rkhs(),
                        n_folds=30, n_rep=1, CHIM=False,
                        prop_score=LogisticRegression(max_iter=1000))

# RKHS nuisances, CHIM on.
dml_rkhs_chim = DML_mediated(Y, D, M, W, X, Z,
                             **_rkhs_nuisances(),
                             n_folds=5, n_rep=3, CHIM=True,
                             prop_score=LogisticRegression(max_iter=1000))

# RKHS nuisances, IPW estimator.
dml_rkhs_ipw = DML_mediated(Y, D, M, W, X, Z,
                            **_rkhs_nuisances(),
                            n_folds=5, n_rep=3, estimator='IPW',
                            prop_score=LogisticRegression(max_iter=1000))

# RKHS nuisances, IPW estimator, CHIM on.
dml_rkhs_ipw_chim = DML_mediated(Y, D, M, W, X, Z,
                                 **_rkhs_nuisances(),
                                 n_folds=5, n_rep=3, estimator='IPW', CHIM=True,
                                 prop_score=LogisticRegression(max_iter=1000))

# Adversarial-GMM nuisances. The fit arguments reuse the training constants from
# the configuration cell instead of repeating their values as literals.
_agmm_fit = {'n_epochs': n_epochs, 'bs': bs,
             'learner_lr': learner_lr, 'adversary_lr': adversary_lr,
             'learner_l2': learner_l2, 'adversary_l2': adversary_l2}

# NOTE(review): only fitargs1 carries 'adversary_norm_reg', as in the original
# cell - confirm the other three are meant to omit it.
# agmm_1 and agmm_2 each serve two roles, so every role gets its own deep copy
# and training one network cannot carry over into another role.
dml_agmm = DML_mediated(Y, D, M, W, X, Z,
                        model1=deepcopy(agmm_1),
                        model2=deepcopy(agmm_2),
                        modelq1=deepcopy(agmm_2),
                        modelq2=deepcopy(agmm_1),
                        n_folds=5, n_rep=1,
                        CHIM=False,
                        prop_score=LogisticRegression(max_iter=1000),
                        nn_1=True,
                        nn_2=True,
                        nn_q1=True,
                        nn_q2=True,
                        fitargs1={**_agmm_fit, 'adversary_norm_reg': adversary_norm_reg},
                        fitargs2=dict(_agmm_fit),
                        fitargsq1=dict(_agmm_fit),
                        fitargsq2=dict(_agmm_fit),
                        opts={'lin_degree': 1, 'burnin': 200})

# Two-stage least squares nuisances (a fresh tsls() per role).
dml_2sls = DML_mediated(Y, D, M, W, X, Z,
                        model1=tsls(),
                        model2=tsls(),
                        modelq1=tsls(),
                        modelq2=tsls(),
                        n_folds=10, n_rep=3,
                        prop_score=LogisticRegression(max_iter=1000))

# Ensemble IV nuisances (a fresh EnsembleIV per role).
dml_rfiv = DML_mediated(Y, D, M, W, X, Z,
                        model1=EnsembleIV(n_iter=200, max_abs_value=2),
                        model2=EnsembleIV(n_iter=200, max_abs_value=2),
                        modelq1=EnsembleIV(n_iter=200, max_abs_value=2),
                        modelq2=EnsembleIV(n_iter=200, max_abs_value=2),
                        n_folds=5, n_rep=1,
                        prop_score=LogisticRegression(max_iter=1000))


In [None]:
# Run the estimator built on tsls nuisances and print what dml() returns
estimate_2sls = dml_2sls.dml()
print(estimate_2sls)

In [None]:
# Run the estimator built on RKHS nuisances and print what dml() returns
estimate_rkhs = dml_rkhs.dml()
print(estimate_rkhs)

In [None]:
# Run the three RKHS variants in turn (CHIM, IPW, IPW + CHIM) and print each result
for rkhs_variant in (dml_rkhs_chim, dml_rkhs_ipw, dml_rkhs_ipw_chim):
    print(rkhs_variant.dml())

In [None]:
# Run the estimator built on adversarial-GMM nuisances and print what dml() returns
estimate_agmm = dml_agmm.dml()
print(estimate_agmm)

In [None]:
def doubly_robust(X, T, Y):
    """Doubly-robust (AIPW) estimates of the mean outcome under treatment and control.

    Parameters
    ----------
    X : ndarray of shape (n, k)
        Covariates used for both the propensity and the outcome regressions.
    T : array-like of shape (n,) or (n, 1)
        Binary treatment indicator taking the values 0 and 1.
    Y : array-like of shape (n,) or (n, 1)
        A single outcome.

    Returns
    -------
    tuple of float
        ``(y1, y0)``: the estimated mean outcome under treatment and under control.
    """
    # Flatten column vectors first. With (n, 1) inputs the regressions predict
    # (n, 1) arrays while the propensity vector is (n,), so the expressions
    # below would broadcast to an (n, n) matrix and the mean would no longer be
    # the AIPW estimate.
    T = np.asarray(T).ravel()
    Y = np.asarray(Y).ravel()

    # Propensity score: column 1 of predict_proba; C=1e6 leaves the logistic
    # fit essentially unregularised.
    ps = LogisticRegression(C=1e6, max_iter=1000).fit(X, T).predict_proba(X)[:, 1]

    treated = T == 1
    control = T == 0
    # Outcome regressions fitted within each arm, then predicted for every unit.
    mu0 = LinearRegression().fit(X[control], Y[control]).predict(X)
    mu1 = LinearRegression().fit(X[treated], Y[treated]).predict(X)

    return (np.mean(T * (Y - mu1) / ps + mu1),
            np.mean((1 - T) * (Y - mu0) / (1 - ps) + mu0))


# D and Y are single-column (n, 1) arrays; pass them flattened so the
# element-wise arithmetic inside doubly_robust stays 1-D and does not
# broadcast against the (n,) propensity vector.
y1, y0 = doubly_robust(X, D.ravel(), Y.ravel())


In [None]:

# Doubly-robust means and their difference
for dr_value in (y1, y0, y1 - y0):
    print(dr_value)

# NOTE(review): the three literals below are hard-coded and unexplained;
# presumably pasted from earlier estimator runs - confirm their source.
for pasted_value in (0.0895757991453447, 0.01544137589055832, 0.14523434238839902):
    print(pasted_value - y0)
