## Imports

In [2]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import matplotlib.mlab   as mlab

from kte import kernel_two_sample_test_nonuniform
from xkte import kernel_two_sample_test_agnostic
from xkte import kernel_dr_two_sample_test_agnostic
from dr_kte import kernel_dr_nonuniform


from baselines import vanilla_dr_baseline_test
from baselines import BART_baseline_test
from baselines import CausalForest_baseline_test


from sklearn.metrics import pairwise_distances

from scipy.spatial.distance import cdist
from scipy.special import expit
from scipy.stats import bernoulli
from numpy.polynomial.polynomial import polyval
from sklearn.linear_model import LogisticRegression

from tqdm import tqdm

import seaborn as sns
import pandas as pd
import time

## Main function, which runs a list of tests based on its arguments

In [3]:
def run_tests(b_list, method_list, case_list, ns_list, experiment, name_folder, num_experiments, iterations):
    """Simulate data, run the requested tests, and write one CSV per configuration.

    For every combination of ``b``, ``method``, ``case`` and ``ns`` the function
    repeats ``num_experiments`` simulations and saves the collected run times,
    p-values and test statistics to
    ``name_folder + 'ns<ns>b<b>case<case><method>.csv'``.

    Parameters
    ----------
    b_list : iterable of str
        Treatment-effect scenarios applied to the treated outcomes:
        ``'I'`` adds nothing, ``'II'`` adds the constant 0.5, ``'III'`` adds
        +1 or -1 with equal probability, ``'IV'`` adds a Uniform(-2, 2) draw.
        Any other value prints a message and falls back to no shift.
    method_list : iterable of str
        Tests to run: ``'DR-xKTE'``, ``'IPW-xKTE'``, ``'KTE'``, ``'linear'``,
        ``'DR-CFME'``, ``'Vanilla_DR'``, ``'BART'`` or ``'CausalForest'``.
        An unrecognized name prints a message on every repetition and yields
        an all-zero CSV.
    case_list : iterable of int
        Outcome model: ``1`` is linear in the covariates, ``2`` applies a
        cosine to the same linear combination.
    ns_list : iterable of int
        Sample sizes.
    experiment : bool
        If true, treatment is a random balanced assignment with weights fixed
        at 0.5; otherwise treatment is drawn from a logistic propensity model
        and the weights are estimated with logistic regression.
    name_folder : str
        Prefix prepended to each CSV file name (typically a directory ending
        in ``'/'``). Its directory part is created if it does not exist.
    num_experiments : int
        Number of simulation repetitions per configuration.
    iterations : int
        Forwarded to the ``'KTE'``, ``'linear'``, ``'DR-CFME'`` and baseline
        tests; the ``'DR-xKTE'`` and ``'IPW-xKTE'`` calls do not receive it.

    Raises
    ------
    ValueError
        If ``case_list`` contains a value other than 1 or 2.
    """
    import os  # local import: only needed to prepare the output directory

    # NOTE: despite its name this multiplies standard-normal noise below,
    # so it acts as a noise scale rather than a variance.
    noise_var = .5
    d = 5  # covariate dimension; must match the length of the vectors below
    beta_vec = np.array([0.1, 0.2, 0.3, 0.4, 0.5])        # outcome-model coefficients
    alpha_vec = np.array([0.05, 0.04, 0.03, 0.02, 0.01])  # propensity-model coefficients
    alpha_0 = 0.05                                        # propensity-model intercept

    # Fail fast: an unsupported case would otherwise leave Y unassigned (or
    # silently reuse the previous iteration's Y) deep inside the loops.
    for case in case_list:
        if case not in (1, 2):
            raise ValueError('case must be 1 or 2, got {!r}'.format(case))

    # Create the output directory up front so a long simulation is not lost
    # when the first CSV is written.
    out_dir = os.path.dirname(name_folder)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    np.random.seed(0)

    for b in b_list:
        print('b = ', b)
        for method in method_list:
            for case in case_list:
                for ns in ns_list:
                    p_values = np.zeros(num_experiments)
                    values = np.zeros(num_experiments)
                    times = np.zeros(num_experiments)

                    for n in range(num_experiments):

                        ### generate data
                        # The order of the random draws below is kept fixed so
                        # results stay reproducible under the seed set above.
                        X = np.random.randn(ns, d)

                        if experiment:
                            # Randomized design: ns//2 controls, the rest treated,
                            # shuffled; weights are the constant 0.5.
                            Prob_vec = np.zeros(ns) + 0.5
                            a = np.concatenate((np.repeat(0, ns // 2), np.repeat(1, ns - ns // 2)))
                            T = np.random.choice(a, replace=False, size=len(a))
                            w = Prob_vec.copy()
                        else:
                            # Observational design: treatment drawn from a logistic
                            # propensity model; weights are fitted probabilities.
                            Prob_vec = expit(np.dot(alpha_vec, X.T) + alpha_0)
                            T = bernoulli.rvs(Prob_vec)
                            w = LogisticRegression(C=1e6, max_iter=1000).fit(X, T).predict_proba(X)[:, 1]

                        if case == 1:
                            Y = np.dot(beta_vec, X.T) + noise_var * np.random.randn(X.shape[0])
                        else:  # case == 2, guaranteed by the validation above
                            Y = np.cos(np.dot(beta_vec, X.T)) + noise_var * np.random.randn(X.shape[0])

                        n_treated = len(T[T == 1])
                        if b == 'I':
                            b1 = 0
                        elif b == 'II':
                            b1 = 0.5
                        elif b == 'III':
                            Z = bernoulli.rvs(0.5, size=n_treated)
                            beta = 1.
                            b1 = (2 * Z - 1) * beta
                        elif b == 'IV':
                            beta = 2
                            b1 = np.random.uniform(-beta, beta, n_treated)
                        else:
                            print('b not recognized! Setting b1 = 0.')
                            b1 = 0

                        # Only treated outcomes are shifted.
                        Y[T == 1] += b1
                        YY0 = Y[T == 0]
                        YY1 = Y[T == 1]

                        ### calculate the test statistics and p-value
                        # The tests below are given column vectors.
                        Y = Y[:, np.newaxis]
                        YY0 = YY0[:, np.newaxis]
                        YY1 = YY1[:, np.newaxis]

                        # Gaussian RBF kernel: bandwidth from the median distance
                        # between control and treated outcomes.
                        sigma2 = np.median(pairwise_distances(YY0, YY1, metric='euclidean'))**2

                        if method == 'DR-xKTE':
                            t0 = time.time()
                            value, p_value = kernel_dr_two_sample_test_agnostic(Y, X, T, w,
                                                                                kernel_function='rbf',
                                                                                gamma=1.0/sigma2,
                                                                                verbose=False)
                            times[n] = time.time() - t0
                            p_values[n] = p_value
                            values[n] = value

                        elif method == 'IPW-xKTE':
                            t0 = time.time()
                            value, p_value = kernel_two_sample_test_agnostic(Y, T, w,
                                                                             kernel_function='rbf',
                                                                             gamma=1.0/sigma2,
                                                                             verbose=False)
                            times[n] = time.time() - t0
                            p_values[n] = p_value
                            values[n] = value

                        # NOTE: for 'KTE', 'linear' and 'DR-CFME' only the p-value
                        # is recorded; 'stat_values' stays 0 for these methods.
                        elif method == 'KTE':
                            t0 = time.time()
                            mmd2u_rbf, mmd2u_null_rbf, p_value = kernel_two_sample_test_nonuniform(YY0, YY1, T, w,
                                                                                kernel_function='rbf',
                                                                                gamma=1.0/sigma2,
                                                                                verbose=False,
                                                                                iterations=iterations)
                            times[n] = time.time() - t0
                            p_values[n] = p_value

                        elif method == 'linear':
                            t0 = time.time()
                            mmd2u_lin, mmd2u_null_lin, p_value = kernel_two_sample_test_nonuniform(YY0, YY1, T, w,
                                                                                kernel_function='linear',
                                                                                verbose=False,
                                                                                iterations=iterations)
                            times[n] = time.time() - t0
                            p_values[n] = p_value

                        elif method == 'DR-CFME':
                            t0 = time.time()
                            mmd2u_rbf, mmd2u_null_rbf, p_value = kernel_dr_nonuniform(Y, X, T,
                                                                                    w, experiment,
                                                                                    iterations=iterations,
                                                                                    verbose=False,
                                                                                    kernel_function='rbf',
                                                                                    gamma=1.0/sigma2)
                            times[n] = time.time() - t0
                            p_values[n] = p_value

                        elif method in ('Vanilla_DR', 'BART', 'CausalForest'):
                            # The three baselines share one calling convention:
                            # construct with (X, T, Y, iterations), then call
                            # permutation_test(), which returns (p_value, value).
                            if method == 'Vanilla_DR':
                                baseline_cls = vanilla_dr_baseline_test
                            elif method == 'BART':
                                baseline_cls = BART_baseline_test
                            else:
                                baseline_cls = CausalForest_baseline_test

                            # The baselines are given T as a column vector.
                            T = T[:, np.newaxis]
                            t0 = time.time()
                            baseline = baseline_cls(X, T, Y, iterations)
                            p_value, value = baseline.permutation_test()
                            times[n] = time.time() - t0
                            p_values[n] = p_value
                            values[n] = value

                        else:
                            print('Method not recognized.')

                    df = pd.DataFrame({
                        'times': times,
                        'p_values': p_values,
                        'stat_values': values,
                    })
                    df.to_csv(name_folder + 'ns' + str(ns) + 'b' + str(b) + 'case' + str(case) + method + '.csv')

## Run functions for different settings

### Null hypothesis

In [25]:
# Null-hypothesis runs (b = 'I', i.e. no treatment effect), for both the
# observational (experiment=False) and randomized (experiment=True) settings.
# The code is wrapped in a bare triple-quoted string, so it is NOT executed
# when this cell runs; remove the quotes to re-enable it.
'''
num_experiments=500
iterations=100

ns_list = np.arange(100, 1050, 50)
b_list = ['I']
case_list = [1, 2]
method_list = ['DR-xKTE', 'IPW-xKTE']

experiment_list = [False, True]
for experiment in experiment_list:
    name_folder = 'data' +str(experiment) + '/'
    run_tests(b_list, method_list, case_list, ns_list, experiment, name_folder, num_experiments, iterations)
'''

b =  I
b =  I


### Experimental setting

In [4]:
# Randomized-experiment runs: effect scenarios II-IV under outcome case 2,
# comparing the two xKTE tests with the KTE permutation test.
experiment = True
name_folder = f'data{experiment}/'

num_experiments, iterations = 500, 100

b_list = ['II', 'III', 'IV']
case_list = [2]
method_list = ['DR-xKTE', 'IPW-xKTE', 'KTE']
ns_list = list(range(100, 400, 50))  # 100, 150, ..., 350

run_tests(
    b_list=b_list,
    method_list=method_list,
    case_list=case_list,
    ns_list=ns_list,
    experiment=experiment,
    name_folder=name_folder,
    num_experiments=num_experiments,
    iterations=iterations,
)

b =  II
b =  III
b =  IV


### Observational setting

In [5]:
# Observational runs: effect scenarios II-IV under outcome case 2,
# comparing DR-xKTE with the BART, CausalForest and Vanilla_DR baselines.
experiment = False
name_folder = f'data{experiment}/'

num_experiments, iterations = 500, 100

b_list = ['II', 'III', 'IV']
case_list = [2]
method_list = ['DR-xKTE', 'BART', 'CausalForest', 'Vanilla_DR']
ns_list = list(range(100, 400, 50))  # 100, 150, ..., 350

run_tests(
    b_list=b_list,
    method_list=method_list,
    case_list=case_list,
    ns_list=ns_list,
    experiment=experiment,
    name_folder=name_folder,
    num_experiments=num_experiments,
    iterations=iterations,
)

b =  II


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the

b =  III


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the

b =  IV


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the