In [None]:
# import packages

import numpy as np
from numpy import random
from scipy import stats
import pandas as pd
import copy

# import r packages (and functions) if needed

import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages
#rpackages.importr("clusterGeneration")
#rpackages.importr("mpower")
#cluster_generation = robjects.packages.importr("clusterGeneration")
mpower = robjects.packages.importr("mpower")

In [None]:
# Fully Synthetic Data Sets (Set up from Künzel et al.)

# 1: Simulate the d-dimensional X.
# 2: Create Potential Outcomes Y(1) and Y(0).
# 3: Simulate Treatment Assignments through W.

In [None]:
# 1: Simulate the d-dimensional X


# Setup parameters
d = 25 # TODO: set dimension according to setup
N = 100 # TODO: set sample size according to setup
SEED = 42 # fixed seed so that Restart & Run All reproduces the same data set

# Seed both random number generators used by this notebook:
# numpy's global generator (used by every `random.*` call) and the R session
# (mpower.cvine draws the random correlation matrix on the R side).
random.seed(SEED)
robjects.r["set.seed"](SEED)

# X Correlation matrix and mean
mean = np.zeros(d) # TODO: set mean according to setup
cov = np.array(mpower.cvine(d=d, alpha = 0.5, beta = 0.5)) # TODO: set cov according to setup
assert cov.shape == (d, d), "expected a d x d matrix from mpower.cvine"

# Simulate X: N draws from the d-dimensional normal, i.e. an (N, d) array.
# check_valid='warn' only warns if cov is not positive semi-definite.
X = random.multivariate_normal(mean=mean, cov=cov, size=N, check_valid='warn')

In [None]:
# Check X: display the simulated covariates (one row per draw, one column per dimension)
X

In [None]:
# Check cov: display the matrix returned by mpower.cvine that was used as
# the covariance of X
cov

In [None]:
# 2: Create Potential Outcomes Y(1) and Y(0).

# 2.1 Simulate errors, FIX
# One standard-normal noise vector of length N per potential outcome;
# e_0 is drawn first, e_1 second.
e_0, e_1 = (random.normal(0.0, 1.0, N) for _ in range(2))

In [None]:
# Placeholder coefficients, only here so that the cells further down can run.
beta_0 = [2,3,-3,1,4] # TODO: delete or change
beta_1 = [1,1,0,-5,3] # TODO: delete or change

# Placeholder response functions.
# The coefficient lists are shorter than the number of columns of X, so
# multiplying the full X by them raises a shape-mismatch error. Apply the
# coefficients to the leading columns of X only (all remaining covariates get
# an implicit coefficient of zero).
k_0 = min(len(beta_0), X.shape[1])
k_1 = min(len(beta_1), X.shape[1])
mu_0 = np.matmul(X[:, :k_0], beta_0[:k_0]) # TODO: change according to setup
mu_1 = np.matmul(X[:, :k_1], beta_1[:k_1]) # TODO: change according to setup

In [None]:
# 2.2 Create Response Functions


In [None]:
# SI1: simple CATE (unbalanced, no confounding)
# SI?: balanced variant of the same setup (just change e_x=0.5)
betas_0 = random.uniform(-5, 5, d)

# Step terms: 0/1 indicators (int8) scaled by the jump height.
jump_control = 5 * (X[:, 0] > 0.5).astype(np.int8)
jump_treated = 8 * (X[:, 1] > 0.1).astype(np.int8)

mu_0 = X @ betas_0 + jump_control  # linear baseline plus a step in the first covariate
mu_1 = mu_0 + jump_treated         # treatment adds a step in the second covariate
tau = mu_1 - mu_0

In [None]:
# SI2: complex linear CATE (in Künzel it is low=1, high=30)
# Two independent coefficient vectors; betas_0 is drawn before betas_1.
betas_0, betas_1 = (random.uniform(low=-15, high=15, size=d) for _ in range(2))
mu_0, mu_1 = X @ betas_0, X @ betas_1
tau = mu_1 - mu_0

In [None]:
# SI3 complex non-linear
def varsigma_funct(x):
    """Scaled logistic curve 2 / (1 + exp(-12 * (x - 1/2))).

    Works element-wise on scalars and numpy arrays. The value is 1 at
    x = 1/2 and lies between 0 and 2.
    """
    shifted = x - 1/2
    denominator = 1 + np.exp(-12 * shifted)
    return 2 / denominator

# Both responses are built from the same two logistic factors (first and
# second covariate); they differ only in sign, so tau = mu_1 - mu_0.
sig_first = varsigma_funct(x=X[:, 0])
sig_second = varsigma_funct(x=X[:, 1])
mu_0 = -0.5 * sig_first * sig_second
mu_1 = 0.5 * sig_first * sig_second
tau = mu_1 - mu_0

In [None]:
# SI4: no treatment effect (global linear response)
betas_noT = random.uniform(-15, 15, d)
# Both potential-outcome means are the very same linear response,
# hence the treatment effect is identically zero.
mu_0 = mu_1 = X @ betas_noT
tau = np.zeros(shape=N)

In [None]:
# SI5 no treatment effect (piecewise linear response)
def piecewise_linear(x, betas=None):
    """Piecewise-linear response surface (setup SI5).

    Each row of ``x`` is assigned to one of three regimes by its 20th
    covariate (column index 19), and the response is a linear function of a
    regime-specific block of five covariates:

    * lower  regime, ``x[:, 19] < -0.4``          -> coefficients 0..4
    * middle regime, ``-0.4 <= x[:, 19] <= 0.4``  -> coefficients 5..9
    * upper  regime, ``x[:, 19] > 0.4``           -> coefficients 10..14

    All other coefficients are zero in every regime.

    Parameters
    ----------
    x : array-like of shape (n, p), p >= 20
        Covariate matrix.
    betas : array-like of shape (p,), optional
        Full coefficient vector from which the three blocks are taken.
        If omitted, it is drawn from Uniform(-15, 15) with numpy's global
        generator, so every call without ``betas`` uses fresh coefficients.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The response for every row of ``x``.

    Raises
    ------
    IndexError
        If ``x`` is not two-dimensional with at least 20 columns.
    ValueError
        If ``betas`` is given but does not have one entry per column of ``x``.
    """
    x = np.asarray(x, dtype=float)
    if x.ndim != 2 or x.shape[1] < 20:
        raise IndexError("x must be 2-dimensional with at least 20 columns")
    n_features = x.shape[1]

    if betas is None:
        betas = random.uniform(low=-15, high=15, size=n_features)
    else:
        betas = np.asarray(betas, dtype=float)
        if betas.shape != (n_features,):
            raise ValueError("betas must have one entry per column of x")

    # Three coefficient vectors with disjoint support (blocks of five).
    betas_l = np.zeros(n_features)  # betas_lower
    betas_l[0:5] = betas[0:5]

    betas_m = np.zeros(n_features)  # betas_middle
    betas_m[5:10] = betas[5:10]

    betas_u = np.zeros(n_features)  # betas_upper
    betas_u[10:15] = betas[10:15]

    # Vectorised regime selection; rows that are neither below -0.4 nor
    # above 0.4 (including the boundaries) fall into the middle regime.
    switch = x[:, 19]
    lower = np.matmul(x, betas_l)
    middle = np.matmul(x, betas_m)
    upper = np.matmul(x, betas_u)
    return np.where(switch < -0.4, lower, np.where(switch > 0.4, upper, middle))

# Both potential-outcome means are the same piecewise-linear response,
# so the treatment effect is zero for every unit.
mu_0 = mu_1 = piecewise_linear(X)
tau = np.zeros(shape=N)

# TODO: CHECK IF IT REALLY WORKS --> MAY BE RIGHT NOW! :)

In [None]:
# SI6: beta confounded, no treatment effect
# CAUTION: do not repeat! (original note, translated from Swiss German;
# presumably X must not be drawn again after this point -- TODO confirm)
X = random.uniform(0, 1, (N, d))
# Same response under treatment and control: linear in the first covariate.
mu_0 = mu_1 = 2 * X[:, 0] - 1
tau = np.zeros(shape=N)


In [None]:
# SI? beta confounded, simple cate?


In [None]:
# 2.3 Create Potential Outcomes, FIX
# Potential outcome = response function + its own noise term.
Y_0, Y_1 = mu_0 + e_0, mu_1 + e_1

In [None]:
# Quick check Y_0: display the potential outcomes under control (mu_0 + e_0)
Y_0

In [None]:
# Quick check Y_1: display the potential outcomes under treatment (mu_1 + e_1)
Y_1

In [None]:
# 3.1 Propensity score setups # TODO: change for setup
# NOTE: both assignments below run when this cell is executed, so ii)
# overrides i) and e_x ends up as 0.01. Comment out the variant that is
# not wanted.

# i) constant, balanced
e_x = 0.5

# ii) constant, unbalanced
e_x = 0.01

In [None]:
# iii) SI6 beta confounded
# X is deliberately NOT drawn again here: mu_0, mu_1, Y_0 and Y_1 were already
# computed from the current X, so the propensity score has to be a function of
# that same X. Re-drawing X at this point would make e_x independent of the
# potential outcomes (no confounding left) and leave X inconsistent with Y.
# For this setup X is expected to be uniform on [0, 1] (run the SI6 response
# cell first); outside [0, 1] the beta density is 0 and e_x equals 1/4.
beta_dist = stats.beta(a=2, b=4) # set beta distribution
beta_values = beta_dist.pdf(X[:,0]) # calculate pdf values for x1
e_x = 1/4*(1+beta_values)

pd.DataFrame(e_x).describe() # summary stats of e_x

In [None]:
# iv) SI7 beta confounded - unbalanced
# X is deliberately NOT drawn again here: the potential outcomes were already
# computed from the current X, so the propensity score has to be a function of
# that same X. Re-drawing X at this point would make e_x independent of the
# potential outcomes and leave X inconsistent with Y.
# For this setup X is expected to be uniform on [0, 1]; outside [0, 1] the
# beta density is 0 and e_x equals 1/100.
beta_dist = stats.beta(a=1, b=10) # set beta distribution
beta_values = beta_dist.pdf(X[:,0]) # calculate pdf values for x1
e_x = 1/100*(1+beta_values)

pd.DataFrame(e_x).describe() # summary stats of e_x

In [None]:
# Display the current covariate matrix X as the cell output
X

In [None]:
# 3.2 Simulate Treatment Assignments through W

# Simulate Treatment Assignment, FIX
# One Bernoulli draw per unit with success probability e_x.
W = random.binomial(n=1, p=e_x, size=N)

# Create Observed Outcome, FIX
# Y equals Y_1 where W == 1 and Y_0 where W == 0.
ones = np.ones(N)
Y = W * Y_1 + (ones - W) * Y_0

In [None]:
# Check the observed outcomes Y (Y_1 where W == 1, Y_0 where W == 0)
Y