In [1]:
# import packages

import numpy as np
from numpy import random

# import r packages (and functions) if needed

import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages
# Load the R package clusterGeneration through rpy2 (its rcorrmatrix is used below)
cluster_generation = rpackages.importr("clusterGeneration")

In [2]:
# Fully Synthetic Data Sets (Set up from Künzel et al.)

# 1: Simulate the d-dimensional X.
# 2: Create Potential Outcomes Y(1) and Y(0).
# 3: Simulate Treatment Assignments through W.

In [3]:
# 1: Simulate the d-dimensional X. #


# Setup parameters
d = 5 # TODO: set dimension according to setup
N = 10 # TODO: set sample size according to setup

# Seed both random number generators so that Restart & Run All reproduces the
# same draws: numpy's global generator is used by the `random.*` calls, and
# R's generator is used by rcorrmatrix.
SEED = 42 # TODO: set seed according to setup
random.seed(SEED)
robjects.r["set.seed"](SEED)

# X Correlation matrix and mean
mean = np.zeros(d) # TODO: set mean according to setup
cov = np.array(cluster_generation.rcorrmatrix(d=d, alphad=1)) # TODO: set cov according to setup

# Simulate X: N draws from a multivariate normal with the mean and cov above,
# one row per draw and one column per dimension
X = random.multivariate_normal(mean=mean, cov=cov, size=N, check_valid='warn')

In [4]:
# Check X: the simulated covariates, one row per draw and one column per dimension
X

array([[-0.74074749,  2.02534779, -1.3427202 , -1.38239011, -0.70674977],
       [ 1.74599589, -0.3362886 ,  0.01406306,  1.15977944,  1.26825329],
       [-0.27696109,  0.09354568,  0.16309368,  1.133011  , -0.87553288],
       [-0.33942106,  0.45251646, -1.07492971,  0.82394602, -1.04003852],
       [ 0.22650272, -0.84417041,  0.28114598,  1.60968487, -0.39546411],
       [-0.01879224, -0.18838488,  0.18671471,  0.01497986,  0.32320393],
       [ 0.68867154, -0.91963236,  0.94998602,  0.83312917,  0.65690116],
       [-1.25665577, -0.91166074, -2.52912866, -0.68294234,  0.06212349],
       [ 0.8212063 ,  0.162733  ,  1.50835823,  0.89149651,  0.01749534],
       [ 1.07359488,  2.07309962,  0.17607447, -0.39552487,  1.04422349]])

In [5]:
# Check cov: the random correlation matrix that was passed as the covariance of X
cov

array([[ 1.        ,  0.13453029,  0.59174486,  0.21975374,  0.79353494],
       [ 0.13453029,  1.        ,  0.38195245, -0.30190798,  0.05945414],
       [ 0.59174486,  0.38195245,  1.        ,  0.0390454 ,  0.50391292],
       [ 0.21975374, -0.30190798,  0.0390454 ,  1.        , -0.25437378],
       [ 0.79353494,  0.05945414,  0.50391292, -0.25437378,  1.        ]])

In [6]:
# 2: Create Potential Outcomes Y(1) and Y(0).

# Placeholder coefficient vectors, only here so that the cell runs.
# NOTE: both lists have 5 entries, so the products below need X to have 5 columns.
beta_0 = [2, 3, -3, 1, 4] # TODO: delete or change
beta_1 = [1, 1, 0, -5, 3] # TODO: delete or change

# Response functions: linear in X
mu_0 = X @ beta_0 # TODO: change according to setup
mu_1 = X @ beta_1 # TODO: change according to setup

# Standard-normal errors, FIX (the errors for Y(0) are drawn first, then those for Y(1))
e_0 = random.normal(size=N)
e_1 = random.normal(size=N)

# Potential outcomes = response function + error, FIX
Y_0 = e_0 + mu_0
Y_1 = e_1 + mu_1

In [7]:
# Quick check of Y_0, the potential outcomes Y(0)
Y_0

array([ 4.49608621,  8.61238199, -2.98324265, -0.48431983, -4.38208087,
       -0.13709586, -0.90000517,  0.907871  , -0.06180514, 11.36094141])

In [8]:
# Quick check of Y_1, the potential outcomes Y(1)
Y_1

array([ 5.65223941, -0.08773472, -9.0712955 , -5.5314685 , -9.61160292,
        1.20314993, -2.48175218,  1.45797223, -3.16483773,  8.85525703])

In [9]:
# 3: Simulate Treatment Assignments through W.

# Set propensity score
e_x = 0.5 # TODO: change from setup

# Simulate Treatment Assignment, FIX: one 0/1 draw per unit with P(W = 1) = e_x
W = random.binomial(size=N, n=1, p=e_x)

# Create Observed Outcome, FIX: Y_1 where W == 1, Y_0 otherwise.
# np.where selects the value instead of multiplying by 0/1, so a non-finite
# entry in the unobserved potential outcome cannot leak into Y
# (0 * inf and 0 * nan are both nan).
Y = np.where(W == 1, Y_1, Y_0)

In [10]:
# Check the observed outcomes Y (Y_1 where W is 1, Y_0 where W is 0)
Y

array([ 5.65223941,  8.61238199, -2.98324265, -0.48431983, -4.38208087,
        1.20314993, -0.90000517,  0.907871  , -3.16483773,  8.85525703])

In [None]:
# Draft in R (not run here): random generation of a correlation matrix based on the vines R package
# vine_matrix <- RVineMatrix(n = 5, type = "regular")
# vine_structure <- RVineStructure(vine_matrix)
# u <- rCopula(n = 1000, vine_structure)
# maybe use inverse CDF
# simulated_cor_matrix <- cor(u)