In [None]:
import numpy as np
import pandas as pd

In [None]:
def gen_synthetic(n, seed=0):
    """Generate a synthetic dataset with a known conditional treatment effect.

    Ten independent standard-normal features x0..x9 are drawn, then a
    treatment indicator and an outcome are simulated from them:

    * ``s``    - non-linear score built from x0, x1, x2; drives both the
                 propensity and the treatment effect.
    * ``v``    - linear score built from x7, x8; affects the propensity only.
    * ``e``    - propensity ``0.2 + 0.6 * sigmoid(0.8 * s + 0.6 * v)``, so it
                 always lies between 0.2 and 0.8.
    * ``T``    - Bernoulli(e) treatment indicator.
    * ``M0``   - control mean outcome, built from x3, x4, x5, x6.
    * ``cate`` - treatment effect ``tau = s + 0.5 * tanh(s)``.
    * ``M1``   - treated mean outcome, ``M0 + tau``.
    * ``Y``    - observed outcome ``M0 + T * tau + eps`` with Gaussian noise
                 ``eps`` of standard deviation 0.6.

    Parameters
    ----------
    n : int
        Number of rows (samples) to generate.
    seed : int, optional
        Seed for the random number generator (default 0). The same seed
        always yields the same data.

    Returns
    -------
    pandas.DataFrame
        ``n`` rows with columns ``x0``..``x9``, ``T``, ``Y``, ``M0``, ``M1``,
        ``cate``, ``e``, ``s`` and ``v``.
    """
    # Use a private legacy generator instead of np.random.seed(seed): it
    # produces the very same random stream (so the generated data is
    # unchanged) but does not overwrite the caller's global NumPy RNG state.
    rng = np.random.RandomState(seed)

    # features
    p = 10
    X = rng.multivariate_normal(np.zeros(p), np.eye(p), size=n)

    # s(x), v(x) and e(x)
    s = 0.8 * X[:, 0] + 0.6 * X[:, 1] + 0.4 * X[:, 2] + 0.3 * X[:, 0]**2 - 0.2 * X[:, 1] * X[:, 2]
    v = X[:, 7] - 0.5 * X[:, 8]
    u = 0.8 * s + 0.6 * v
    e = 0.2 + 0.6 / (1 + np.exp(-u))  # logistic link squeezed into [0.2, 0.8]
    T = rng.binomial(1, e)

    # mu_0(x), tau(x) and mu_1(x)
    M0 = 0.5 * X[:, 3] - 0.4 * X[:, 4] + 0.3 * np.sin(X[:, 5]) + 0.2 * (X[:, 6]**2 - 1)
    tau = s + 0.5 * np.tanh(s)
    M1 = M0 + tau

    # y(x): observed outcome = control mean + effect if treated + noise
    eps = rng.normal(0, 0.6, size=n)
    Y = M0 + T * tau + eps

    # assemble features, observed variables and ground-truth quantities
    df = pd.DataFrame(X, columns=[f"x{i}" for i in range(p)])
    df["T"] = T
    df["Y"] = Y
    df["M0"] = M0
    df["M1"] = M1
    df["cate"] = tau
    df["e"] = e
    df["s"] = s
    df["v"] = v

    return df

In [None]:
# Build the 11,000-row synthetic dataset (seed 0) and persist it as CSV.
# Note: to_csv with default arguments also writes the row index as the
# first, unnamed column.
df = gen_synthetic(n=11_000, seed=0)
df.to_csv("./synthetic.csv")