### Example of a simple fully synthetic data generating process

In [95]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [79]:
def create_covariance(m, eigenvalues=None):
    """Create a random symmetric matrix with the given eigenvalue spectrum.

    A random orthogonal basis ``Q`` is obtained from the QR decomposition of
    an ``m x m`` standard-normal matrix, and the result is ``Q @ D @ Q.T``
    with ``D = diag(eigenvalues)``.

    Args:
        m: Dimension of the (square) output matrix.
        eigenvalues: Sequence of ``m`` eigenvalues. Defaults to the
            exponentially decaying spectrum ``exp(-k)`` for ``k = 0..m-1``.
            The result is a valid covariance matrix only if all eigenvalues
            are non-negative; this is not enforced here.

    Returns:
        An ``(m, m)`` array whose eigenvalues are ``eigenvalues``.

    Raises:
        ValueError: If ``eigenvalues`` does not contain exactly ``m`` values.
    """
    if eigenvalues is None:
        eigenvalues = np.exp(-np.arange(m))
    eigenvalues = np.asarray(eigenvalues, dtype=float)
    # Without this check a wrong-length spectrum would be silently repeated
    # or truncated when placed on the diagonal.
    if eigenvalues.shape != (m,):
        raise ValueError(
            f"eigenvalues must have shape ({m},), got {eigenvalues.shape}"
        )
    # Only the orthogonal factor of the QR decomposition is needed.
    Q, _ = np.linalg.qr(np.random.normal(size=(m, m)))
    # Sanity check on the random basis.
    assert np.linalg.matrix_rank(Q) == m
    return Q @ np.diag(eigenvalues) @ Q.T

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise.

    Computed as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
    large-magnitude negative inputs do not overflow in ``exp(-x)`` (which
    would emit a RuntimeWarning in the naive formula).

    Args:
        x: Scalar or array-like of real values.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)).
    return np.exp(-np.logaddexp(0, -np.asarray(x)))

def alternating_decaying_coef(m, decay=1, shuffle=False):
    """Build ``m`` coefficients with alternating sign and exponential decay.

    The k-th coefficient (k = 0..m-1) is ``(-1)**k * exp(-k * decay)``.

    Args:
        m: Number of coefficients.
        decay: Exponential decay rate applied to the index.
        shuffle: If true, randomly permute the coefficients in place using
            the global NumPy random state.

    Returns:
        A 1-D array of length ``m``.
    """
    idx = np.arange(m)
    # +1 at even indices, -1 at odd indices.
    signs = 1 - 2 * (idx % 2)
    magnitudes = np.exp(-idx * decay)
    coef = signs * magnitudes
    if shuffle:
        np.random.shuffle(coef)
    return coef

In [76]:
from simulations.datagen import DGPGraph

In [87]:
def heterogenius_linear_dgp(m):
    """Build a linear data generating process with a heterogeneous treatment effect.

    The graph has three nodes:

    * ``X``: ``m`` covariates drawn from a zero-mean multivariate normal
      whose covariance comes from ``create_covariance(m)``.
    * ``T``: binary treatment drawn as Bernoulli with probability
      ``sigmoid(X @ beta)``.
    * ``Y``: outcome ``X @ w + (X @ w_h + alpha) * T + noise`` with
      standard-normal noise and ``alpha = 1``.

    All coefficient vectors are produced by ``alternating_decaying_coef``;
    they and the covariance are drawn once, when this function is called.

    Args:
        m: Number of covariates.

    Returns:
        A ``DGPGraph`` with nodes ``'X'``, ``'T'`` (parent ``X``) and
        ``'Y'`` (parents ``X`` and ``T``).
    """
    covariance = create_covariance(m)
    propensity_coef = alternating_decaying_coef(m)
    outcome_coef = alternating_decaying_coef(m, decay=0.1, shuffle=True)
    effect_coef = alternating_decaying_coef(m, shuffle=True)
    base_effect = 1

    # Names and parameter names of the node functions are kept as-is, since
    # DGPGraph may bind arguments by name — not verifiable from this file.
    def fX(n):
        zero_mean = np.zeros(m)
        return np.random.multivariate_normal(mean=zero_mean, cov=covariance, size=n)

    def fT(X, n):
        propensity = sigmoid(X @ propensity_coef)
        return np.random.binomial(1, propensity)

    def fY(X, T, n):
        baseline = X @ outcome_coef
        # Treatment effect varies with X around the constant base effect.
        effect = (X @ effect_coef) + base_effect
        return baseline + effect * T + np.random.normal(scale=1, size=n)

    graph = DGPGraph()
    graph.add_node('X', fX)
    graph.add_node('T', fT, parents=['X'])
    graph.add_node('Y', fY, parents=['X', 'T'])
    return graph

# Instantiate the data generating process with m=20 covariates.
d = heterogenius_linear_dgp(20)

In [99]:
# NOTE(review): presumably draws 200 observations from the graph — confirm
# against DGPGraph.sample.
data = d.sample(200)

In [100]:
# Split the sampled data into an outcome `Y` and a feature frame `X`.
# NOTE(review): assumes `data` is a dict-like keyed by node name
# ("X", "T", "Y") — confirm against DGPGraph.sample.
Y = data.pop("Y")
dX = data.pop("X")
# Replace the covariate matrix by one entry per column ("X0", "X1", ...).
# Iterating dX.T yields columns, assuming dX is (n_samples, n_features)
# — TODO confirm.
data.update({f"X{i}": x for i, x in enumerate(dX.T)})
# Whatever keys remain in `data` alongside the X* columns become the columns
# of the feature frame.
X = pd.DataFrame(data)