In [1]:
import pandas as pd
import numpy as np

In [2]:
def toeplitz(d, rho):
    """Return the d x d matrix whose (i, j) entry is rho ** |i - j|."""
    positions = np.arange(d)
    # Pairwise index distance |i - j| for every (row, column) pair.
    lag = np.abs(np.subtract.outer(positions, positions))
    return rho ** lag

In [3]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated elementwise with NumPy."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def simulate_data(n=1000, d=10, rho=0.9, beta=None, seed=0):
    """
    Simulate a logistic-regression data set with correlated Gaussian features:
      X ~ N(0, Σ),  Σ_ij = rho^{|i-j|},
      Y ~ Bernoulli( sigmoid(X @ beta) ).

    Parameters
    ----------
    n : int
        Number of rows (samples) to draw.
    d : int
        Number of feature columns.
    rho : float
        Base of the Toeplitz covariance, Σ_ij = rho^{|i-j|}.
    beta : array-like, optional
        Coefficients multiplied against X to form the logits. When None,
        the default is d // 2 zeros followed by d - d // 2 ones, so the
        vector always has length d (for odd d the extra entry is a one).
    seed : int
        Seed handed to np.random.default_rng; the same seed reproduces the
        same (X, y).

    Returns
    -------
    X : ndarray of shape (n, d)
        Feature matrix.
    y : ndarray
        0/1 draws from Bernoulli(sigmoid(X @ beta)).
    beta : ndarray
        The coefficient vector actually used.
    sigma : ndarray of shape (d, d)
        The covariance matrix Σ.

    Raises
    ------
    numpy.linalg.LinAlgError
        From np.linalg.cholesky when Σ is not positive definite
        (e.g. rho = 1).
    """
    rng = np.random.default_rng(seed)
    sigma = toeplitz(d, rho)

    # default beta if not provided; d - d // 2 keeps the length equal to d
    # even when d is odd (the previous d // 2 + d // 2 came out one short).
    if beta is None:
        n_zero = d // 2
        beta = np.concatenate([np.zeros(n_zero), np.ones(d - n_zero)])
    beta = np.asarray(beta)

    # sample X: rows of Z are i.i.d. N(0, I), so Z @ L.T has covariance L L^T = Σ
    L = np.linalg.cholesky(sigma)
    Z = rng.standard_normal((n, d))
    X = Z @ L.T

    # sample Y (drawn after X so a given seed keeps producing the same data)
    logits = X @ beta
    p = sigmoid(logits)
    y = rng.binomial(1, p)

    return X, y, beta, sigma

In [None]:
# Draw the seed-1080 sample and store features plus label in one CSV.
X, y, beta, Sigma = simulate_data(n=2000, d=10, rho=0.9, seed=1080)
# Features keep their integer column labels; the appended last column is named 'y'.
data = pd.DataFrame(np.column_stack((X, y))).rename(columns={X.shape[1]: 'y'})
data.to_csv('data_1080.csv')

In [6]:
# Draw the seed-42 sample and store features plus label in one CSV.
X, y, beta, Sigma = simulate_data(n=1000, d=10, rho=0.9, seed=42)
# Features keep their integer column labels; the appended last column is named 'y'.
data = pd.DataFrame(np.column_stack((X, y))).rename(columns={X.shape[1]: 'y'})
data.to_csv('data_42.csv')