In [None]:
import pandas as pd
import numpy as np

def generate_data(p, n, d, g, random_state=None):
    """
    Draw a labelled sample from a two-class Gaussian model.

    Labels are Bernoulli(p). Features of class 0 are N(0, S) and features of
    class 1 are N(mean_1, S), with mean_1 = (1, 1/2, ..., 1/d) and
    S[i, j] = g ** |i - j|.

    Parameters
    ----------
    p : float
        Bernoulli distribution probability of class 1.
    n : int
        Number of samples.
    d : int
        Number of features.
    g : float
        Covariance decay factor (controls how strongly features are
        correlated in the covariance matrix).
    random_state : int or None, optional
        Seed for a private ``np.random.RandomState``. The draws are the same
        as those obtained after ``np.random.seed(random_state)``, but NumPy's
        global random state is left untouched. With ``None`` the samples are
        drawn from NumPy's global random state.

    Intended domain (not enforced): 0 < p, g < 1 and n, d >= 1.

    Returns
    -------
    X : numpy.ndarray of shape (n, d)
        Feature matrix, one row per sample.
    Y : numpy.ndarray of shape (n,)
        Class labels in {0, 1}.
    """
    if random_state is None:
        # The np.random module exposes the global generator's methods, so
        # unseeded calls keep consuming the global stream exactly as before.
        rng = np.random
    else:
        # A private generator avoids reseeding the caller's global RNG.
        rng = np.random.RandomState(random_state)

    Y = rng.binomial(n=1, p=p, size=n)

    mean_0 = np.zeros(d)
    mean_1 = 1.0 / np.arange(1, d + 1)

    S = np.fromfunction(lambda i, j: g ** np.abs(i - j), (d, d), dtype=float)

    # One batched zero-mean draw factorises S a single time instead of once
    # per sample; it consumes the same normal variates in the same order as a
    # per-sample loop, and yields shape (0, d) when n == 0.
    noise = rng.multivariate_normal(mean_0, S, size=n)

    # Shift every row by the mean of its class (row 0 -> class 0, row 1 -> class 1).
    class_means = np.stack([mean_0, mean_1])
    X = noise + class_means[Y]

    return X, Y

In [9]:
# Demo settings: class-1 probability, sample count, feature count and
# covariance decay factor (g = 0.8 -> neighbouring features have covariance 0.8).
p, n, d, g = 0.5, 10, 4, 0.8

# Unseeded draw; the returned (X, Y) tuple is displayed as the cell output.
generate_data(p=p, n=n, d=d, g=g, random_state=None)

(array([[ 2.20797553,  1.71817889,  0.3524896 ,  0.19556076],
        [-0.31951894,  0.20977864,  1.15822256,  1.43306738],
        [ 0.70159697,  0.48317554,  1.03877838,  0.21112802],
        [-0.66196377, -0.29820445, -1.21706781, -0.87857212],
        [ 2.06312995,  0.84744764,  0.19397175, -0.65656661],
        [-1.28251338, -0.1825835 ,  0.01679285, -0.22486265],
        [-0.21220505, -0.01658783,  0.68286282,  0.44928951],
        [ 0.84229523,  0.09362009,  0.40955012, -0.25289282],
        [ 2.1473269 ,  1.86721866,  2.87916859,  2.00138519],
        [-1.52070446, -0.92223722,  0.37522766, -0.51386324]]),
 array([1, 0, 1, 1, 1, 0, 0, 0, 1, 0]))