In [1]:
# Function to generate data sets
import numpy as np
from scipy.linalg import block_diag

def dataset(N_dim=2, N_points=10000, cor=0.0, offset=0., type="full", seed=None):
    """Generate multivariate normal data with a chosen correlation structure.

    Samples are drawn from a zero-mean multivariate normal distribution whose
    covariance matrix has ones on the diagonal; ``offset`` is added afterwards.

    Parameters
    ----------
    N_dim : int
        Number of variables (columns) per sample.
    N_points : int
        Number of samples (rows) to draw.
    cor : float
        Correlation strength. How it is distributed over the off-diagonal
        elements of the covariance matrix depends on ``type``.
    offset : float
        Value added to the drawn samples (anything that broadcasts against
        an ``(N_points, N_dim)`` array works).
    type : {"full", "block", "linear", "exp"}
        Off-diagonal structure of the covariance matrix:

        * ``"full"``   -- every off-diagonal element equals ``cor``.
        * ``"block"``  -- the first ``N_dim // 2`` variables and the remaining
          ones form two blocks; elements inside a block equal ``cor``,
          elements between the blocks are zero.
        * ``"linear"`` -- the element for variables ``i`` and ``j`` depends
          only on ``|i - j|`` and goes linearly from ``cor`` (direct
          neighbours) to ``-cor/2`` (largest distance).
        * ``"exp"``    -- the element for variables ``i`` and ``j`` is
          ``cor ** |i - j|``.
    seed : optional
        Forwarded to ``numpy.random.default_rng``. The default ``None`` keeps
        the previous behaviour of an unseeded generator; pass a fixed value
        to make the output reproducible.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N_points, N_dim)``.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported structure names.
    """
    if type == "block":
        # Split correlations in two independent blocks. Both blocks must be
        # square, otherwise the result is not a valid covariance matrix when
        # N_dim is odd.
        n_first = N_dim // 2
        n_second = N_dim - n_first
        cov = block_diag(
            np.full((n_first, n_first), cor),
            np.full((n_second, n_second), cor),
        )
    elif type in ("linear", "exp"):
        if type == "linear":
            # Linearly decreasing correlation to neighbours
            per_lag = np.linspace(cor, -cor/2, N_dim-1)
        else:
            # Exponentially decreasing correlation to neighbours
            per_lag = cor ** np.arange(1, N_dim)
        # Entry k holds the value for index distance k; distance 0 is the
        # diagonal. Indexing with the |i - j| matrix builds the symmetric
        # banded matrix in one step.
        by_lag = np.concatenate(([1.0], per_lag))
        idx = np.arange(N_dim)
        cov = by_lag[np.abs(idx[:, None] - idx[None, :])]
    elif type == "full":
        # Full identical correlation
        cov = np.full((N_dim, N_dim), cor)
    else:
        raise ValueError(
            "Unknown dataset type {!r}; expected one of "
            "'full', 'block', 'linear', 'exp'".format(type)
        )
    np.fill_diagonal(cov, 1)

    rng = np.random.default_rng(seed)
    return rng.multivariate_normal(mean=np.zeros(N_dim), cov=cov, size=N_points) + offset

In [2]:
# Default data sets

# Defaults shared by the standard data sets
N_DIM = 10
N_POINTS = 1000000

def default_sets(N_dim=N_DIM, N_points=N_POINTS, type="full"):
    """Build the standard collection of data sets.

    One data set is generated per correlation strength (0.0, 0.5, 0.9 and
    0.99), each with zero offset, by calling ``dataset`` with the given
    ``N_dim``, ``N_points`` and ``type``.

    Returns a dict mapping the labels "Uncorr.", "Corr.50", "Corr.90" and
    "Corr.99" to the arrays returned by ``dataset``.
    """
    # Label -> correlation strength passed to dataset()
    correlation_by_label = {
        "Uncorr.": 0.0,
        "Corr.50": 0.5,
        "Corr.90": 0.9,
        "Corr.99": 0.99,
    }
    return {
        label: dataset(N_dim, N_points, strength, 0.0, type=type)
        for label, strength in correlation_by_label.items()
    }

In [5]:
#print(default_sets(type="exp"))