In [2]:
import numpy as np
from scipy.stats import multivariate_normal
import pandas as pd


def simulate_expression_data(num_genes, num_samples, data_range, k, r):
    """Simulate a (num_samples x num_genes) expression matrix.

    Every gene gets a mean drawn uniformly from ``data_range`` and a standard
    deviation drawn uniformly from ``[0.01, data_range[1])``. A random 20% of
    the genes (``int(0.2 * num_genes)``) are marked as significant: their
    means are shifted by ``k`` and every pair of them is given correlation
    ``r``. Samples are then drawn from the resulting multivariate normal.

    All draws come from NumPy's global random state, in the order
    means -> standard deviations -> significant genes -> samples.

    Parameters
    ----------
    num_genes : int
        Number of variables (genes), i.e. columns of the result.
    num_samples : int
        Number of samples, i.e. rows of the result.
    data_range : tuple of float
        ``(low, high)`` bounds for the gene means; ``high`` is also the upper
        bound for the standard deviations.
    k : float
        Amount added to the mean of each significant gene.
    r : float
        Correlation set between each pair of distinct significant genes.

    Returns
    -------
    tuple
        ``(means, std_dev, corr_matrix, l, cov_matrix, simulated_data)`` where
        ``std_dev`` is a diagonal matrix, ``l`` holds the indices of the
        significant genes and ``simulated_data`` always has shape
        ``(num_samples, num_genes)``.
    """
    # Generate means from a uniform distribution
    means = np.random.uniform(
        low=data_range[0], high=data_range[1], size=num_genes
    )

    # Generate the (diagonal) standard deviation matrix
    std_dev = np.zeros((num_genes, num_genes))
    np.fill_diagonal(
        std_dev,
        np.random.uniform(low=0.01, high=data_range[1], size=num_genes),
    )

    # Start from the identity: unit diagonal, zero correlation elsewhere
    corr_matrix = np.eye(num_genes)

    # Pick the significant genes (distinct indices, since replace=False)
    l = np.random.choice(
        range(num_genes), size=int(0.2 * num_genes), replace=False
    )

    # Shift the means of the significant genes and correlate every pair of
    # them. The block assignment also hits the diagonal, so it is reset to 1.
    means[l] += k
    corr_matrix[np.ix_(l, l)] = r
    corr_matrix[l, l] = 1.0

    # cov_ih = corr_ih * std_i * std_h
    cov_matrix = std_dev.dot(corr_matrix).dot(std_dev)

    # Generate the simulated expression data
    simulated_data = multivariate_normal.rvs(
        mean=means, cov=cov_matrix, size=num_samples
    )
    # rvs squeezes singleton axes (one sample or one gene would come back
    # 1-D or scalar); restore the samples-by-genes layout in every case.
    simulated_data = np.reshape(simulated_data, (num_samples, num_genes))

    return means, std_dev, corr_matrix, l, cov_matrix, simulated_data


if __name__ == "__main__":
    # Set the parameters
    num_genes = 10  # Number of variables (genes)
    num_samples = 5  # Number of samples
    data_range = (-0.5, 0.5)

    k = 0.6  # Added to the mean of each significant gene
    r = 0.65  # Correlation between each pair of significant genes

    # Single source of truth for the output file, so the status message
    # below cannot drift from the path that is actually written.
    output_path = "simulated_data_test.csv"

    # NOTE(review): the draws use NumPy's global random state and this cell
    # does not seed it; call np.random.seed(...) beforehand if the run has to
    # be reproducible.
    means, std_dev, corr_matrix, l, cov_matrix, simulated_data = (
        simulate_expression_data(num_genes, num_samples, data_range, k, r)
    )

    # Convert the numpy array to a pandas DataFrame
    df = pd.DataFrame(simulated_data)

    # Save the DataFrame to a CSV file
    df.to_csv(output_path, index=False)

    print(f"Data saved to {output_path}")

    print(simulated_data.shape)


Data saved to simulated_data.csv
(5, 10)


In [3]:
# Show the mean vector and the correlation/covariance matrices that the
# simulation cell left in the notebook namespace.
print(f"means {means}")
print(f"corr_matrix {corr_matrix}")
print(f"cov_matrix {cov_matrix}")


means [ 0.36124791 -0.12239389  1.0872388   0.24336767 -0.18461028  0.38343517
 -0.10862884  0.12641376  0.11064644  0.16519982]
corr_matrix [[1.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   1.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   1.   0.   0.   0.   0.   0.   0.65 0.  ]
 [0.   0.   0.   1.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   1.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   1.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   1.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   1.   0.   0.  ]
 [0.   0.   0.65 0.   0.   0.   0.   0.   1.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   1.  ]]
cov_matrix [[0.08631339 0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.23201425 0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.053856   0.         0.         0.
  0.         0.         0.01432101 0.      