In [6]:
import numpy as np
import pygad

from functools import partial


# Define the objective function
def fit_func(W, v, ga_instance, solution, solution_idx):
    ###!! Calculating lambda, this method is only valid when no PPI is considered

    mean_v = np.mean(v)
    sd_v = np.std(v)
    mean_W = np.mean(W)
    sd_W = np.std(W)
    M_v = mean_v / sd_v
    M_w = mean_W / sd_W
    lambda_ = M_w / (M_v + M_w)
    print(f"Lambda: {lambda_}")

    x = np.array(solution)

    term1 = lambda_ * np.dot(x.T, np.dot(W, x))
    term2 = (1 - lambda_) * np.dot(v.T, x)
    loss = term1 + term2
    return loss


def on_generation(ga_instance):
    for sol_idx, solution in enumerate(ga_instance.population):
        # Ensure non-negativity
        solution = np.maximum(solution, 0)
        ga_instance.population[sol_idx] = solution
    print(
        f"Generation {ga_instance.generations_completed}: Best Fitness = {ga_instance.best_solution()[1]}"
    )


if __name__ == "__main__":
    for idx in range(1, 5):
        W = np.load(f"output/W_case{idx}.npy")
        v = np.load(f"output/v_case{idx}.npy")
        fitness_func = lambda ga_instance, solution, solution_idx: fit_func(
            W, v, ga_instance, solution, solution_idx
        )

        # GA parameters
        num_iterations = 10000  # Set the number of iterations # 60000
        num_generations = (
            num_iterations  # Assuming one generation per iteration
        )
        num_parents_mating = 10
        sol_per_pop = 20
        num_genes = len(W)  # Ensure num_genes matches the dimension of W and v
        mutation_rate = 1 / (num_genes + 1)  # Set mutation rate
        crossover_rate = 0.5  # Set crossover rate

        # Creating an instance of the GA
        ga_instance = pygad.GA(
            num_generations=num_generations,
            num_parents_mating=num_parents_mating,
            fitness_func=fitness_func,
            sol_per_pop=sol_per_pop,
            num_genes=num_genes,
            mutation_type="random",
            mutation_percent_genes=20,  # pygad uses percentage
            crossover_type="single_point",  # Experiment with different crossover methods
            crossover_probability=crossover_rate,  # Set crossover rate
            on_generation=on_generation,
            stop_criteria=[
                "saturate_100"
            ],  # Stop if no improvement for 100 generations
            keep_parents=1,
            initial_population=np.random.uniform(
                low=0.0, high=1.0, size=(sol_per_pop, num_genes)
            ),  # Ensure initial population in [0,1]
        )
        # Running the GA
        ga_instance.run()

        # Best solution
        solution, solution_fitness, solution_idx = ga_instance.best_solution()
        print("Best Solution: ", solution)
        print("Best Solution Fitness: ", solution_fitness)

        # Save the best solution
        # np.save(f"output/best_solution_case{idx}.npy", solution)


Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.48796191578658255
Lambda: 0.4879619157

KeyboardInterrupt: 

In [9]:
import numpy as np

# Load your W matrix
W = np.load("output/v_W/W_case2.npy")

rtol = 1e-02  # relative tolerance
atol = 1e-03  

if np.allclose(W, W.T, rtol=rtol, atol=atol):
    print("W is symmetric.")

else:
    print("W is not symmetric.")

#print to 3 s.f.
np.set_printoptions(precision=5)
print(W[:5, :5])
print("=============")
print(W.T)



W is not symmetric.
[[2.36792e-17 1.25733e+01 4.58668e-01 4.62585e-01 5.45183e-01]
 [3.35846e+00 2.39439e-16 3.35663e+00 3.35580e+00 3.35369e+00]
 [1.55892e-01 9.18533e+00 1.80846e-18 1.52127e-01 1.29038e-01]
 [1.31500e-01 1.11469e+01 1.25391e-01 1.18172e-16 1.68509e-01]
 [6.64737e-02 1.69014e+00 5.20587e-02 5.60532e-02 9.82460e-18]]
[[2.36792e-17 3.35846e+00 1.55892e-01 ... 4.52723e-02 3.09185e-01
  0.00000e+00]
 [1.25733e+01 2.39439e-16 9.18533e+00 ... 1.94375e+01 2.11762e+01
  0.00000e+00]
 [4.58668e-01 3.35663e+00 1.80846e-18 ... 5.87584e-02 4.12739e-01
  0.00000e+00]
 ...
 [4.54572e-01 3.35550e+00 1.61108e-01 ... 1.83586e-17 3.88158e-01
  0.00000e+00]
 [3.88630e-01 3.35938e+00 1.98867e-01 ... 6.68339e-02 1.54275e-17
  0.00000e+00]
 [7.63562e-01 3.33906e+00 1.68955e-01 ... 2.22946e-01 3.24079e-01
  0.00000e+00]]


In [2]:
import numpy as np
from scipy.stats import multivariate_normal
import pandas as pd


if __name__ == "__main__":
    # Set the parameters
    num_genes = 10  # Number of variables (genes)
    num_samples = 5  # Number of samples
    data_range = (-0.5, 0.5)

    k = 0.6
    r = 0.65

    # Generate means from a uniform distribution
    means = np.random.uniform(
        low=data_range[0], high=data_range[1], size=num_genes
    )
    # Generate the standard deviation matrix
    std_dev = np.zeros((num_genes, num_genes))
    np.fill_diagonal(
        std_dev,
        np.random.uniform(low=0.01, high=data_range[1], size=num_genes),
    )

    # Initialize the correlation matrix
    corr_matrix = np.eye(num_genes)  # with zeros

    # Define the significant genes
    l = np.random.choice(
        range(num_genes), size=int(0.2 * num_genes), replace=False
    )

    # Generate the modified means and correlations for significant genes
    for i in l:
        means[i] += k
        for j in l:
            if i != j:
                corr_matrix[i, j] += r

    # cov matrix is corr matrix_ih * std_i * std_h
    cov_matrix = std_dev.dot(corr_matrix).dot(std_dev)

    # Generate the simulated expression data
    simulated_data = multivariate_normal.rvs(
        mean=means, cov=cov_matrix, size=num_samples
    )

    # Convert the numpy array to a pandas DataFrame
    df = pd.DataFrame(simulated_data)

    # Save the DataFrame to a CSV file
    df.to_csv("simulated_data_test.csv", index=False)

    print("Data saved to simulated_data.csv")

    print(simulated_data.shape)


Data saved to simulated_data.csv
(5, 10)


In [3]:
print("means", means)
print("corr_matrix", corr_matrix)
print("cov_matrix", cov_matrix)


means [ 0.36124791 -0.12239389  1.0872388   0.24336767 -0.18461028  0.38343517
 -0.10862884  0.12641376  0.11064644  0.16519982]
corr_matrix [[1.   0.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   1.   0.   0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   1.   0.   0.   0.   0.   0.   0.65 0.  ]
 [0.   0.   0.   1.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   1.   0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   1.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   1.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   1.   0.   0.  ]
 [0.   0.   0.65 0.   0.   0.   0.   0.   1.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   1.  ]]
cov_matrix [[0.08631339 0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.23201425 0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.053856   0.         0.         0.
  0.         0.         0.01432101 0.      