In [1]:
import pygrgl
import random

import os
import sys

# Location of the local grg_pheno_sim checkout that is put on the import path.
# Set the GRG_PHENO_SIM_ROOT environment variable to point at your own checkout;
# the default is the original author's path and only exists on their machine.
GRG_PHENO_SIM_ROOT = os.environ.get("GRG_PHENO_SIM_ROOT", "/Users/adityasyam/grg_pheno_sim")

# Guard so that re-running this cell does not keep appending duplicate entries.
if GRG_PHENO_SIM_ROOT not in sys.path:
    sys.path.append(GRG_PHENO_SIM_ROOT)

from grg_pheno_sim.model import grg_causal_mutation_model
from grg_pheno_sim.binary_phenotype import sim_binary_phenotypes, sim_binary_phenotypes_custom


The following cell converts the gzip-compressed VCF file into a GRG, which is then used for the phenotype simulations below.

In [None]:
%%script bash --out /dev/null
# Runs in a bash subprocess through IPython's %%script cell magic.
# NOTE(review): per the IPython docs, `--out` names a Python variable that
# receives the captured stdout rather than a file path; the practical effect
# here is that the script's stdout is not shown in the notebook - confirm
# that this is the intent (``%%capture`` would state it more directly).

# Marker line; its output is captured along with the rest of stdout.
echo "Test"

# Build a GRG from the compressed VCF under ../data and write it to
# test-200-samples.grg in the current working directory.
# NOTE(review): the meaning of --no-maf-flip, -p and -t is not visible from
# this notebook; see `grg construct --help` before changing them.
grg construct --no-maf-flip -p 10 -t 2 ../data/test-200-samples.vcf.gz --out-file test-200-samples.grg

In [None]:
# --- Input graph --------------------------------------------------------------
# Sample GRG stored in the same directory as this notebook.
grg_1 = pygrgl.load_immutable_grg("test-200-samples.grg")

# --- Causal mutation model ----------------------------------------------------
model_type, mean, var = "normal", 0, 1
model = grg_causal_mutation_model(model_type, mean=mean, var=var)

# --- Simulation settings ------------------------------------------------------
num_causal = 1000
random_seed = 1
population_prevalence = 0.1  # i.e. 1 in 10 individuals is a case
noise_heritability = 0.33
normalize_genetic_values_before_noise = True
standardized_output = True

# --- Output files (written next to this notebook) -----------------------------
# When enabled, the effect size of each mutation node is saved to a .par file.
effect_output_required = True
effect_path = 'univariate_sample_effect_sizes.par'
# File that receives the simulated phenotypes.
output_path = 'normal_pheno_binary.phen'
# True when column names are expected in the output file.
header = True

phenotypes_binary = sim_binary_phenotypes(
    grg_1,
    model,
    num_causal,
    population_prevalence,
    random_seed,
    normalize_genetic_values_before_noise=normalize_genetic_values_before_noise,
    noise_heritability=noise_heritability,
    save_effect_output=effect_output_required,
    effect_path=effect_path,
    standardized_output=standardized_output,
    path=output_path,
    header=header,
)


In [None]:
# Bare last expression: the notebook renders the object returned by sim_binary_phenotypes.
phenotypes_binary

In [None]:
# Tally the 0s and 1s in the simulated phenotypes and report the observed
# fraction of 1s, for comparison with the requested population prevalence.
# Assumes the "phenotype" column supports element-wise comparison and .sum()
# (e.g. a pandas Series) - TODO confirm.
binary_list = phenotypes_binary["phenotype"]
is_zero = binary_list == 0
is_one = binary_list == 1
num_zeros, num_ones = is_zero.sum(), is_one.sum()

observed_ratio = num_ones / (num_ones + num_zeros)

print("Number of 0s:", num_zeros)
print("Number of 1s:", num_ones)
print("Population prevalence ratio observed: ", str(observed_ratio))


The observed population prevalence above indicates that our final phenotypes approximately reflect the expected prevalence of 0.1.

We now demonstrate the use case for a higher population prevalence.

In [None]:
# --- Input graph --------------------------------------------------------------
# Sample GRG stored in the same directory as this notebook.
grg_1 = pygrgl.load_immutable_grg("test-200-samples.grg")

# --- Causal mutation model ----------------------------------------------------
model_type, mean, var = "normal", 0, 1
model = grg_causal_mutation_model(model_type, mean=mean, var=var)

# --- Simulation settings ------------------------------------------------------
num_causal = 1000
random_seed = 1
population_prevalence_high = 0.5  # i.e. 1 in 2 individuals is a case
noise_heritability = 0.33
normalize_genetic_values_before_noise = True
standardized_output = True

# --- Output files (written next to this notebook) -----------------------------
# When enabled, the effect size of each mutation node is saved to a .par file.
effect_output_required = True
effect_path = 'univariate_sample_effect_sizes.par'
# File that receives the simulated phenotypes.
output_path = 'normal_pheno_binary_high_prevalence.phen'
# True when column names are expected in the output file.
header = True

phenotypes_binary_high_prev = sim_binary_phenotypes(
    grg_1,
    model,
    num_causal,
    population_prevalence_high,
    random_seed,
    normalize_genetic_values_before_noise=normalize_genetic_values_before_noise,
    noise_heritability=noise_heritability,
    save_effect_output=effect_output_required,
    effect_path=effect_path,
    standardized_output=standardized_output,
    path=output_path,
    header=header,
)


In [None]:
# Bare last expression: the notebook renders the object returned by the
# high-prevalence sim_binary_phenotypes run.
phenotypes_binary_high_prev

In [None]:
# Tally the 0s and 1s in the high-prevalence phenotypes and report the
# observed fraction of 1s, for comparison with the requested prevalence.
# Assumes the "phenotype" column supports element-wise comparison and .sum()
# (e.g. a pandas Series) - TODO confirm.
binary_list_high_prev = phenotypes_binary_high_prev["phenotype"]
is_zero = binary_list_high_prev == 0
is_one = binary_list_high_prev == 1
num_zeros, num_ones = is_zero.sum(), is_one.sum()

observed_ratio = num_ones / (num_ones + num_zeros)

print("Number of 0s:", num_zeros)
print("Number of 1s:", num_ones)
print("Population prevalence ratio observed: ", str(observed_ratio))


Finally, we use custom effect sizes to compute the binary phenotypes.

In [9]:
# Custom effect sizes: one uniform draw from [0.0, 1.0) per mutation in the GRG,
# passed to the simulator as a plain Python list.
# A dedicated, seeded generator keeps the values identical across
# "Restart Kernel -> Run All" without touching the global `random` state.
CUSTOM_EFFECTS_SEED = 1
custom_effects_rng = random.Random(CUSTOM_EFFECTS_SEED)
random_effects = [custom_effects_rng.random() for _ in range(grg_1.num_mutations)]


In [None]:
# --- Simulation settings for the custom effect sizes --------------------------
population_prevalence_custom = 0.25
noise_heritability = 0.33
normalize_genetic_values_before_noise = True
standardized_output = True

# File that receives the simulated phenotypes (saved in the same directory).
output_path = 'custom_pheno_binary.phen'

# Uses the GRG and the custom effect-size list prepared in the cells above.
phenotypes_list_binary = sim_binary_phenotypes_custom(
    grg_1,
    random_effects,
    population_prevalence_custom,
    normalize_genetic_values_before_noise=normalize_genetic_values_before_noise,
    noise_heritability=noise_heritability,
    standardized_output=standardized_output,
    path=output_path,
)

In [None]:
# Bare last expression: the notebook renders the object returned by
# sim_binary_phenotypes_custom.
phenotypes_list_binary

In [None]:
# Tally the 0s and 1s in the custom-effect phenotypes and report the
# observed fraction of 1s, for comparison with the requested prevalence.
# Assumes the "phenotype" column supports element-wise comparison and .sum()
# (e.g. a pandas Series) - TODO confirm.
binary_list_custom = phenotypes_list_binary["phenotype"]
is_zero = binary_list_custom == 0
is_one = binary_list_custom == 1
num_zeros, num_ones = is_zero.sum(), is_one.sum()

observed_ratio = num_ones / (num_ones + num_zeros)

print("Number of 0s:", num_zeros)
print("Number of 1s:", num_ones)
print("Population prevalence ratio observed: ", str(observed_ratio))
