# Phenotype simulation from real genotypes

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to local modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2
# Load the lab_black extension (presumably auto-formats cells with black —
# confirm it is installed in the kernel environment).
%load_ext lab_black

In [2]:
from os.path import join
import numpy as np
import pandas as pd
import admix
import matplotlib.pyplot as plt
from os.path import join
import submitit
import dapgen
from tqdm import tqdm
import dask.array as da
import os
import itertools
import utils

In [3]:
# Directory containing one PLINK fileset prefix per chromosome (chr1 ... chr22).
PLINK_DIR = "../00-compile-data/out/PLINK/all"
# Value forwarded as `n_sim` to the phenotype simulator.
N_SIM = 100
# PLINK prefixes for chromosomes 1 through 22, in chromosome order.
bfile_list = [join(PLINK_DIR, "chr{}".format(autosome)) for autosome in range(1, 22 + 1)]

In [4]:
def simulate_pheno(hsq, causal_prop, hermodel, out_dir):
    """Run one phenotype simulation setting and write its output under ``out_dir``.

    Thin wrapper around ``utils.simulate_quant_pheno`` that supplies the
    notebook-level genotype inputs (``bfile_list``) and replicate count
    (``N_SIM``), so that only the per-setting parameters need to be passed.

    Parameters
    ----------
    hsq
        Forwarded unchanged as ``hsq`` (presumably the heritability; confirm
        against ``utils.simulate_quant_pheno``).
    causal_prop
        Forwarded unchanged as ``causal_prop`` (presumably the proportion of
        causal variants; confirm against ``utils.simulate_quant_pheno``).
    hermodel
        Forwarded unchanged as ``hermodel``.
    out_dir
        Output directory; created (including parents) when it does not exist.
        The simulator receives ``out_dir + "/sim"`` as its ``out_prefix``.

    Returns
    -------
    None
    """
    # exist_ok=True replaces the previous exists()-then-makedirs() sequence,
    # which could raise FileExistsError if the directory appeared between the
    # check and the creation (e.g. a retried or concurrently running job).
    os.makedirs(out_dir, exist_ok=True)

    # Reset NumPy's global RNG so every call starts from the same state.
    np.random.seed(42)
    utils.simulate_quant_pheno(
        bfile_list=bfile_list,
        hsq=hsq,
        causal_prop=causal_prop,
        out_prefix=out_dir + "/sim",
        hermodel=hermodel,
        n_sim=N_SIM,
    )

In [5]:
# Full factorial grid of simulation settings: one row per
# (hsq, pcausal, hermodel) combination.
df_params = pd.DataFrame(
    list(
        itertools.product(
            [0.05, 0.25],
            [0.001, 0.01],
            ["mafukb", "uniform", "gcta"],
        )
    ),
    columns=["hsq", "pcausal", "hermodel"],
)

# Each setting gets its own output directory, named after its parameter values.
df_params["out_dir"] = [
    f"out/PHENO/hsq-{setting.hsq}-pcausal-{setting.pcausal}-hermodel-{setting.hermodel}"
    for setting in df_params.itertuples(index=False)
]

In [6]:
import submitit

# Executor that keeps its job files under ./submitit-logs.
executor = submitit.SgeExecutor(folder="./submitit-logs")

# Per-job resource requests plus shell lines passed as `setup`
# (presumably run before the job starts — confirm against the executor docs).
executor.update_parameters(
    time_min=600,
    memory_g=30,
    setup=[
        "export PATH=~/project-pasaniuc/software/miniconda3/bin:$PATH",
        "export PYTHONNOUSERSITE=True",
    ],
)

# Submit one array task per row of df_params; the columns are passed
# positionally in the order simulate_pheno expects its arguments.
jobs = executor.map_array(
    simulate_pheno,
    *(df_params[column] for column in ("hsq", "pcausal", "hermodel", "out_dir")),
)