In [1]:
%load_ext autoreload
%load_ext lab_black
%autoreload 2

import admix
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from os.path import join
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from scipy.stats import pearsonr
import seaborn as sns
import statsmodels.api as sm
import os
import subprocess

In [2]:
# Root directory holding the input data for this analysis.
DATA_DIR = "/u/project/sgss/UKBB/PRS-RESEARCH/DATA"
# PLINK sub-directory of DATA_DIR; handed to `dapgen score --plink` in submit_predict.
PLINK_DIR = os.path.join(DATA_DIR, "PLINK")

In [3]:
# Label of the simulation setting to process. submit_predict uses it to pick the
# out/PRS-WEIGHTS/{prefix} input directory and the out/PRS-SCORE/{prefix} output directory.
# NOTE(review): the label looks like it encodes hsq=0.25, pcausal=0.01 and a "gcta"
# heritability model — confirm against the code that generated the simulations.
prefix = "hsq-0.25-pcausal-0.01-hermodel-gcta"

In [7]:
def submit_predict(prefix, sim_i, center=True):
    """Run `dapgen score` for one simulation replicate and summarize the scores.

    The function shells out to the ``dapgen score`` command line, pointing it at
    ``PLINK_DIR`` and at the weight file
    ``out/PRS-WEIGHTS/{prefix}/sim_{sim_i}.weight.tsv.gz``; the command is asked
    to write ``out/PRS-SCORE/{prefix}/sim_{sim_i}.score.tsv.gz``. That score file
    is then read back (tab-separated, first column as index) and, for every row,
    the mean, the standard deviation (pandas default) and the 5%, 10%, ..., 95%
    quantiles across all columns are written to
    ``out/PRS-SCORE/{prefix}/sim_{sim_i}.score_summary.tsv.gz``.

    Parameters
    ----------
    prefix : str
        Sub-directory name shared by the weight input and score output folders.
    sim_i : int
        Index of the replicate; selects the ``sim_{sim_i}.*`` files.
    center : bool, default True
        Forwarded verbatim to the command line as ``--center {center}``.
        NOTE(review): the exact effect depends on `dapgen score` — confirm there.

    Returns
    -------
    None
        Results are only written to disk.

    Raises
    ------
    subprocess.CalledProcessError
        If the `dapgen score` command exits with a non-zero status.
    """
    weights_dir = f"out/PRS-WEIGHTS/{prefix}"
    score_dir = f"out/PRS-SCORE/{prefix}"
    # exist_ok=True instead of a check-then-create: this function is launched as
    # an array of parallel tasks sharing the same score_dir, so another task may
    # create the directory between an existence check and makedirs.
    os.makedirs(score_dir, exist_ok=True)

    score_path = f"{score_dir}/sim_{sim_i}.score.tsv.gz"
    cmds = [
        "dapgen score",
        f"--plink {PLINK_DIR}",
        f"--weights {weights_dir}/sim_{sim_i}.weight.tsv.gz",
        f"--out {score_path}",
        f"--center {center}",
        "--memory 40",
        "--freq-suffix .eur_train.afreq",
        "--chrom-col CHR --alt-col A1 --ref-col A2",
    ]
    # check_call raises if the scoring command fails, so no summary is produced
    # from a missing or partial score file.
    subprocess.check_call(" ".join(cmds), shell=True)

    df_score = pd.read_csv(score_path, sep="\t", index_col=0)

    # Row-wise statistics: each output row summarizes one row of the score table.
    df_summary = pd.DataFrame(
        {"MEAN": df_score.mean(axis=1), "SD": df_score.std(axis=1)}
    )
    q_list = np.linspace(0.05, 0.95, 19)
    df_quantile = df_score.quantile(q=q_list, axis=1).T
    # Round before converting to int: linspace values are not exact decimals
    # (e.g. 0.0999...), and plain int() truncation would mislabel such a column
    # as QUANTILE_9 instead of QUANTILE_10.
    df_quantile.columns = [f"QUANTILE_{int(round(q * 100))}" for q in q_list]
    df_summary = pd.merge(df_summary, df_quantile, left_index=True, right_index=True)
    df_summary.to_csv(
        f"{score_dir}/sim_{sim_i}.score_summary.tsv.gz",
        sep="\t",
        float_format="%.6f",
    )


import submitit

# Executor that writes its logs under ./submitit-logs.
executor = submitit.SgeExecutor(folder="./submitit-logs")

# Resource request and environment preparation forwarded to the executor.
# NOTE(review): the setup lines are presumably run in the job script before the
# task starts — confirm against the SgeExecutor implementation.
job_options = dict(
    time_min=200,
    memory_g=60,
    queue="highp",
    setup=[
        "export PATH=~/project-pasaniuc/software/miniconda3/bin:$PATH",
        "export PYTHONNOUSERSITE=True",
    ],
)
executor.update_parameters(**job_options)

# One submit_predict call per replicate index 0..9, all with the same prefix.
sim_indices = np.arange(10)
jobs = executor.map_array(
    submit_predict,
    [prefix for _ in sim_indices],
    sim_indices,
)