In [1]:
import os
import shlex

import numpy as np
import pandas as pd
from tqdm import tqdm

In [3]:
# Base directory from which the tool, input and output paths below are derived.
# NOTE(review): the captured GCTA log at the end of this notebook shows paths
# under /data/jerrylee/pjt/Factor, which this value would not produce -- the
# cell may have been edited after the logged run; confirm before re-running.
ROOT_PATH = "/data/jerrylee"

In [5]:
# Executable invoked for each REML run (first token of the command line).
GCTA_PATH = f"{ROOT_PATH}/tools/gcta64"
# Directory whose entries are listed to obtain the phenotype files.
PHENO_DIR = f"{ROOT_PATH}/data/pheno-residual"
# Directory holding the per-subsample GRM files referenced by the MGRM lists.
GRM_DIR = f"{ROOT_PATH}/data/UKB/unified-grm-maf001"
# Root of the REML outputs; one sub-directory per phenotype is created in it.
OUT_DIR = f"{ROOT_PATH}/data/reml-maf001"
# Subsample indices iterated over by both the MGRM and the REML loops (0..99).
SAMPLE_IDXS = np.arange(100)
# Directory where the generated .mgrm list files are written and read back.
MGRM_DIR = f"{ROOT_PATH}/data/UKB/unified-grm-maf001/mgrm_AD_FACTOR"

In [3]:
# make MGRM file (AD, FACTOR)
def get_grm_fns(
    grm_types: list,
    grm_dir: str,
    chr_indices: list,
    sample_indices: list,
    ):
    """Enumerate GRM paths for every type / chromosome / sample combination.

    Each path has the form ``{grm_dir}/chr{chr}.sample_{sample}.{type}``.

    Args:
        grm_types: Suffixes appended after the sample index.
        grm_dir: Directory prefix placed in front of every path.
        chr_indices: Chromosome labels inserted after ``chr``.
        sample_indices: Sample labels inserted after ``sample_``.

    Returns:
        A list of path strings ordered by type first, then chromosome,
        then sample (the sample index varies fastest).
    """
    return [
        f"{grm_dir}/chr{chrom}.sample_{sample}.{kind}"
        for kind in grm_types
        for chrom in chr_indices
        for sample in sample_indices
    ]

def save_mgrm_file(
    grm_fns: list,
    mgrm_fn: str,
    ):
    """Write a list file containing one GRM path per line.

    The parent directory of ``mgrm_fn`` is created when it does not exist yet,
    so writing into a fresh output directory no longer fails with
    ``FileNotFoundError``. An existing file at ``mgrm_fn`` is overwritten.

    Args:
        grm_fns: Paths to write, in order; each is followed by a newline.
        mgrm_fn: Path of the list file to create.
    """
    parent_dir = os.path.dirname(mgrm_fn)
    # dirname is "" for a bare file name; makedirs("") would raise.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(mgrm_fn, "w") as f:
        f.writelines(f"{grm_fn}\n" for grm_fn in grm_fns)

# Write one MGRM list file per (subsample, standardization scheme) pair.
for subsample_idx in SAMPLE_IDXS:
    for std_type in ("AD", "FACTOR"):
        # "FACTOR" lists the het/hom/hethom GRMs; "AD" lists the add/dom GRMs.
        grm_types = ["het", "hom", "hethom"] if std_type == "FACTOR" else ["add", "dom"]

        # Paths for the combined chr1-22 GRMs of this subsample only.
        grm_fns = get_grm_fns(grm_types, GRM_DIR, ["1-22"], [subsample_idx])

        # One GRM path per line in the resulting .mgrm file.
        save_mgrm_file(
            grm_fns,
            f"{MGRM_DIR}/chr1-22.sample_{subsample_idx}.{std_type}.mgrm",
        )


In [None]:
# Phenotype files to analyse: regular, non-hidden files only, in sorted order.
# os.listdir returns entries in arbitrary order and also lists sub-directories
# and dotfiles (e.g. .ipynb_checkpoints); a dotfile would yield an empty
# phenotype name downstream and be handed to GCTA as a phenotype file.
pheno_fns = sorted(
    fn
    for fn in os.listdir(PHENO_DIR)
    if not fn.startswith(".") and os.path.isfile(os.path.join(PHENO_DIR, fn))
)

In [None]:
# REML
def get_reml_cmd(
    gcta_path: str,
    mgrm_path: str,
    pheno_fn: str,
    out_path: str,
    thread_num: int = 10,
    no_constrain=False,
) -> str:
    """Build the shell command line for one REML run.

    Every argument is shell-quoted, so paths containing spaces or shell
    metacharacters are passed through intact when the string is executed by a
    shell. For arguments without such characters the result is identical to a
    plain space-joined command.

    Args:
        gcta_path: Executable placed first on the command line.
        mgrm_path: Value passed to ``--mgrm``.
        pheno_fn: Value passed to ``--pheno``.
        out_path: Value passed to ``--out``.
        thread_num: Value passed to ``--thread-num``.
        no_constrain: When true, ``--reml-no-constrain`` is appended.

    Returns:
        The complete command as a single string.
    """
    cmd = [
        gcta_path,
        "--reml",
        "--mgrm", mgrm_path,
        "--pheno", pheno_fn,
        "--out", out_path,
        "--thread-num", str(thread_num),
    ]

    if no_constrain:
        cmd.append("--reml-no-constrain")

    # Quote each token individually; a bare " ".join would let the shell
    # re-split or interpret paths that contain whitespace or metacharacters.
    return " ".join(shlex.quote(arg) for arg in cmd)
        
# Run REML for every phenotype x subsample x standardization scheme.
failed_cmds = []  # commands whose process reported a non-zero status

for pheno_fn in pheno_fns:
    # Phenotype name is the file name up to its first dot.
    pheno_name = pheno_fn.split(".")[0]

    # exist_ok makes the cell re-runnable without a separate existence check.
    os.makedirs(f"{OUT_DIR}/{pheno_name}", exist_ok=True)

    for sample_idx in SAMPLE_IDXS:
        for std_type in ["AD", "FACTOR"]:

            cmd = get_reml_cmd(
                gcta_path=GCTA_PATH,
                mgrm_path=f"{MGRM_DIR}/chr1-22.sample_{sample_idx}.{std_type}.mgrm",
                pheno_fn=f"{PHENO_DIR}/{pheno_fn}",
                out_path=f"{OUT_DIR}/{pheno_name}/{pheno_name}.sample_{sample_idx}.{std_type}",
                thread_num=30,
                no_constrain=True,
            )

            # Keep going after a failure, but remember it instead of silently
            # discarding the status returned by os.system.
            if os.system(cmd) != 0:
                failed_cmds.append(cmd)

# Summarise failures at the end so they are not lost in the GCTA log output.
if failed_cmds:
    print(f"{len(failed_cmds)} REML run(s) exited with a non-zero status:")
    for failed_cmd in failed_cmds:
        print(f"  {failed_cmd}")

*******************************************************************
* Genome-wide Complex Trait Analysis (GCTA)
* version 1.94.0 beta Linux
* (C) 2010-present, Jian Yang, The University of Queensland
* Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
*******************************************************************
Analysis started at 16:04:44 KST on Mon Jul 07 2025.
Hostname: h2

Accepted options:
--reml
--mgrm /data/jerrylee/pjt/Factor/data/UKB/unified-grm-maf001/mgrm_AD_FACTOR/chr1-22.sample_14.FACTOR.mgrm
--pheno /data/jerrylee/pjt/Factor/data/pheno-residual/Standing_height.tsv
--out /data/jerrylee/pjt/Factor/data/reml-maf001/Standing_height/Standing_height.sample_14.FACTOR
--thread-num 30
--reml-no-constrain

Note: the program will be running on 30 threads.

Reading phenotypes from [/data/jerrylee/pjt/Factor/data/pheno-residual/Standing_height.tsv].
Non-missing phenotypes of 313644 individuals are included from [/data/jerrylee/pjt/Factor/data/pheno-residual/Standing_heig