# Description

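Runs `GLSPhenoplier` on simulated phenotypes (absolute values of standard normal draws) against MultiPLIER latent variables (LVs), and saves the resulting coefficients and p-values as null simulations.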

# Environment variables

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import conf

In [None]:
# Number of jobs taken from the project configuration; the next cell uses it
# to set the thread-count environment variables.
N_JOBS = conf.GENERAL["N_JOBS"]
display(N_JOBS)

In [None]:
%env MKL_NUM_THREADS=$N_JOBS
%env OPEN_BLAS_NUM_THREADS=$N_JOBS
%env NUMEXPR_NUM_THREADS=$N_JOBS
%env OMP_NUM_THREADS=$N_JOBS

# Modules

In [None]:
from pathlib import Path

import statsmodels.api as sm
import numpy as np
import pandas as pd
from sklearn.preprocessing import scale
from tqdm import tqdm

from gls import GLSPhenoplier

# Settings

In [None]:
# Number of random phenotypes to simulate in the "Run" section.
N_SIMULATED_PHENOTYPES = 10

In [None]:
# Directory that receives the GLS results, as configured in `conf`.
OUTPUT_DIR = conf.RESULTS["GLS"]
display(OUTPUT_DIR)

# Create the directory (and any missing parents); do nothing if it already exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Pickle file inside OUTPUT_DIR where the simulation results are written.
OUTPUT_FILENAME = OUTPUT_DIR / "gls-null_simulations.pkl"
display(OUTPUT_FILENAME)

# Load data

## MultiPLIER Z matrix

In [None]:
multiplier_z_matrix = pd.read_pickle(conf.MULTIPLIER["MODEL_Z_MATRIX_FILE"])

In [None]:
multiplier_z_matrix.shape

In [None]:
multiplier_z_matrix.head()

In [None]:
lv_codes = list(multiplier_z_matrix.columns)
display(lv_codes[:5])

## PhenomeXcan (S-MultiXcan)

In [None]:
# INPUT_SUBSET = "z_score_std"

In [None]:
# INPUT_STEM = "projection-smultixcan-efo_partial-mashr-zscores"

In [None]:
# input_filepath = Path(
#     conf.RESULTS["DATA_TRANSFORMATIONS_DIR"],
#     INPUT_SUBSET,
#     f"{INPUT_SUBSET}-{INPUT_STEM}.pkl",
# ).resolve()

In [None]:
# data = pd.read_pickle(input_filepath)

In [None]:
# data.shape

In [None]:
# data.head()

# GLSPhenoplier

## Load `lv_weights`

In [None]:
# NOTE(review): `_get_data` is a private GLSPhenoplier helper; this keeps the
# third element of whatever it returns and treats it as the LV weights --
# confirm the return layout against the `gls` module.
lv_weights = GLSPhenoplier._get_data(
    conf.PHENOMEXCAN["SMULTIXCAN_EFO_PARTIAL_MASHR_ZSCORES_FILE"]
)[2]

In [None]:
lv_weights.shape

In [None]:
lv_weights.head()

## Run

In [None]:
# Random state with a fixed seed (0), used to draw the simulated phenotypes.
rs = np.random.RandomState(0)

In [None]:
# Fit one GLSPhenoplier model per (simulated phenotype, LV) pair and collect the
# LV coefficient, its one- and two-sided p-values and the model summary.
results = []

# loop-invariant: the same S-MultiXcan result set is used for every model
smultixcan_filepath = conf.PHENOMEXCAN["SMULTIXCAN_EFO_PARTIAL_MASHR_ZSCORES_FILE"]

# the context manager closes the progress bar even if a model fit raises
with tqdm(total=N_SIMULATED_PHENOTYPES * len(lv_codes)) as pbar:
    for idx in range(N_SIMULATED_PHENOTYPES):
        # generate a random phenotype
        phenotype_code = f"random_normal-{idx}"

        phenotype = pd.Series(
            # use abs to simulate MultiPLIER z-scores (always positives)
            np.abs(rs.normal(size=lv_weights.shape[0])),
            index=lv_weights.index.copy(),
            name=phenotype_code,
        )

        # compute an association for all LVs; iterating over every LV keeps the
        # loop consistent with the progress bar total
        for lv_code in lv_codes:
            pbar.set_description(f"{phenotype_code} - {lv_code}")

            gls_model = GLSPhenoplier(
                smultixcan_result_set_filepath=smultixcan_filepath
            )
            gls_model.fit_named(lv_code, phenotype)
            res = gls_model.results

            results.append(
                {
                    "phenotype": phenotype_code,
                    "lv": lv_code,
                    "coef": res.params.loc["lv"],
                    "pvalue": res.pvalues_onesided.loc["lv"],
                    "pvalue_twosided": res.pvalues.loc["lv"],
                    "summary": gls_model.results_summary,
                }
            )

            # checkpoint partial results every 10 models trained; count fitted
            # models rather than testing the phenotype index
            if len(results) % 10 == 0:
                pd.DataFrame(results).to_pickle(OUTPUT_FILENAME)

            pbar.update(1)

In [None]:
# Turn the list of per-model records into a DataFrame (one row per fitted model).
results = pd.DataFrame(results)

In [None]:
results.shape

In [None]:
results.head()

In [None]:
# Show the ten rows with the smallest one-sided p-values.
results.sort_values("pvalue").head(10)

## Save

In [None]:
# Write the complete results to the same file used for checkpoints during the run.
results.to_pickle(OUTPUT_FILENAME)