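# Description

Runs the PhenoPLIER GLS script (`gls_phenoplier.sh`) on the S-MultiXcan results of each trait listed in `traits_info.csv`, one job per trait, using GNU Parallel.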

# Modules

In [1]:
import os
from pathlib import Path

# Settings

In [2]:
# Defaults: no notebook file path is known yet, so the notebook directory
# falls back to the current working directory (stored as a plain string).
PHENOPLIER_NOTEBOOK_FILEPATH = None
PHENOPLIER_NOTEBOOK_DIR = str(Path.cwd())

In [3]:
# Parameters
# NOTE(review): this cell looks like it was injected by a notebook
# parameterization tool (e.g. papermill) rather than written by hand — confirm
# before editing. It overrides the `None` default assigned in the settings cell.
PHENOPLIER_NOTEBOOK_FILEPATH = (
    "projects/asthma-copd/nbs/30_gls_phenoplier/01-run_gls_phenoplier.ipynb"
)

In [4]:
# NOTE(review): neither of these names is referenced anywhere else in the
# visible notebook, and both point to "07-compile_gwas_snps_and_twas_genes"
# rather than to this GLS run — possibly a leftover from another notebook.
# Confirm whether they are still needed before relying on or removing them.
INPUT_NOTEBOOK_FILENAME = "07-compile_gwas_snps_and_twas_genes.ipynb"
OUTPUT_NOTEBOOK_FILENAME = "07-compile_gwas_snps_and_twas_genes.run.ipynb"

In [5]:
# When a notebook file path was provided, use its parent folder as the
# notebook directory; otherwise keep the directory chosen in the settings cell.
PHENOPLIER_NOTEBOOK_DIR = (
    PHENOPLIER_NOTEBOOK_DIR
    if PHENOPLIER_NOTEBOOK_FILEPATH is None
    else str(Path(PHENOPLIER_NOTEBOOK_FILEPATH).parent)
)

display(PHENOPLIER_NOTEBOOK_DIR)

'projects/asthma-copd/nbs/30_gls_phenoplier'

# Run

In [6]:
# Export the notebook directory as an environment variable so the bash cell
# below can read it as ${PHENOPLIER_NOTEBOOK_DIR} (it is used there to decide
# where the per-job log files are written).
%env PHENOPLIER_NOTEBOOK_DIR=$PHENOPLIER_NOTEBOOK_DIR

env: PHENOPLIER_NOTEBOOK_DIR=projects/asthma-copd/nbs/30_gls_phenoplier


In [7]:
%%bash
# Runs the GLS PhenoPLIER script for a single trait.
#
# Arguments:
#   $1  One comma-separated record with the fields
#       pheno_id,desc,file,sample_size,n_cases
#       (only pheno_id and file are used here; the other fields are read just
#       to keep the positions aligned).
#
# Environment variables read:
#   PHENOPLIER_NOTEBOOK_DIR                      base directory for the logs
#   PHENOPLIER_PROJECTS_ASTHMA_COPD_RESULTS_DIR  base directory for inputs/outputs
#   PHENOPLIER_CODE_DIR                          location of scripts/gls_phenoplier.sh
#
# Returns:
#   The exit status of gls_phenoplier.sh, so that the caller (GNU Parallel with
#   --halt) can actually detect a failed job. The script's stdout/stderr are
#   redirected to a per-trait log file.
run_job () {
  # read trait information
  local pheno_id desc file sample_size n_cases
  IFS=',' read -r pheno_id desc file sample_size n_cases <<< "$1"

  local CODE_DIR="${PHENOPLIER_NOTEBOOK_DIR}"

  # file name without its last extension; used to derive input/output names
  local INPUT_FILENAME="${file%.*}"
  local GENE_CORR_FILE="${PHENOPLIER_PROJECTS_ASTHMA_COPD_RESULTS_DIR}/gls_phenoplier/gene_corrs/${pheno_id}/gene_corrs-symbols.per_lv"

  local SMULTIXCAN_DIR="${PHENOPLIER_PROJECTS_ASTHMA_COPD_RESULTS_DIR}/twas/smultixcan"
  local SMULTIXCAN_FILE="${SMULTIXCAN_DIR}/${INPUT_FILENAME}-gtex_v8-mashr-smultixcan.txt"

  local OUTPUT_DIR="${PHENOPLIER_PROJECTS_ASTHMA_COPD_RESULTS_DIR}/gls_phenoplier/gls"
  mkdir -p "${OUTPUT_DIR}"

  local OUTPUT_FILENAME_BASE="${INPUT_FILENAME}-gls_phenoplier"

  local LOGS_DIR="${CODE_DIR}/jobs_output"
  mkdir -p "${LOGS_DIR}"

  # make sure we are not also parallelizing within numpy, etc
  # (OpenBLAS reads OPENBLAS_NUM_THREADS, without an underscore after OPEN)
  export NUMBA_NUM_THREADS=1
  export MKL_NUM_THREADS=1
  export OPENBLAS_NUM_THREADS=1
  export NUMEXPR_NUM_THREADS=1
  export OMP_NUM_THREADS=1

  echo "Running for $pheno_id"
  echo "Saving results in ${OUTPUT_DIR}/${OUTPUT_FILENAME_BASE}.tsv.gz"
  echo "Saving logs in ${LOGS_DIR}/${OUTPUT_FILENAME_BASE}.log"

  # keep the script's exit status: without this, the trailing `echo` would make
  # the function always succeed and failures would go unnoticed
  local exit_code=0
  bash "${PHENOPLIER_CODE_DIR}/scripts/gls_phenoplier.sh" \
    --input-file "${SMULTIXCAN_FILE}" \
    --gene-corr-file "${GENE_CORR_FILE}" \
    --covars "gene_size gene_size_log gene_density gene_density_log" \
    --debug-use-sub-gene-corr 1 \
    --output-file "${OUTPUT_DIR}/${OUTPUT_FILENAME_BASE}.tsv.gz" > "${LOGS_DIR}/${OUTPUT_FILENAME_BASE}.log" 2>&1 || exit_code=$?

  if [ "${exit_code}" -ne 0 ]; then
    echo "ERROR: gls_phenoplier.sh failed for ${pheno_id} (exit code ${exit_code}); see ${LOGS_DIR}/${OUTPUT_FILENAME_BASE}.log" >&2
  fi

  echo
  return "${exit_code}"
}

# export function so GNU Parallel can see it
export -f run_job

# fail loudly if the traits file is missing, instead of silently running no jobs
TRAITS_INFO_FILE="${PHENOPLIER_PROJECTS_ASTHMA_COPD_DATA_DIR}/traits_info.csv"
if [ ! -r "${TRAITS_INFO_FILE}" ]; then
  echo "ERROR: traits file not found or not readable: ${TRAITS_INFO_FILE}" >&2
  exit 1
fi

# generate a list of run_job calls for GNU Parallel (the first line of the file
# is skipped as a header). Each record is shell-quoted with %q because GNU
# Parallel runs every generated line as a shell command: without quoting, a
# space or shell metacharacter in any field would split or alter the argument.
# The `|| [ -n ... ]` keeps a final line that has no trailing newline.
while IFS= read -r line || [ -n "${line}" ]; do
    # skip blank lines: they carry no trait record
    [ -n "${line}" ] || continue
    printf 'run_job %q\n' "${line}"
done < <(tail -n "+2" "${TRAITS_INFO_FILE}") |
    parallel -k --lb --halt 2 -j"${PHENOPLIER_GENERAL_N_JOBS}"

Running for asthma_only
Saving results in /opt/data/projects/asthma-copd/results/gls_phenoplier/gls/GWAS_Asthma_only_GLM_SNPs_info0.7-gls_phenoplier.tsv.gz
Saving logs in projects/asthma-copd/nbs/30_gls_phenoplier/jobs_output/GWAS_Asthma_only_GLM_SNPs_info0.7-gls_phenoplier.log

Running for copd_only
Saving results in /opt/data/projects/asthma-copd/results/gls_phenoplier/gls/GWAS_COPD_only_GLM_SNPs_info0.7-gls_phenoplier.tsv.gz
Saving logs in projects/asthma-copd/nbs/30_gls_phenoplier/jobs_output/GWAS_COPD_only_GLM_SNPs_info0.7-gls_phenoplier.log

Running for aco
Saving results in /opt/data/projects/asthma-copd/results/gls_phenoplier/gls/GWAS_ACO_GLM_SNPs_info0.7-gls_phenoplier.tsv.gz
Saving logs in projects/asthma-copd/nbs/30_gls_phenoplier/jobs_output/GWAS_ACO_GLM_SNPs_info0.7-gls_phenoplier.log

