In [1]:
# Automatically reload imported modules before each cell is executed, so that
# edits to the project's own modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle
from glob import glob
from concurrent.futures import ProcessPoolExecutor, as_completed

import numpy as np
import pandas as pd
from scipy import stats

import settings as conf
from utils import is_number, chunker

# Load S-PrediXcan results

## From Rapid GWAS project

In [3]:
from results.spredixcan import PhenoResults

In [4]:
# Every entry directly under the Rapid GWAS results directory is matched by the
# glob pattern; the number of matches must equal the expected phenotype count.
# The pattern is built by passing the components to os.path.join separately
# instead of concatenating the separator by hand.
_path = os.path.join(conf.SPREDIXCAN_RESULTS_DIR['RapidGWASProject'], '*')
display(_path)
all_spredixcan_results_dirs = glob(_path)
display(len(all_spredixcan_results_dirs))
assert len(all_spredixcan_results_dirs) == conf.SPREDIXCAN_EXPECTED_PHENOTYPES['RapidGWASProject']

'/mnt/phenomexcan_base/results/spredixcan/rapid_gwas_project/*'

4049

In [5]:
# Wrap each Rapid GWAS results directory in a PhenoResults object and check
# that none is missing.
all_spredixcan_phenotypes = list(map(PhenoResults, all_spredixcan_results_dirs))

_n_loaded = len(all_spredixcan_phenotypes)
display(_n_loaded)
assert _n_loaded == conf.SPREDIXCAN_EXPECTED_PHENOTYPES['RapidGWASProject']

4049

## From GTEx GWAS manuscript

In [6]:
# Every entry directly under the GTEx GWAS results directory is matched by the
# glob pattern; the number of matches must equal the expected phenotype count.
# The pattern is built by passing the components to os.path.join separately
# instead of concatenating the separator by hand.
_path = os.path.join(conf.SPREDIXCAN_RESULTS_DIR['GTEX_GWAS'], '*')
display(_path)
all_extra_results_dirs = glob(_path)
display(len(all_extra_results_dirs))
assert len(all_extra_results_dirs) == conf.SPREDIXCAN_EXPECTED_PHENOTYPES['GTEX_GWAS']

'/mnt/phenomexcan_base/results/spredixcan/gtex_gwas/*'

42

In [7]:
# Peek at the first few GTEx GWAS result directories found by the glob above.
all_extra_results_dirs[:5]

['/mnt/phenomexcan_base/results/spredixcan/gtex_gwas/PGC_ADHD_EUR_2017',
 '/mnt/phenomexcan_base/results/spredixcan/gtex_gwas/CNCR_Insomnia_all',
 '/mnt/phenomexcan_base/results/spredixcan/gtex_gwas/IMMUNOBASE_Systemic_lupus_erythematosus_hg19',
 '/mnt/phenomexcan_base/results/spredixcan/gtex_gwas/Astle_et_al_2016_Reticulocyte_count',
 '/mnt/phenomexcan_base/results/spredixcan/gtex_gwas/Astle_et_al_2016_Granulocyte_count']

In [8]:
# The GTEx GWAS result files are named
# `spredixcan_igwas_gtexmashrv8_<code>__PM__<tissue>.csv`, so a custom file
# pattern is given to PhenoResults. The pattern is a raw string so that `\.`
# reaches the regex engine untouched (in a normal string literal it is an
# invalid escape sequence and triggers a warning).
_file_pattern = r'spredixcan_igwas_gtexmashrv8_(?P<code>[^/]+)__PM__(?P<tissue>.+)\.csv$'
all_extra_phenotypes = [PhenoResults(p, _file_pattern) for p in all_extra_results_dirs]
all_extra_phenotypes_plain_names = pd.Index([p.pheno_info.get_plain_name() for p in all_extra_phenotypes])

display(len(all_extra_phenotypes))
# Check against the S-PrediXcan expected count, the same setting used when
# the directories were listed.
assert len(all_extra_phenotypes) == conf.SPREDIXCAN_EXPECTED_PHENOTYPES['GTEX_GWAS']

42

# S-PrediXcan: pvalues, zscores and effect sizes

In [9]:
from results.spredixcan import PhenoResults
from results.gtex_model import GTEXModel

In [10]:
# Folder where the per-tissue output files of this notebook are written
# (created if it does not exist yet).
OUTPUT_FOLDER = os.path.join(conf.GENE_ASSOC_DIR, 'spredixcan')
display(OUTPUT_FOLDER)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

'/mnt/phenomexcan_base/gene_assoc/spredixcan'

In [11]:
# File name template for the output files; dataframe_creator fills in the
# tissue name, the result column and the file extension.
OUTPUT_FILE_FORMAT = 'spredixcan-{tissue}-{column}.{format}'
display(OUTPUT_FILE_FORMAT)

'spredixcan-{tissue}-{column}.{format}'

In [12]:
# Tissue names as reported by GTEXModel for the configured models directory.
# NOTE(review): 49 is presumably the number of GTEx v8 tissues with prediction
# models -- confirm, and consider moving it to the settings module.
ALL_TISSUES = GTEXModel.get_tissues(conf.GTEX_MODELS_DIR)
assert len(ALL_TISSUES) == 49

### Compute results

In [13]:
# import threading
# from queue import Queue

from utils import simplify_string_for_hdf5

In [14]:
# to_write = Queue(maxsize=5)

In [15]:
def _get_combined_results(phenos, tissue, column):
    return {
        pheno.pheno_info.get_plain_name():
            pheno.get_tissue_data(tissue, cols=[column], index_col='gene_simple')
        for pheno in phenos
    }

In [16]:
def dataframe_creator(results, tissue_name, column_name):
    """Combine per-phenotype results and save them as one HDF5 file.

    The file is written to ``OUTPUT_FOLDER`` and named after
    ``OUTPUT_FILE_FORMAT``; it contains one key per phenotype, and any existing
    file with the same name is overwritten.

    Parameters
    ----------
    results : dict
        Maps phenotype plain names to their per-gene data for one tissue and
        one column (as built by ``_get_combined_results``; presumably a
        gene-indexed Series -- confirm against ``PhenoResults.get_tissue_data``).
    tissue_name : str
        Tissue the results belong to; used in the file name and log message.
    column_name : str
        Result column the values come from; used in the file name and log message.

    Raises
    ------
    AssertionError
        If the number of phenotypes differs from the expected total, if the
        combined gene index has duplicates, or if two phenotype names map to
        the same HDF5 key.
    """
    log_prefix = f'[{tissue_name} - {column_name}]'

    n_expected_phenos = sum(conf.SMULTIXCAN_EXPECTED_PHENOTYPES.values())
    assert len(results) == n_expected_phenos, len(results)

    # One column per phenotype, one row per gene.
    spredixcan = pd.DataFrame(results).rename_axis('gene_name')
    assert spredixcan.index.is_unique

    # Phenotype names are simplified before being used as HDF5 keys. Two names
    # mapping to the same key would silently overwrite each other in the store,
    # so make sure that cannot happen before writing anything.
    hdf5_keys = {col: simplify_string_for_hdf5(col) for col in spredixcan.columns}
    assert len(set(hdf5_keys.values())) == len(hdf5_keys), \
        f'{log_prefix} different phenotypes map to the same HDF5 key'

    # hdf5
    spredixcan_filename = OUTPUT_FILE_FORMAT.format(
        tissue=tissue_name,
        column=column_name,
        format='h5'
    )
    spredixcan_filename = os.path.join(OUTPUT_FOLDER, spredixcan_filename)

    print(f'  {log_prefix}, saving to: {spredixcan_filename}', flush=True)
    with pd.HDFStore(spredixcan_filename, mode='w', complevel=1) as store:
        for col, clean_col in hdf5_keys.items():
            store[clean_col] = spredixcan[col]

In [17]:
def _run(tissue, column, phenotype_chunks, n_jobs=conf.N_JOBS_HIGH):
    """Read one column of one tissue for all phenotypes and save the result.

    Each chunk of phenotypes is handed to ``_get_combined_results`` in a worker
    process; the partial dictionaries are merged in completion order and the
    merged result is passed to ``dataframe_creator``.

    Parameters
    ----------
    tissue : str
        Tissue to process.
    column : str
        Result column to read.
    phenotype_chunks : iterable
        Groups of phenotype objects; one task is submitted per group.
    n_jobs : int
        Maximum number of worker processes.
    """
    merged = {}

    with ProcessPoolExecutor(max_workers=n_jobs) as pool:
        pending = []
        for chunk in phenotype_chunks:
            pending.append(pool.submit(_get_combined_results, chunk, tissue, column))

        for finished in as_completed(pending):
            merged.update(finished.result())

    dataframe_creator(merged, tissue, column)

In [18]:
def run_all(tissues, phenotype_chunks, n_jobs=conf.N_JOBS_HIGH):
    """Generate the output files for every tissue and every result column.

    Parameters
    ----------
    tissues : iterable of str
        Tissues to process; each name is printed when its processing starts.
    phenotype_chunks : iterable
        Groups of phenotype objects, forwarded to ``_run``. Any iterable is
        accepted, including one-shot ones such as generators.
    n_jobs : int
        Maximum number of worker processes, forwarded to ``_run``.
    """
    # The chunks are iterated once per (tissue, column) pair. Materialize them
    # so that a one-shot iterable is not exhausted after the first pass, which
    # would leave every later pass without phenotypes.
    phenotype_chunks = list(phenotype_chunks)

    for tissue in tissues:
        print(tissue, flush=True)

        for column in ('pvalue', 'zscore', 'effect_size'):
            _run(tissue, column, phenotype_chunks, n_jobs)

In [19]:
# Group the phenotypes from both sources; each group becomes one task for a
# worker process in _run.
# For a quick trial on a few phenotypes only:
# phenotype_chunks = chunker(all_spredixcan_phenotypes[:5] + all_extra_phenotypes[:5], 2)
_all_phenotypes = all_spredixcan_phenotypes + all_extra_phenotypes
phenotype_chunks = chunker(_all_phenotypes, 25)

In [20]:
# Materialize the chunks into a list: _run iterates over them once for every
# (tissue, column) pair, so they have to be re-iterable.
# NOTE(review): presumably chunker returns a one-shot generator -- confirm in utils.
phenotype_chunks = list(phenotype_chunks)

In [21]:
# Reuse the tissue list that was already loaded and validated (ALL_TISSUES)
# instead of querying the models directory a second time.
tissues = ALL_TISSUES
# For a quick trial on a couple of tissues only:
#tissues = tissues[:2]

In [22]:
# writing_thread = threading.Thread(target=dataframe_creator, args=((to_write),))
# writing_thread.start()

In [23]:
# Generate one HDF5 file per tissue and result column (pvalue, zscore, effect_size).
run_all(tissues, phenotype_chunks)

Thyroid


  [Thyroid - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Thyroid-pvalue.h5


  [Thyroid - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Thyroid-zscore.h5


  [Thyroid - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Thyroid-effect_size.h5


Skin_Not_Sun_Exposed_Suprapubic


  [Skin_Not_Sun_Exposed_Suprapubic - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Skin_Not_Sun_Exposed_Suprapubic-pvalue.h5


  [Skin_Not_Sun_Exposed_Suprapubic - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Skin_Not_Sun_Exposed_Suprapubic-zscore.h5


  [Skin_Not_Sun_Exposed_Suprapubic - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Skin_Not_Sun_Exposed_Suprapubic-effect_size.h5


Colon_Transverse


  [Colon_Transverse - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Colon_Transverse-pvalue.h5


  [Colon_Transverse - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Colon_Transverse-zscore.h5


  [Colon_Transverse - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Colon_Transverse-effect_size.h5


Brain_Amygdala


  [Brain_Amygdala - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Amygdala-pvalue.h5


  [Brain_Amygdala - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Amygdala-zscore.h5


  [Brain_Amygdala - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Amygdala-effect_size.h5


Ovary


  [Ovary - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Ovary-pvalue.h5


  [Ovary - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Ovary-zscore.h5


  [Ovary - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Ovary-effect_size.h5


Colon_Sigmoid


  [Colon_Sigmoid - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Colon_Sigmoid-pvalue.h5


  [Colon_Sigmoid - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Colon_Sigmoid-zscore.h5


  [Colon_Sigmoid - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Colon_Sigmoid-effect_size.h5


Lung


  [Lung - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Lung-pvalue.h5


  [Lung - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Lung-zscore.h5


  [Lung - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Lung-effect_size.h5


Testis


  [Testis - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Testis-pvalue.h5


  [Testis - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Testis-zscore.h5


  [Testis - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Testis-effect_size.h5


Muscle_Skeletal


  [Muscle_Skeletal - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Muscle_Skeletal-pvalue.h5


  [Muscle_Skeletal - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Muscle_Skeletal-zscore.h5


  [Muscle_Skeletal - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Muscle_Skeletal-effect_size.h5


Pancreas


  [Pancreas - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Pancreas-pvalue.h5


  [Pancreas - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Pancreas-zscore.h5


  [Pancreas - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Pancreas-effect_size.h5


Adipose_Subcutaneous


  [Adipose_Subcutaneous - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adipose_Subcutaneous-pvalue.h5


  [Adipose_Subcutaneous - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adipose_Subcutaneous-zscore.h5


  [Adipose_Subcutaneous - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adipose_Subcutaneous-effect_size.h5


Brain_Anterior_cingulate_cortex_BA24


  [Brain_Anterior_cingulate_cortex_BA24 - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Anterior_cingulate_cortex_BA24-pvalue.h5


  [Brain_Anterior_cingulate_cortex_BA24 - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Anterior_cingulate_cortex_BA24-zscore.h5


  [Brain_Anterior_cingulate_cortex_BA24 - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Anterior_cingulate_cortex_BA24-effect_size.h5


Whole_Blood


  [Whole_Blood - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Whole_Blood-pvalue.h5


  [Whole_Blood - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Whole_Blood-zscore.h5


  [Whole_Blood - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Whole_Blood-effect_size.h5


Breast_Mammary_Tissue


  [Breast_Mammary_Tissue - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Breast_Mammary_Tissue-pvalue.h5


  [Breast_Mammary_Tissue - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Breast_Mammary_Tissue-zscore.h5


  [Breast_Mammary_Tissue - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Breast_Mammary_Tissue-effect_size.h5


Brain_Nucleus_accumbens_basal_ganglia


  [Brain_Nucleus_accumbens_basal_ganglia - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Nucleus_accumbens_basal_ganglia-pvalue.h5


  [Brain_Nucleus_accumbens_basal_ganglia - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Nucleus_accumbens_basal_ganglia-zscore.h5


  [Brain_Nucleus_accumbens_basal_ganglia - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Nucleus_accumbens_basal_ganglia-effect_size.h5


Kidney_Cortex


  [Kidney_Cortex - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Kidney_Cortex-pvalue.h5


  [Kidney_Cortex - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Kidney_Cortex-zscore.h5


  [Kidney_Cortex - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Kidney_Cortex-effect_size.h5


Brain_Substantia_nigra


  [Brain_Substantia_nigra - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Substantia_nigra-pvalue.h5


  [Brain_Substantia_nigra - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Substantia_nigra-zscore.h5


  [Brain_Substantia_nigra - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Substantia_nigra-effect_size.h5


Prostate


  [Prostate - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Prostate-pvalue.h5


  [Prostate - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Prostate-zscore.h5


  [Prostate - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Prostate-effect_size.h5


Stomach


  [Stomach - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Stomach-pvalue.h5


  [Stomach - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Stomach-zscore.h5


  [Stomach - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Stomach-effect_size.h5


Brain_Caudate_basal_ganglia


  [Brain_Caudate_basal_ganglia - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Caudate_basal_ganglia-pvalue.h5


  [Brain_Caudate_basal_ganglia - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Caudate_basal_ganglia-zscore.h5


  [Brain_Caudate_basal_ganglia - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Caudate_basal_ganglia-effect_size.h5


Nerve_Tibial


  [Nerve_Tibial - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Nerve_Tibial-pvalue.h5


  [Nerve_Tibial - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Nerve_Tibial-zscore.h5


  [Nerve_Tibial - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Nerve_Tibial-effect_size.h5


Brain_Cerebellum


  [Brain_Cerebellum - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cerebellum-pvalue.h5


  [Brain_Cerebellum - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cerebellum-zscore.h5


  [Brain_Cerebellum - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cerebellum-effect_size.h5


Cells_Cultured_fibroblasts


  [Cells_Cultured_fibroblasts - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Cells_Cultured_fibroblasts-pvalue.h5


  [Cells_Cultured_fibroblasts - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Cells_Cultured_fibroblasts-zscore.h5


  [Cells_Cultured_fibroblasts - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Cells_Cultured_fibroblasts-effect_size.h5


Heart_Left_Ventricle


  [Heart_Left_Ventricle - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Heart_Left_Ventricle-pvalue.h5


  [Heart_Left_Ventricle - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Heart_Left_Ventricle-zscore.h5


  [Heart_Left_Ventricle - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Heart_Left_Ventricle-effect_size.h5


Pituitary


  [Pituitary - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Pituitary-pvalue.h5


  [Pituitary - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Pituitary-zscore.h5


  [Pituitary - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Pituitary-effect_size.h5


Brain_Hippocampus


  [Brain_Hippocampus - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Hippocampus-pvalue.h5


  [Brain_Hippocampus - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Hippocampus-zscore.h5


  [Brain_Hippocampus - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Hippocampus-effect_size.h5


Small_Intestine_Terminal_Ileum


  [Small_Intestine_Terminal_Ileum - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Small_Intestine_Terminal_Ileum-pvalue.h5


  [Small_Intestine_Terminal_Ileum - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Small_Intestine_Terminal_Ileum-zscore.h5


  [Small_Intestine_Terminal_Ileum - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Small_Intestine_Terminal_Ileum-effect_size.h5


Adrenal_Gland


  [Adrenal_Gland - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adrenal_Gland-pvalue.h5


  [Adrenal_Gland - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adrenal_Gland-zscore.h5


  [Adrenal_Gland - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adrenal_Gland-effect_size.h5


Artery_Aorta


  [Artery_Aorta - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Aorta-pvalue.h5


  [Artery_Aorta - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Aorta-zscore.h5


  [Artery_Aorta - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Aorta-effect_size.h5


Liver


  [Liver - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Liver-pvalue.h5


  [Liver - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Liver-zscore.h5


  [Liver - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Liver-effect_size.h5


Artery_Coronary


  [Artery_Coronary - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Coronary-pvalue.h5


  [Artery_Coronary - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Coronary-zscore.h5


  [Artery_Coronary - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Coronary-effect_size.h5


Brain_Spinal_cord_cervical_c-1


  [Brain_Spinal_cord_cervical_c-1 - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Spinal_cord_cervical_c-1-pvalue.h5


  [Brain_Spinal_cord_cervical_c-1 - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Spinal_cord_cervical_c-1-zscore.h5


  [Brain_Spinal_cord_cervical_c-1 - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Spinal_cord_cervical_c-1-effect_size.h5


Heart_Atrial_Appendage


  [Heart_Atrial_Appendage - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Heart_Atrial_Appendage-pvalue.h5


  [Heart_Atrial_Appendage - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Heart_Atrial_Appendage-zscore.h5


  [Heart_Atrial_Appendage - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Heart_Atrial_Appendage-effect_size.h5


Artery_Tibial


  [Artery_Tibial - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Tibial-pvalue.h5


  [Artery_Tibial - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Tibial-zscore.h5


  [Artery_Tibial - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Artery_Tibial-effect_size.h5


Brain_Cortex


  [Brain_Cortex - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cortex-pvalue.h5


  [Brain_Cortex - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cortex-zscore.h5


  [Brain_Cortex - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cortex-effect_size.h5


Uterus


  [Uterus - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Uterus-pvalue.h5


  [Uterus - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Uterus-zscore.h5


  [Uterus - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Uterus-effect_size.h5


Brain_Putamen_basal_ganglia


  [Brain_Putamen_basal_ganglia - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Putamen_basal_ganglia-pvalue.h5


  [Brain_Putamen_basal_ganglia - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Putamen_basal_ganglia-zscore.h5


  [Brain_Putamen_basal_ganglia - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Putamen_basal_ganglia-effect_size.h5


Minor_Salivary_Gland


  [Minor_Salivary_Gland - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Minor_Salivary_Gland-pvalue.h5


  [Minor_Salivary_Gland - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Minor_Salivary_Gland-zscore.h5


  [Minor_Salivary_Gland - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Minor_Salivary_Gland-effect_size.h5


Brain_Cerebellar_Hemisphere


  [Brain_Cerebellar_Hemisphere - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cerebellar_Hemisphere-pvalue.h5


  [Brain_Cerebellar_Hemisphere - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cerebellar_Hemisphere-zscore.h5


  [Brain_Cerebellar_Hemisphere - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Cerebellar_Hemisphere-effect_size.h5


Vagina


  [Vagina - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Vagina-pvalue.h5


  [Vagina - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Vagina-zscore.h5


  [Vagina - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Vagina-effect_size.h5


Esophagus_Mucosa


  [Esophagus_Mucosa - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Mucosa-pvalue.h5


  [Esophagus_Mucosa - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Mucosa-zscore.h5


  [Esophagus_Mucosa - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Mucosa-effect_size.h5


Brain_Frontal_Cortex_BA9


  [Brain_Frontal_Cortex_BA9 - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Frontal_Cortex_BA9-pvalue.h5


  [Brain_Frontal_Cortex_BA9 - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Frontal_Cortex_BA9-zscore.h5


  [Brain_Frontal_Cortex_BA9 - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Frontal_Cortex_BA9-effect_size.h5


Cells_EBV-transformed_lymphocytes


  [Cells_EBV-transformed_lymphocytes - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Cells_EBV-transformed_lymphocytes-pvalue.h5


  [Cells_EBV-transformed_lymphocytes - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Cells_EBV-transformed_lymphocytes-zscore.h5


  [Cells_EBV-transformed_lymphocytes - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Cells_EBV-transformed_lymphocytes-effect_size.h5


Skin_Sun_Exposed_Lower_leg


  [Skin_Sun_Exposed_Lower_leg - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Skin_Sun_Exposed_Lower_leg-pvalue.h5


  [Skin_Sun_Exposed_Lower_leg - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Skin_Sun_Exposed_Lower_leg-zscore.h5


  [Skin_Sun_Exposed_Lower_leg - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Skin_Sun_Exposed_Lower_leg-effect_size.h5


Spleen


  [Spleen - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Spleen-pvalue.h5


  [Spleen - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Spleen-zscore.h5


  [Spleen - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Spleen-effect_size.h5


Esophagus_Muscularis


  [Esophagus_Muscularis - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Muscularis-pvalue.h5


  [Esophagus_Muscularis - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Muscularis-zscore.h5


  [Esophagus_Muscularis - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Muscularis-effect_size.h5


Adipose_Visceral_Omentum


  [Adipose_Visceral_Omentum - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adipose_Visceral_Omentum-pvalue.h5


  [Adipose_Visceral_Omentum - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adipose_Visceral_Omentum-zscore.h5


  [Adipose_Visceral_Omentum - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Adipose_Visceral_Omentum-effect_size.h5


Brain_Hypothalamus


  [Brain_Hypothalamus - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Hypothalamus-pvalue.h5


  [Brain_Hypothalamus - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Hypothalamus-zscore.h5


  [Brain_Hypothalamus - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Brain_Hypothalamus-effect_size.h5


Esophagus_Gastroesophageal_Junction


  [Esophagus_Gastroesophageal_Junction - pvalue], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Gastroesophageal_Junction-pvalue.h5


  [Esophagus_Gastroesophageal_Junction - zscore], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Gastroesophageal_Junction-zscore.h5


  [Esophagus_Gastroesophageal_Junction - effect_size], saving to: /mnt/phenomexcan_base/gene_assoc/spredixcan/spredixcan-Esophagus_Gastroesophageal_Junction-effect_size.h5


In [24]:
# writing_thread.join()

## Testing

In [25]:
# Total number of phenotypes across all sources; the builtin sum works directly
# on the dictionary values and yields a plain Python int.
n_expected_phenos = sum(conf.SMULTIXCAN_EXPECTED_PHENOTYPES.values())

In [26]:
# Number of keys (phenotypes) each output file is expected to contain.
n_expected_phenos

4091

In [27]:
output_hdf5_file = os.path.join(OUTPUT_FOLDER, 'spredixcan-Thyroid-pvalue.h5')

# Spot-check the Thyroid pvalues of two phenotypes against values taken from the
# raw S-PrediXcan files. Values are compared with a tight relative tolerance
# instead of `==`: the last digit of a float parsed from text depends on the
# parser's precision, so exact equality is fragile.
with pd.HDFStore(output_hdf5_file, mode='r') as store:
    store_keys = list(store.keys())
    assert len(store_keys) == n_expected_phenos
    display(store_keys[:5])

    clean_col = simplify_string_for_hdf5('N02-Diagnoses_main_ICD10_N02_Recurrent_and_persistent_haematuria')
    data = store[clean_col]
    assert data.shape == (15289,), data.shape
    np.testing.assert_allclose(data.loc['ENSG00000213965'], 0.00023756504804916094, rtol=1e-9)
    assert pd.isnull(data.loc['ENSG00000198670'])
    np.testing.assert_allclose(data.loc['ENSG00000177025'], 1.586957013502016e-05, rtol=1e-9)

    clean_col = simplify_string_for_hdf5('MAGNETIC_LDL.C')
    data = store[clean_col]
    assert data.shape == (15289,), data.shape
    np.testing.assert_allclose(data.loc['ENSG00000113163'], 1.3600016190892495e-13, rtol=1e-9)
    assert pd.isnull(data.loc['ENSG00000223510'])
    np.testing.assert_allclose(data.loc['ENSG00000204241'], 0.9990521310244208, rtol=1e-9)

['/c100001_raw_Food_weight',
 '/c100002_raw_Energy',
 '/c100003_raw_Protein',
 '/c100004_raw_Fat',
 '/c100005_raw_Carbohydrate']

In [28]:
output_hdf5_file = os.path.join(OUTPUT_FOLDER, 'spredixcan-Thyroid-zscore.h5')

# Spot-check the Thyroid zscores of two phenotypes against values taken from the
# raw S-PrediXcan files. Values are compared with a tight relative tolerance
# instead of `==`: the last digit of a float parsed from text depends on the
# parser's precision, so exact equality is fragile.
with pd.HDFStore(output_hdf5_file, mode='r') as store:
    store_keys = list(store.keys())
    assert len(store_keys) == n_expected_phenos
    display(store_keys[:5])

    clean_col = simplify_string_for_hdf5('N02-Diagnoses_main_ICD10_N02_Recurrent_and_persistent_haematuria')
    data = store[clean_col]
    assert data.shape == (15289,), data.shape
    np.testing.assert_allclose(data.loc['ENSG00000213965'], -3.6753054157625686, rtol=1e-9)
    assert pd.isnull(data.loc['ENSG00000198670'])
    np.testing.assert_allclose(data.loc['ENSG00000177025'], 4.316259089446458, rtol=1e-9)

    clean_col = simplify_string_for_hdf5('MAGNETIC_LDL.C')
    data = store[clean_col]
    assert data.shape == (15289,), data.shape
    np.testing.assert_allclose(data.loc['ENSG00000113163'], -7.400179862976074, rtol=1e-9)
    assert pd.isnull(data.loc['ENSG00000223510'])
    np.testing.assert_allclose(data.loc['ENSG00000204241'], 0.0011879778668467532, rtol=1e-9)

['/c100001_raw_Food_weight',
 '/c100002_raw_Energy',
 '/c100003_raw_Protein',
 '/c100004_raw_Fat',
 '/c100005_raw_Carbohydrate']

In [29]:
output_hdf5_file = os.path.join(OUTPUT_FOLDER, 'spredixcan-Thyroid-effect_size.h5')

# Spot-check the Thyroid effect sizes of two phenotypes against values taken from
# the raw S-PrediXcan files. Values are compared with a tight relative tolerance
# instead of `==`: the last digit of a float parsed from text depends on the
# parser's precision, so exact equality is fragile.
with pd.HDFStore(output_hdf5_file, mode='r') as store:
    store_keys = list(store.keys())
    assert len(store_keys) == n_expected_phenos
    display(store_keys[:5])

    clean_col = simplify_string_for_hdf5('N02-Diagnoses_main_ICD10_N02_Recurrent_and_persistent_haematuria')
    data = store[clean_col]
    assert data.shape == (15289,), data.shape
    np.testing.assert_allclose(data.loc['ENSG00000213965'], -0.0011122695712738851, rtol=1e-9)
    assert pd.isnull(data.loc['ENSG00000198670'])
    np.testing.assert_allclose(data.loc['ENSG00000177025'], 0.0013606910719667048, rtol=1e-9)

    # The raw MAGNETIC_LDL.C file has NA in the effect_size column for these
    # genes, so all of them must be null here.
    clean_col = simplify_string_for_hdf5('MAGNETIC_LDL.C')
    data = store[clean_col]
    assert data.shape == (15289,), data.shape
    assert pd.isnull(data.loc['ENSG00000113163'])
    assert pd.isnull(data.loc['ENSG00000223510'])
    assert pd.isnull(data.loc['ENSG00000204241'])

['/c100001_raw_Food_weight',
 '/c100002_raw_Energy',
 '/c100003_raw_Protein',
 '/c100004_raw_Fat',
 '/c100005_raw_Carbohydrate']

In [30]:
output_hdf5_file = os.path.join(OUTPUT_FOLDER, 'spredixcan-Skin_Not_Sun_Exposed_Suprapubic-pvalue.h5')

# Spot-check the pvalues of one phenotype in a second tissue against values taken
# from the raw S-PrediXcan file. Values are compared with a tight relative
# tolerance instead of `==`: the last digit of a float parsed from text depends
# on the parser's precision, so exact equality is fragile.
with pd.HDFStore(output_hdf5_file, mode='r') as store:
    store_keys = list(store.keys())
    assert len(store_keys) == n_expected_phenos
    display(store_keys[:5])

    clean_col = simplify_string_for_hdf5('N02-Diagnoses_main_ICD10_N02_Recurrent_and_persistent_haematuria')
    data = store[clean_col]
    assert data.shape == (14920,), data.shape
    np.testing.assert_allclose(data.loc['ENSG00000214575'], 0.999887282076106, rtol=1e-9)
    assert pd.isnull(data.loc['ENSG00000231131'])
    np.testing.assert_allclose(data.loc['ENSG00000177025'], 2.0403800371097046e-05, rtol=1e-9)

['/c100001_raw_Food_weight',
 '/c100002_raw_Energy',
 '/c100003_raw_Protein',
 '/c100004_raw_Fat',
 '/c100005_raw_Carbohydrate']

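The cells below were used to write the asserts above: for each gene, check that the zscore, effect size and pvalue in the raw S-PrediXcan result files (including their sign) match the values asserted above; `NA` entries are the ones expected to be null.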

In [31]:
# Root directories of the raw S-PrediXcan results; each one is passed as "$1"
# to the bash cells below.
rapid_gwas_dir = conf.SPREDIXCAN_RESULTS_DIR['RapidGWASProject']
gtex_gwas_dir = conf.SPREDIXCAN_RESULTS_DIR['GTEX_GWAS']

In [32]:
%%bash -s "$rapid_gwas_dir"
# Print the header and the rows of three genes (first five columns) from the
# raw Thyroid results of phenotype N02.
# The path is quoted and the cell stops if the directory cannot be entered, so
# the commands below never run in the wrong directory.
cd "$1/N02" || exit 1
results_file="N02-gtex_v8-Thyroid-2018_10.csv"

head -1 "$results_file" | cut -f1-5 -d, | column -s, -t
for gene in ENSG00000213965 ENSG00000198670 ENSG00000177025; do
    echo ""
    grep "$gene" "$results_file" | cut -f1-5 -d, | column -s, -t
done

gene  gene_name  zscore  effect_size  pvalue

ENSG00000213965.3  NUDT19  -3.675305415762569  -0.0011122695712738851  0.00023756504804916094

ENSG00000198670.11  LPA  NA  NA  NA

ENSG00000177025.3  C19orf18  4.316259089446458  0.0013606910719667048  1.586957013502016e-05


In [33]:
%%bash -s "$rapid_gwas_dir"
# Print the header and the rows of three genes (first five columns) from the
# raw Skin_Not_Sun_Exposed_Suprapubic results of phenotype N02.
# The path is quoted and the cell stops if the directory cannot be entered, so
# the commands below never run in the wrong directory.
cd "$1/N02" || exit 1
results_file="N02-gtex_v8-Skin_Not_Sun_Exposed_Suprapubic-2018_10.csv"

head -1 "$results_file" | cut -f1-5 -d, | column -s, -t
for gene in ENSG00000214575 ENSG00000231131 ENSG00000177025; do
    echo ""
    grep "$gene" "$results_file" | cut -f1-5 -d, | column -s, -t
done

gene  gene_name  zscore  effect_size  pvalue

ENSG00000214575.9  CPEB1  -0.00014127096801530567  -1.8697265305035635e-07  0.9998872820761059

ENSG00000231131.6  LINC01468  NA  NA  NA

ENSG00000177025.3  C19orf18  4.260425591320433  0.0009033853136217764  2.0403800371097043e-05


In [34]:
%%bash -s "$gtex_gwas_dir"
# Print the header and the rows of three genes (first five columns) from the
# raw Thyroid results of phenotype MAGNETIC_LDL.C.
# The path is quoted and the cell stops if the directory cannot be entered, so
# the commands below never run in the wrong directory.
cd "$1/MAGNETIC_LDL.C" || exit 1
results_file="spredixcan_igwas_gtexmashrv8_MAGNETIC_LDL.C__PM__Thyroid.csv"

head -1 "$results_file" | cut -f1-5 -d, | column -s, -t
for gene in ENSG00000113163 ENSG00000223510 ENSG00000204241; do
    echo ""
    grep "$gene" "$results_file" | cut -f1-5 -d, | column -s, -t
done

gene  gene_name  zscore  effect_size  pvalue

ENSG00000113163.15  COL4A3BP  -7.400179862976074  NA  1.3600016190892497e-13

ENSG00000223510.6  CDRT15  NA  NA  NA

ENSG00000204241.7  RP11-713P17.3  0.0011879778668467532  NA  0.9990521310244207
