In [1]:
# https://github.com/broadinstitute/gtex-v8

# The following Python modules are needed to run the notebooks:
#  numpy, pandas, scipy, ipython, jupyter, matplotlib, seaborn, qtl

# conda config --append channels conda-forge
# pip install qtl
# conda config --append channels bioconda
# conda install pybigwig
# conda install bx-python

# mkdir pdfs # for plots

import os, sys, glob
from pprint import pprint

# __file__ is not defined inside a notebook, so the current working directory
# is used as the anchor instead (pathlib, Python 3.4+).
from pathlib import Path
# Locate the data directory relative to the current working directory.
folder = Path().resolve()

if folder.name == 'gtex-v8':
    # Running from the repository root: the data lives in ./data
    # (os.path.join returns a str here).
    data_folder = os.path.join(folder, 'data')
elif folder.name == 'data':
    # Already inside the data directory, e.g. this cell is re-run after a
    # later cell has called os.chdir(data_folder). data_folder is a Path here.
    data_folder = folder
else:
    # Without this branch data_folder would be left undefined and the
    # listing below would fail with an unhelpful NameError.
    raise RuntimeError(
        "Cannot locate the data folder: start the notebook from the 'gtex-v8' "
        "directory or its 'data' sub-directory (current directory: {})".format(folder)
    )
os.listdir(data_folder)

['Fig3B_finemapping_exp_validation_data.txt',
 'Fig3C_CDX8.mainsnp.region.bothtfs.txt',
 'Fig3D_CDX8.aFC_EGR1expr.txt',
 'Fig5A_enrichment_summary.txt',
 'Fig5B_ld_region_count.txt',
 'Fig5C_eqtl_gwas_correlation_with_rcp_threshold.txt',
 'Fig5D_ukb_rare_variants_coding_model.txt',
 'Fig5E_coreg_trait_Pn.txt',
 'Fig5E_tissue_sharing_trait_Pn.txt',
 'Fig6A_mashr_tissues.txt.gz',
 'Fig6B_clustering_similarity_median_rand.txt',
 'Fig6C_all_top.z_lfsr.sig.pruned.txt.gz',
 'Fig6C_sqtls.z_lfsr.sig.pruned.txt.gz',
 'Fig6D_gtex_v8_all_eqtl_categories.txt',
 'Fig6_pairwise_roadmap_analysis',
 'Fig7B_Whole_Blood_eQTLs.torus_enrichment.txt',
 'Fig7B_Whole_Blood_Neutrophil_ieQTLs.torus_enrichment.txt',
 'Fig7B_Whole_Blood_Neutrophil_isQTLs.torus_enrichment.txt',
 'Fig7B_Whole_Blood_sQTLs.torus_enrichment.txt',
 'Fig7C_ieQTL_indep_enrichment_summary.txt',
 'Fig7E_Whole_Blood.Neutrophils.eQTL_ieQTL_GWAS_coloc.txt',
 'GTEx_Analysis_v8_eQTL.torus_enrichment.txt',
 'GTEx_Analysis_v8_eQTL.trans.torus_en

In [4]:
# Download relevant files

# Work inside the data directory so that relative paths (and the wget
# downloads in the following cells) resolve there.
os.chdir(data_folder)

# Snapshot of the directory contents at this point. The directory is listed
# once and the same list is printed, so the displayed listing and org_files
# cannot disagree.
# NOTE(review): org_files is presumably compared against a later listing;
# its consumer is not visible in this part of the notebook.
org_files = os.listdir('.')
pprint(org_files)

['Fig3B_finemapping_exp_validation_data.txt',
 'Fig3C_CDX8.mainsnp.region.bothtfs.txt',
 'Fig3D_CDX8.aFC_EGR1expr.txt',
 'Fig5A_enrichment_summary.txt',
 'Fig5B_ld_region_count.txt',
 'Fig5C_eqtl_gwas_correlation_with_rcp_threshold.txt',
 'Fig5D_ukb_rare_variants_coding_model.txt',
 'Fig5E_coreg_trait_Pn.txt',
 'Fig5E_tissue_sharing_trait_Pn.txt',
 'Fig6A_mashr_tissues.txt.gz',
 'Fig6B_clustering_similarity_median_rand.txt',
 'Fig6C_all_top.z_lfsr.sig.pruned.txt.gz',
 'Fig6C_sqtls.z_lfsr.sig.pruned.txt.gz',
 'Fig6D_gtex_v8_all_eqtl_categories.txt',
 'Fig6_pairwise_roadmap_analysis',
 'Fig7B_Whole_Blood_eQTLs.torus_enrichment.txt',
 'Fig7B_Whole_Blood_Neutrophil_ieQTLs.torus_enrichment.txt',
 'Fig7B_Whole_Blood_Neutrophil_isQTLs.torus_enrichment.txt',
 'Fig7B_Whole_Blood_sQTLs.torus_enrichment.txt',
 'Fig7C_ieQTL_indep_enrichment_summary.txt',
 'Fig7E_Whole_Blood.Neutrophils.eQTL_ieQTL_GWAS_coloc.txt',
 'GTEx_Analysis_v8_eQTL.torus_enrichment.txt',
 'GTEx_Analysis_v8_eQTL.trans.torus_en

In [5]:
# QTLs
!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_eQTL.tar && \
    tar xf GTEx_Analysis_v8_eQTL.tar && rm GTEx_Analysis_v8_eQTL.tar

!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_sQTL.tar && \
    tar xf GTEx_Analysis_v8_sQTL.tar && rm GTEx_Analysis_v8_sQTL.tar

!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_eQTL_independent.tar && \
    tar xf GTEx_Analysis_v8_eQTL_independent.tar && rm GTEx_Analysis_v8_eQTL_independent.tar

!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_sQTL_independent.tar && \
    tar xf GTEx_Analysis_v8_sQTL_independent.tar && rm GTEx_Analysis_v8_sQTL_independent.tar

!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_trans_eGenes_fdr05.txt

!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_trans_sGenes_fdr05.txt

!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_eQTL_expression_matrices.tar && \
    tar xf GTEx_Analysis_v8_eQTL_expression_matrices.tar && rm GTEx_Analysis_v8_eQTL_expression_matrices.tar

!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_eQTL_covariates.tar.gz && \
    tar xf GTEx_Analysis_v8_eQTL_covariates.tar.gz && rm GTEx_Analysis_v8_eQTL_covariates.tar.gz

!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_sQTL_groups.tar.gz && \
    tar xf GTEx_Analysis_v8_sQTL_groups.tar.gz && rm GTEx_Analysis_v8_sQTL_groups.tar.gz

# fine mapping results
!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_v8_finemapping_CAVIAR.tar && \
    tar xf GTEx_v8_finemapping_CAVIAR.tar && rm GTEx_v8_finemapping_CAVIAR.tar
!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_v8_finemapping_CaVEMaN.tar && \
    tar xf GTEx_v8_finemapping_CaVEMaN.tar && rm GTEx_v8_finemapping_CaVEMaN.tar
!wget https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_v8_finemapping_DAPG.tar && \
    tar xf GTEx_v8_finemapping_DAPG.tar && rm GTEx_v8_finemapping_DAPG.tar

# annotation
!wget https://storage.googleapis.com/gtex_analysis_v8/reference/gencode.v26.GRCh38.genes.gtf
!wget https://storage.googleapis.com/gtex_analysis_v8/reference/WGS_Feature_overlap_collapsed_VEP_short_4torus.MAF01.txt.gz

Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.
ERROR: could not open HSTS store at '/home/zeno/.wget-hsts'. HSTS will be disabled.
--2020-02-09 22:11:36--  https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Analysis_v8_eQTL.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 2607:f8b0:400a:800::2010, 172.217.3.208
Connecting to storage.googleapis.com (storage.googleapis.com)|2607:f8b0:400a:800::2010|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1562828800 (1.5G) [application/x-tar]
Saving to: ‘GTEx_Analysis_v8_eQTL.tar’


2020-02-09 22:17:50 (3.98 MB/s) - ‘GTEx_Analysis_v8_eQTL.tar’ saved [1562828800/1562828800]

Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.
ERROR: could not open HSTS store at '/home/zeno/.wget-hsts'. HSTS will be disabled.
--2020-02-09 22:18:27--  https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_Ana

Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.
ERROR: could not open HSTS store at '/home/zeno/.wget-hsts'. HSTS will be disabled.
--2020-02-09 22:31:51--  https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/GTEx_v8_finemapping_CaVEMaN.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 2607:f8b0:400a:800::2010, 172.217.3.176
Connecting to storage.googleapis.com (storage.googleapis.com)|2607:f8b0:400a:800::2010|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18196480 (17M) [application/x-tar]
Saving to: ‘GTEx_v8_finemapping_CaVEMaN.tar’


2020-02-09 22:32:04 (1.40 MB/s) - ‘GTEx_v8_finemapping_CaVEMaN.tar’ saved [18196480/18196480]

Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.
ERROR: could not open HSTS store at '/home/zeno/.wget-hsts'. HSTS will be disabled.
--2020-02-09 22:32:04--  https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_da

In [7]:
# this is close, but not the original VCF file
!wget https://storage.googleapis.com/gtex_analysis_v8/reference/GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.lookup_table.txt.gz

# this file not exist
# !wget https://storage.googleapis.com/gtex_analysis_v8/reference/GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.SHAPEIT2_phased.vcf.gz

# Need to get it from https://app.terra.bio/ - which has some important rules
# * only individual accounts - google logon
# * no autologons - no leaving logged in
# * Do not access TCGA controlled access data unless dbGaP authorized
# * Do not upload personal datasets to the tutorials and do not add worflows (Method Configs)

Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.
ERROR: could not open HSTS store at '/home/zeno/.wget-hsts'. HSTS will be disabled.
--2020-02-09 22:55:41--  https://storage.googleapis.com/gtex_analysis_v8/reference/GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.SHAPEIT2_phased.vcf.gz
Resolving storage.googleapis.com (storage.googleapis.com)... 2607:f8b0:400a:801::2010, 216.58.217.48
Connecting to storage.googleapis.com (storage.googleapis.com)|2607:f8b0:400a:801::2010|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2020-02-09 22:55:42 ERROR 404: Not Found.

Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.
ERROR: could not open HSTS store at '/home/zeno/.wget-hsts'. HSTS will be disabled.
--2020-02-09 22:55:42--  https://storage.googleapis.com/gtex_analysis_v8/reference/GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.lookup_table.txt.gz
Resolving s