In [None]:
# Dr. Wheeler:
# Claudia created two new files, "formatted_714.1.tsv" and "large_formatted_714.1.tsv".
# I ran both of these through S-PrediXcan using the specifications in the final cell.
# Both times, it reported that only 1% or 2% of the model's SNPs were used.
# This was also the case when I re-ran two older files, "formatted_261.2.tsv" and "formatted_250.tsv".
# I tried running each file with different matrices and models from mashr but still came up with 1-2%.
# As it stands, the input we have seems to be incompatible with S-PrediXcan even though I can see no meaningful difference
# between it and the META file that had been formatted for the tool.
# Please email me if I need to clarify anything. Thank you again for your help.

# Please run the code in the following docstrings from the command line.
# Apologies, I have not yet nailed down the jupyter notebook functionality.

In [None]:
# Step 1: install Miniconda3 (commands taken from the Miniconda website's instructions).
#
# The triple-quoted string below is NOT executed by this cell; it only holds the
# shell commands to paste into a terminal. In order, they:
#   - create ~/miniconda3 and download the Linux x86_64 installer into it,
#   - run the installer with `-b -u -p ~/miniconda3`, then delete the installer script,
#   - activate the new install, run `conda init --all`, and re-source ~/.bashrc.

# From the terminal, run these lines:
"""
mkdir -p ~/miniconda3
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
rm ~/miniconda3/miniconda.sh

source ~/miniconda3/bin/activate
conda init --all
source ~/.bashrc
"""

In [None]:
# Step 2: set up MetaXcan.
# The triple-quoted strings in this cell are not executed; paste each one into a terminal.

# Clone the MetaXcan repository into /home/jupyter/packages.
"""
cd /home/jupyter/packages
git clone https://github.com/hakyimlab/MetaXcan.git
"""

# Create the conda environment for MetaXcan from the repository's conda_env.yaml.
# NOTE(review): later steps activate an environment called `imlabtools`; presumably
# that is the name defined inside conda_env.yaml -- confirm.
"""
cd /home/jupyter/packages/MetaXcan/software
conda env create -f conda_env.yaml
"""

# Download and unpack the mashr eQTL archive into a new `allofus_test` directory.
# Later cells read a model database (.db) and a covariance file (.txt.gz) from the
# resulting eqtl/mashr/ directory.
# NOTE(review): the URL contains an unquoted `?`; some shells treat `?` as a glob
# character, so consider wrapping the URL in quotes when pasting it.
"""
cd /home/jupyter/packages/MetaXcan/software
mkdir allofus_test
cd allofus_test
wget https://zenodo.org/record/3518299/files/mashr_eqtl.tar?download=1 -O mashr_eqtl.tar
tar -xvpf mashr_eqtl.tar
rm mashr_eqtl.tar
"""

### IGNORE EVERYTHING BELOW IN THIS CELL IF TESTING ON THE COMMAND LINE ###

# To run from a notebook instead, select the MetaXcan conda environment as the kernel:
#   1. open a Jupyter notebook
#   2. click on the 'Kernel' tab at the top
#   3. hover over 'Change kernel'
#   4. click on 'Python [conda env:imlabtools]'

In [None]:
# Step 3: fetch the test data and activate the environment before running S-PrediXcan.
# The triple-quoted strings in this cell are not executed; paste each one into a terminal.

# Copy every .tsv file from the bucket's data/ folder into the allofus_test directory.
# NOTE(review): the commands in the later cells read `data/formatted_gtex_714.1.tsv`,
# i.e. a `data/` subdirectory, whereas this step copies the files directly into
# allofus_test -- confirm where the input file is actually expected to live.
"""
cd /home/jupyter/packages/MetaXcan/software/allofus_test
gsutil -m cp gs://fc-secure-bb61452f-d5e2-4d26-9227-6a9444241af8/data/*.tsv .
"""

# Activate the `imlabtools` conda environment from the MetaXcan software directory.
"""
cd /home/jupyter/packages/MetaXcan/software
conda activate imlabtools
"""

In [None]:
"""
python summary-gwas-imputation/src/gwas_parsing.py \
-gwas_file data/formatted_gtex_714.1.tsv \
-snp_reference_metadata data/variant_metadata.txt.gz METADATA \
-output_column_map ID variant_id \
-output_column_map REF non_effect_allele \
-output_column_map ALT effect_allele \
-output_column_map BETA effect_size \
-output_column_map SE standard_error \
-output_column_map \#CHROM chromosome \
--chromosome_format \
-output_column_map POS position \
-output_column_map AF_Allele2 frequency \
--insert_value sample_size 151106 --insert_value n_cases 15406 \
-output_order variant_id panel_variant_id chromosome position effect_allele non_effect_allele frequency pvalue zscore effect_size standard_error sample_size n_cases \
-output META_phenotype_CV_404_ACAF_sumstats_for_S-PrediXcan.tsv.gz
"""

In [None]:
# Step 5: run S-PrediXcan on the reformatted GWAS file.
# The string below is not executed by this cell; paste the command into a terminal.
#
# --model_db_path and --covariance select the prediction model and its matching
# covariance file; swap both together to use a different mashr model.
# Per the setup steps, the reference files were unpacked into
# /home/jupyter/packages/MetaXcan/software/allofus_test/eqtl/mashr/
#
# The SNP column used here is `panel_variant_id` (from the gwas_parsing.py output),
# matched against the model database's `varID` key.
#
# NOTE(review): the relative paths do not share one obvious working directory:
# `MetaXcan/software/SPrediXcan.py` resolves from /home/jupyter/packages, while
# `eqtl/mashr/...` was unpacked under .../MetaXcan/software/allofus_test. Confirm the
# directory this is run from (or whether copies/symlinks exist).
"""
python MetaXcan/software/SPrediXcan.py \
--gwas_file META_phenotype_CV_404_ACAF_sumstats_for_S-PrediXcan.tsv.gz \
--snp_column panel_variant_id \
--effect_allele_column effect_allele \
--non_effect_allele_column non_effect_allele \
--beta_column effect_size \
--se_column standard_error \
--model_db_path eqtl/mashr/mashr_Heart_Atrial_Appendage.db \
--covariance eqtl/mashr/mashr_Heart_Atrial_Appendage.txt.gz \
--keep_non_rsid \
--additional_output \
--model_db_snp_key varID \
--throw \
--output_file AoU_META_phenotype_CV_404_ACAF_Heart_Atrial_Appendage.csv
"""

# Results are written to the file named by --output_file above
# (AoU_META_phenotype_CV_404_ACAF_Heart_Atrial_Appendage.csv), relative to the
# directory the command is run from -- the command does not create a `results.csv`.

In [None]:
import os
import subprocess

# Run S-PrediXcan from inside the notebook on the formatted GWAS file.
#
# All relative paths in the command below are resolved against this directory.
# NOTE(review): the setup steps unpack the mashr files under
# /home/jupyter/packages/MetaXcan/software/allofus_test/eqtl/mashr/, not under
# /home/jupyter/packages/eqtl/mashr/ -- confirm that `eqtl/mashr/...` exists here.
os.chdir("/home/jupyter/packages")

# The command is passed as an argument list rather than one shell string, so no
# shell quoting or backslash line-continuations are involved.
#
# NOTE(review): unlike the command-line example earlier in this notebook, this run
# feeds S-PrediXcan the raw formatted file with `--snp_column SNP` instead of the
# gwas_parsing.py output with `--snp_column panel_variant_id`. With
# `--model_db_snp_key varID`, the SNP column presumably has to hold identifiers in
# the model's varID format; if it does not, that could explain the 1-2% SNP usage.
# Confirm against the contents of the SNP column.
spredixcan_command = [
    "python", "MetaXcan/software/SPrediXcan.py",
    "--gwas_file", "data/formatted_gtex_714.1.tsv",
    "--snp_column", "SNP",
    "--effect_allele_column", "ALT",
    "--non_effect_allele_column", "REF",
    "--beta_column", "BETA",
    "--se_column", "SE",
    "--model_db_path", "eqtl/mashr/mashr_Heart_Atrial_Appendage.db",
    "--covariance", "eqtl/mashr/mashr_Heart_Atrial_Appendage.txt.gz",
    "--keep_non_rsid",
    "--additional_output",
    "--model_db_snp_key", "varID",
    "--throw",
    "--output_file", "qqman-twas-input.csv",
]

# check=True raises subprocess.CalledProcessError when S-PrediXcan exits with a
# non-zero status. The previous os.system call discarded the exit status, so a
# failed run looked the same as a successful one.
subprocess.run(spredixcan_command, check=True)