In [1]:
[global]
# Workflow-wide parameters; each one can be overridden on the `sos run` command line
# (e.g. `--susie-table PATH`), as done in the example cells of this notebook.
# Tab-separated table with `gene` and `susie_file` columns (read by susie_to_dapg_1)
parameter: susie_table = ""
# Path of the eQTL VCF produced by susie_to_dapg; the final file is gzipped to `<out_vcf>.gz`
parameter: out_vcf = ""
# Tab-separated table with `ld_block` and `susie_object_file` columns (read by fastenloc_1)
parameter: file_table = ""
# GWAS PIP table written (gzip-compressed) by fastenloc_1 and passed to `fastenloc -gwas`
parameter: out_file = ""
# Value passed to `fastenloc -prefix`
parameter: out_pre = ""
# eQTL annotation VCF passed to `fastenloc -eqtl`
parameter: eqtl_vcf = ""
# Tissue label: written after '@' in the INFO field and passed to `fastenloc -t`
parameter: tissue = ''
# Container image handed to every script action
parameter: container = ''
# Task resources shared by all steps: trunk_size, walltime, mem and cores respectively
parameter: job_size = 1
parameter: walltime = "5h"
parameter: mem = "8G"
parameter: numThreads = 1

# Conversion of SuSiE eQTL Objects to DAP-G VCF Format
To run fastENLOC, we first need to convert the eQTL fine-mapping results for each gene in a specific tissue into a VCF-style file that carries the relevant information in the INFO column. The input is a tab-separated table listing each gene and the path to its corresponding eQTL SuSiE file.

It should be formatted like the file below:

In [1]:
cat /restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/fastenloc/susie_tables/eqtl_susie_table_ex.tsv

gene	susie_file
ENSG00000000419	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000000419.unisusie.fit.rds
ENSG00000000457	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000000457.unisusie.fit.rds
ENSG00000000938	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000000938.unisusie.fit.rds
ENSG00000000971	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000000971.unisusie.fit.rds
ENSG00000001036	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000001036.unisusie.fit.rds
ENSG00000001084	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000001084.unisusie.fit.rds
ENSG00000001167	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000001167.unisusie.fit.rds
ENSG00000001460	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000001460.unisusie.fit.rds
ENSG00000001461	/restricted/projectnb/casa/oaolayin/eQTL_Finemap/demo.ENSG00000001461.unisusie.fit.rds
ENSG00000001561	/restricted/projectnb/casa/oaolayin/eQTL_

In [None]:
[susie_to_dapg_1]
# Flatten the per-gene SuSiE eQTL fits listed in `susie_table` into one tab-separated row
# per (variant, credible set) membership: chr, pos, variant id, ref, alt, INFO.
# The rows are written to "<out_vcf>_test", which susie_to_dapg_2 merges per variant.
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'
R: expand= "$[ ]", container = container
    library(susieR)
    library(stringr)

    # stringsAsFactors = FALSE keeps gene ids and file paths as character on R < 4.0
    susie_tbl = read.csv("$[susie_table]", sep = "\t", stringsAsFactors = FALSE)
    susie_files = susie_tbl$susie_file
    genes = susie_tbl$gene
    tissue = "$[tissue]"
    # variants whose PIP is below this value are not reported
    pip_threshold = 1e-04

    # format() applied to a whole vector pads every element to a common width/precision,
    # so each value is formatted on its own to keep the per-value notation
    format_sci = function(x) {
      vapply(x, function(v) format(v, scientific = TRUE), character(1), USE.NAMES = FALSE)
    }

    # one data frame per gene, bound once at the end (rbind inside the loop is quadratic)
    rows = vector("list", length(genes))
    for (i in seq_along(genes)) {
      gene = genes[i]
      # NOTE(review): the fit is always read from the `dlpfc_eqtl` element, independent of `tissue`
      fit = readRDS(susie_files[i])$dlpfc_eqtl
      cs_list = fit$sets$cs
      # genes without any credible set contribute nothing
      if (length(cs_list) == 0) next

      # credible-set level values: number of variants and summed PIP of each set
      cs_sizes = lengths(cs_list)
      cs_pip_sums = vapply(cs_list, function(members) sum(fit$pip[members]), numeric(1))
      # set names look like "L3"; the numeric part is the signal id written to INFO
      cs_ids = as.integer(str_remove(names(cs_list), "L"))

      # one entry per (variant, credible set) membership, so a variant that belongs to
      # several sets yields one row per set; sorted by variant index to keep the
      # variant order of the fit (order() leaves ties in credible-set order)
      member_idx = unlist(cs_list, use.names = FALSE)
      member_cs = rep(seq_along(cs_list), cs_sizes)
      ord = order(member_idx)
      member_idx = member_idx[ord]
      member_cs = member_cs[ord]

      # ignore variants whose pip is below the threshold
      member_pip = fit$pip[member_idx]
      keep = !is.na(member_pip) & member_pip >= pip_threshold
      if (!any(keep)) next
      member_idx = member_idx[keep]
      member_cs = member_cs[keep]
      member_pip = member_pip[keep]

      # variant names are chr:pos:ref:alt (or already "_"-separated); split once per variant
      var_id = str_replace_all(fit$variable_name[member_idx], ":", "_")
      parts = strsplit(var_id, "_")
      field = function(k) vapply(parts, function(p) p[k], character(1))

      # INFO entry: gene:signal@tissue=pip:[summed set pip:set size]
      # NOTE(review): the reference annotation shown later in this notebook has no ":"
      # between the PIP and "[" - confirm fastENLOC is meant to receive this variant
      info = paste0(gene, ":", cs_ids[member_cs], "@", tissue, "=", format_sci(member_pip), ":", "[",
                    format_sci(cs_pip_sums[member_cs]), ":", cs_sizes[member_cs], "]")
      rows[[i]] = data.frame(chr = field(1), pos = field(2), var_id = var_id,
                             ref = field(3), alt = field(4), info = info,
                             stringsAsFactors = FALSE)
    }
    rows = Filter(Negate(is.null), rows)
    vcf_out = if (length(rows) > 0) do.call(rbind, rows) else data.frame()
    write.table(vcf_out, "$[out_vcf]_test", sep ="\t", quote = F, row.names = F, col.names = F)

In [None]:
[susie_to_dapg_2]
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'
python3: expand= "$[ ]", container = container
    f = open("$[out_vcf]_test")
    f_out = open("$[out_vcf]", "w+")
    info_map = {}
    for line in f:
        elems = line.split("\t")
        elems[-1] = elems[-1].strip()
        var_idx = elems[2]
        try:
            info_map[var_idx] = info_map[var_idx] + [elems[-1]]
        except KeyError:
            info_map[var_idx] = [elems[-1]]
    for var_idx, infos in info_map.items():
        chrm, pos, ref, alt = var_idx.split("_")
        info = "|".join(infos)
        f_out.write("\t".join([chrm, pos, var_idx, ref, alt, info]) + "\n")
    f_out.close()
    f.close()

In [None]:
[susie_to_dapg_3]
# Remove the intermediate table from susie_to_dapg_1 and gzip the merged VCF to "<out_vcf>.gz".
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'
sh: expand= "$[ ]", container = container
    # paths are quoted so that spaces or glob characters in out_vcf are passed intact
    rm -f -- "$[out_vcf]_test"
    # -f replaces a .gz left behind by an earlier run instead of aborting the step
    gzip -f -- "$[out_vcf]"

# fastENLOC from susie objects

The first input for the pipeline is a table listing the `susie` objects for each of the LD Blocks. This takes the form of one column with the name of the LD block and another with the path to the object file.

In [6]:
head /restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/fastenloc/susie_tables/kunkle.tsv

ld_block	susie_object_file
chr1_101384274_104443097	/restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/Final_Finemapping/outdir/kunkle/kunkle_sumstat_hg38_qc.chr1.chr1_101384274_104443097.unisusie_rss.fit.rds
chr1_104443097_106225286	/restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/Final_Finemapping/outdir/kunkle/kunkle_sumstat_hg38_qc.chr1.chr1_104443097_106225286.unisusie_rss.fit.rds
chr1_106225286_109761915	/restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/Final_Finemapping/outdir/kunkle/kunkle_sumstat_hg38_qc.chr1.chr1_106225286_109761915.unisusie_rss.fit.rds
chr1_109761915_111483530	/restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/Final_Finemapping/outdir/kunkle/kunkle_sumstat_hg38_qc.chr1.chr1_109761915_111483530.unisusie_rss.fit.rds
chr1_111483530_113276642	/restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/Final_Finemapping/outdir/kunkle/kunkle_sumstat_hg38_qc.chr1.chr1_111483530_113276642.unisusie_rss.fit.rds
chr1_113276642_115338054	/restricted/projectnb/c

Additionally, we need a VCF file containing the xQTLs for the tissue of interest.

In [1]:
zcat /restricted/projectnb/casa/oaolayin/gtex_v8.eqtl_annot_rsid.vcf.gz | head 

chr1	14677	rs201327123	G	A	ENSG00000228463:2@Skin_Not_Sun_Exposed=1.00000e+00[1.000e+00:1]|ENSG00000228463:1@Adipose_Visceral_Omentum=1.00000e+00[1.000e+00:1]|ENSG00000228463:2@Nerve_Tibial=1.00000e+00[1.000e+00:1]|ENSG00000228463:2@Muscle_Skeletal=1.00000e+00[1.000e+00:1]|ENSG00000228463:2@Skin_Sun_Exposed=9.99998e-01[1.000e+00:1]|ENSG00000228463:1@Heart_Left_Ventricle=9.99995e-01[1.000e+00:1]|ENSG00000228463:1@Heart_Atrial_Appendage=9.99990e-01[1.000e+00:1]|ENSG00000241860:1@Skin_Sun_Exposed=9.99986e-01[1.000e+00:1]|ENSG00000241860:1@Skin_Not_Sun_Exposed=9.99983e-01[1.000e+00:1]|ENSG00000228327:1@Muscle_Skeletal=9.99942e-01[9.999e-01:1]|ENSG00000241860:1@Nerve_Tibial=9.99938e-01[9.999e-01:1]|ENSG00000228327:1@Adipose_Visceral_Omentum=9.99918e-01[9.999e-01:1]|ENSG00000228327:2@Adipose_Subcutaneous=9.99918e-01[9.999e-01:1]|ENSG00000228327:1@Heart_Atrial_Appendage=9.99866e-01[9.999e-01:1]|ENSG00000228327:2@Lung=9.99832e-01[9.998e-01:1]|ENSG00000241860:1@Artery_Tibial=9.99822e-01[9.998e-

In the first step we convert the susie objects into one table for use with fastENLOC.

In [2]:
[fastenloc_1]
# Stack the SuSiE fits of all LD blocks listed in `file_table` into one gzip-compressed,
# header-less, tab-separated table with the columns: variant, LD block, PIP.
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'
R: expand= "$[ ]", container = container
  # stringsAsFactors = FALSE keeps block names and file paths as character on R < 4.0
  susie_tbl = read.csv('$[file_table]', sep = "\t", stringsAsFactors = FALSE)
  if (nrow(susie_tbl) == 0) {
    stop("file_table contains no LD blocks: ", '$[file_table]')
  }
  # one data frame per LD block, bound once at the end instead of growing vectors in a loop
  blocks = lapply(seq_len(nrow(susie_tbl)), function(idx) {
    ld_block = susie_tbl$ld_block[idx]
    filename = susie_tbl$susie_object_file[idx]
    ssie = readRDS(filename)
    vars = ssie$variants
    pip = ssie$pip
    # a block without variants contributes no rows
    if (length(vars) == 0 && length(pip) == 0) return(NULL)
    if (length(vars) != length(pip)) {
      stop("number of variants and PIPs differ in ", filename)
    }
    # column order is variant, LD block, PIP; this is the order the table has always
    # been written in and is now spelled out explicitly
    data.frame(var = unname(vars), set = rep(ld_block, length(vars)), pip = unname(pip),
               row.names = NULL, stringsAsFactors = FALSE)
  })
  blocks = Filter(Negate(is.null), blocks)
  out_tbl = if (length(blocks) > 0) do.call(rbind, blocks) else data.frame()
  # "w" is the documented write mode for gzfile connections
  gzf = gzfile('$[out_file]', 'w')
  write.table(out_tbl, gzf, sep = "\t", quote = F, row.names = F, col.names = F)
  close(gzf)

Then we run fastENLOC on that table.

In [3]:
[fastenloc_2]
# Run fastENLOC with the eQTL annotation VCF and the GWAS PIP table written by fastenloc_1.
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'
sh: expand= "$[ ]", container = container
    # same "$[ ]" interpolation sigil as the other steps; arguments are quoted so that
    # paths containing spaces reach fastenloc as single arguments
    fastenloc -eqtl "$[eqtl_vcf]" -gwas "$[out_file]" -t "$[tissue]" -prefix "$[out_pre]"

# Example
We now run an example of this using the vcf file generated from the sample of susie eQTLs.

In [2]:
# Host directories to bind into the Singularity container used by the workflow
export SINGULARITY_BIND="/restricted/projectnb/amp-ad/,/restricted/projectnb/casa,/scratch/"
# Run the susie_to_dapg steps on the example gene table; the last step gzips
# the file given by --out-vcf, so the result ends up in test.vcf.gz
sos run /restricted/projectnb/casa/oaolayin/xqtl-pipeline/code/intact_pipeline/fastenloc.ipynb susie_to_dapg \
    --susie-table /restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/fastenloc/susie_tables/eqtl_susie_table_ex.tsv \
    --out-vcf /restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/fastenloc/susie_tables/test.vcf \
    --tissue DLPFC --container /restricted/projectnb/casa/oaolayin/xqtl-pipeline/container/singularity/fastenloc.sif

INFO: Running [32msusie_to_dapg_1[0m: 
[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25hINFO: [32msusie_to_dapg_1[0m is [32mcompleted[0m.
INFO: Running [32msusie_to_dapg_2[0m: 
INFO: [32msusie_to_dapg_2[0m is [32mcompleted[0m.
INFO: Running [32msusie_to_dapg_3[0m: 
INFO: [32msusie_to_dapg_3[0m is [32mcompleted[0m.
INFO: Workflow susie_to_dapg (ID=w71c58bd2bc7b2d49) is executed successfully with 3 completed steps.


In [2]:
# Host directories to bind into the Singularity container used by the workflow
export SINGULARITY_BIND="/restricted/projectnb/amp-ad/,/restricted/projectnb/casa,/scratch/"
# Scratch file that receives the gzip-compressed GWAS PIP table written by fastenloc_1
# NOTE(review): this temporary file is not removed after the run
tmp_file=$(mktemp)
# Run the fastenloc steps with the gzipped eQTL VCF produced by the susie_to_dapg example
sos run /restricted/projectnb/casa/oaolayin/xqtl-pipeline/code/intact_pipeline/fastenloc.ipynb fastenloc \
    --file-table /restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/fastenloc/susie_tables/kunkle.tsv \
    --eqtl-vcf /restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/fastenloc/susie_tables/test.vcf.gz \
    --out-file ${tmp_file} --out-pre /restricted/projectnb/casa/oaolayin/fastenloc_test/kunkle_DLPFC --tissue DLPFC \
    --container /restricted/projectnb/casa/oaolayin/xqtl-pipeline/container/singularity/fastenloc.sif

INFO: Running [32mfastenloc_1[0m: 
INFO: [32mfastenloc_1[0m is [32mcompleted[0m.
INFO: Running [32mfastenloc_2[0m: 

		                     fastENLOC (v2.0)                        

		                        April, 2022                              




Parameters and options:

Input files:
    * Molecular qtl annotation file: /restricted/projectnb/casa/oaolayin/AD_GWAS_sum_stats/fastenloc/susie_tables/test.vcf.gz
    * GWAS fine-mapping file: /scratch/199892.1.ood/tmp.JZJD71q8Uz
    * Tissue specified: DLPFC

Enrichment parameters:
    * Rounds of multiple imputation: 25
    * Shrinkage parameter: 1.0

Miscsellaneous options:
    * Total GWAS variants: unspecified, use GWAS file input
    * Simultaneous running threads: 1

Output options:
    * Output file prefix: /restricted/projectnb/casa/oaolayin/fastenloc_test/kunkle_DLPFC
    * RCP and SCP output threshold: 1.0e-04


Processing eQTL annotations ... 
read in 589 SNPs, 23 eQTL signal clusters, 22.3 expected eQTLs

Processin

## Output
Information on the fastENLOC outputs can be found [here](https://github.com/xqwen/fastenloc/blob/master/tutorial/README.md). 

In [6]:
head -n 100 /restricted/projectnb/casa/oaolayin/fastenloc_test/kunkle_DLPFC*

==> /restricted/projectnb/casa/oaolayin/fastenloc_test/kunkle_DLPFC.enloc.enrich.out <==
                Intercept    -7.802           -
Enrichment (no shrinkage)     1.853     3696.712
Enrichment (w/ shrinkage)     0.000       1.000


## Alternative (coloc) parameterization: p1 = 4.088e-04, p2 = 1.476e-06, p12 = 6.038e-10


==> /restricted/projectnb/casa/oaolayin/fastenloc_test/kunkle_DLPFC.enloc.gene.out <==
Gene		GRCP	GLCP
ENSG00000000457		2.250e-03	9.590e-02
ENSG00000000971		0.000e+00	0.000e+00
ENSG00000001084		5.420e-05	6.338e-04
ENSG00000001167		4.646e-04	1.021e-02
ENSG00000001460		0.000e+00	0.000e+00
ENSG00000001461		0.000e+00	0.000e+00
ENSG00000001561		2.034e-04	2.624e-03
ENSG00000001626		0.000e+00	0.000e+00
ENSG00000001629		1.429e-03	5.209e-02
ENSG00000001630		1.794e-03	1.040e-01
ENSG00000002016		0.000e+00	0.000e+00
ENSG00000002745		5.529e-04	1.674e-03
ENSG00000002822		2.165e-03	2.726e-02
ENSG00000002834		5.972e-04	5.972e-04
ENSG00000002933		0.000e+00	0.000e+00

==> /restricte