In [3]:
setwd("/projects/CARDIPS/analysis/epigenome_resource")
source("analyses/jennifer/notebooks/functions.R")

set.seed(5366)
library(coloc)

This is coloc version 5.2.3



## **Prepare input for GWAS**

In [6]:
qtls = fread("analyses/tim/ld_modules/scripts/primary_qtls.txt", data.table = T)
qtls$taskid = c(1:nrow(qtls))
qtls$path_exists = file.exists(qtls$path)

table(qtls$path_exists)

head(qtls,2)

fwrite(qtls, "analyses/jennifer/gwas_coloc/input/qtls_to_coloc_with_gwas.txt", row.names = F, sep = "\t")


 TRUE 
59693 

element_id,id,chrom,pos,ref,alt,rsid,genotyped,af,beta,⋯,type,new_egene,tissue,tissue_qtl_id,tissue_element,test,datatype,path,taskid,path_exists
<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,⋯,<int>,<lgl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<lgl>
ENSG00000000460.17,VAR_1_169804722_A_C,chr1,169804722,A,C,rs12142505,1,0.0805861,0.8196584,⋯,0,True,iPSC,iPSC_ENSG00000000460.17_0,iPSC_ENSG00000000460.17,original,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000000460.17.txt,1,True
ENSG00000000938.13,VAR_1_27564022_A_G,chr1,27564022,A,G,rs12146138,1,0.0567766,1.5833789,⋯,0,True,iPSC,iPSC_ENSG00000000938.13_0,iPSC_ENSG00000000938.13,original,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000000938.13.txt,2,True


## **Launch job**

In [92]:
input_file="/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/input/qtls_to_coloc_with_gwas.txt"
manifest="/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_independent/subset_manifest.txt"
script="/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/notebooks/08.03.run_qtl_gwas_coloc.sh"
outdir="/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls"
cmd = paste("qsub -N gwas_coloc -t", paste0("1-", nrow(qtls), ":1"), 
            "-tc 300 -pe smp 2 -l short -V -cwd -o analyses/jennifer/logs/coloc_gwas.out -e analyses/jennifer/logs/coloc_gwas.err", script, input_file, manifest, outdir)

message(cmd)
system(cmd)

qsub -N gwas_coloc -t 1-59693:1 -tc 300 -pe smp 2 -l short -V -cwd -o analyses/jennifer/logs/coloc_gwas.out -e analyses/jennifer/logs/coloc_gwas.err /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/notebooks/08.03.run_qtl_gwas_coloc.sh /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/input/qtls_to_coloc_with_gwas.txt /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_independent/subset_manifest.txt /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls



## **concatenate all GWAS results**

In [8]:
tissues = c("iPSC", "PPC", "CVPC")
analyses = c("eqtls", "caqtls", "haqtls")

In [9]:
coloc_results_dir = "/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls"

In [11]:
table(qtls$tissue, qtls$datatype)

      
       caqtls eqtls haqtls
  CVPC  11187  4796   8912
  iPSC   9017  8916   1221
  PPC   10255  5389      0

In [38]:
pipeline = "/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/notebooks/08.04.summarize_gwas.R"
manifest_file = "analyses/jennifer/gwas_independent/subset_manifest.txt"

for (t in tissues)
{
    for (a in analyses)
    {
        files = list.files(paste("analyses/jennifer/gwas_coloc/regressed_qtls", a, t, sep = "/"))
        message(paste(a, t, length(files)), appendLF = F)
        if (length(files) > 0)
        {
            cmd = paste("Rscript", pipeline, 
                        "--analysis", a, 
                        "--tissue", t, 
                        "--manifest_file", manifest_file, 
                        "--coloc_results_dir", coloc_results_dir)
            
            log_out = paste(getwd(), "analyses/jennifer/gwas_coloc/logs", paste(a, t, "summarize.out", sep = "_"), sep = "/")
            log_err = paste(getwd(), "analyses/jennifer/gwas_coloc/logs", paste(a, t, "summarize.err", sep = "_"), sep = "/")
            run_qsub(name = paste(a, t, sep = "_"), cmd = cmd, threads = 4, log_out = log_out, log_err = log_err, exec = T)
        }
    }
}


eqtls iPSC 8910
echo "Rscript /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/notebooks/08.04.summarize_gwas.R --analysis eqtls --tissue iPSC --manifest_file analyses/jennifer/gwas_independent/subset_manifest.txt --coloc_results_dir /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls" | qsub -N eqtls_iPSC -V -cwd -pe smp 4 -o /frazer01/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/logs/eqtls_iPSC_summarize.out -e /frazer01/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/logs/eqtls_iPSC_summarize.err

caqtls iPSC 8961
echo "Rscript /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/notebooks/08.04.summarize_gwas.R --analysis caqtls --tissue iPSC --manifest_file analyses/jennifer/gwas_independent/subset_manifest.txt --coloc_results_dir /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls" | qsub -N caqtls_iPSC -V -cwd -pe smp 4 -o /

## **aggregate**

In [61]:
summary = as.data.frame(rbindlist(lapply(tissues, function(t)
{
    as.data.frame(rbindlist(lapply(analyses, function(a)
    {
        file = paste(coloc_results_dir, "results", paste(paste(a, t, "summary", sep = "_"), "txt", sep = "."), sep = "/")
        message(file)
        if (file.exists(file))
        {
            message(paste(t, a),  appendLF = F)
            fread(file, data.table = F)
        } else
        {
            message(paste("Missing", t, a))
        }
    })))
}))) %>% mutate(p.gwas = as.double(p.gwas), p.eqtl = as.double(p.eqtl)) %>% dplyr::rename(type = discovery_order) 

table(summary$analysis, summary$tissue)

/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/results/eqtls_iPSC_summary.txt

iPSC eqtls
/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/results/caqtls_iPSC_summary.txt

iPSC caqtls
/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/results/haqtls_iPSC_summary.txt

iPSC haqtls
/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/results/eqtls_PPC_summary.txt

PPC eqtls
/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/results/caqtls_PPC_summary.txt

PPC caqtls
/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/results/haqtls_PPC_summary.txt

Missing PPC haqtls

/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/results/eqtls_CVPC_summary.txt

CVPC eqtls
/projects/CARDIPS/analysis/epigenome_resource/analyses/jen

        
           CVPC   iPSC    PPC
  caqtls 131611 106331 120746
  eqtls   57789 107643  65016
  haqtls 105326  14524      0

## **check qtls that are missing / not tested**

In [62]:
summary$qtl_id = NULL
summary$element_cond = ifelse(summary$element_id %like% "ENSG", 
                               paste(summary$tissue, summary$element_id, summary$type, sep = "_"), 
                               paste(summary$element_id, summary$type, sep = "_"))

In [63]:
qtls$tissue_qtl_id = ifelse(qtls$element_id %like% "ENSG",
                            paste(qtls$tissue, qtls$element_id, qtls$type, sep = "_"),
                            paste(qtls$element_id, qtls$type, sep = "_"))

In [64]:
# check for qtls that were not supposed to be tested (i.e., not the nominated primary)
summary %>% filter(!element_cond %in% qtls$tissue_qtl_id)

“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”


analysis,tissue,element_id,type,trait_id,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,⋯,topsnp_pp,beta.eqtl,se.eqtl,p.eqtl,beta.gwas,se.gwas,p.gwas,bonferroni.eqtl,cs_size,element_cond
<chr>,<chr>,<chr>,<int>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>


In [65]:
message(paste("Did not test:", length(unique(qtls[!qtls$tissue_qtl_id %in% summary$element_cond,]$tissue_qtl_id))))

missing        = qtls[!qtls$tissue_qtl_id %in% summary$element_cond,] 
missing$qtl_id = ifelse(missing$qtl_id %like% "ENSG", paste(missing$tissue, missing$qtl_id, sep = "_"), missing$qtl_id)
missing$taskid = c(1:nrow(missing))
# fwrite(missing, "analyses/jennifer/gwas_coloc/input/qtls_to_coloc_with_gwas_v02.txt", row.names = F, sep = "\t")

head(missing,2)
nrow(missing)

Did not test: 233



element_id,id,chrom,pos,ref,alt,rsid,genotyped,af,beta,⋯,new_egene,tissue,tissue_qtl_id,tissue_element,test,datatype,path,taskid,path_exists,qtl_id
<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,⋯,<lgl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<lgl>,<lgl>
ENSG00000234665.9,VAR_9_63627840_C_T,chr9,63627840,C,T,rs60554899,1,0.0695971,1.1757249,⋯,True,iPSC,iPSC_ENSG00000234665.9_0,iPSC_ENSG00000234665.9,original,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000234665.9.txt,1,True,
ENSG00000263513.6,VAR_1_143751203_A_C,chr1,143751203,A,C,.,1,0.17033,0.6939457,⋯,True,iPSC,iPSC_ENSG00000263513.6_0,iPSC_ENSG00000263513.6,original,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000263513.6.txt,2,True,


In [66]:
# These could not be tested because overlap with gwas < 50 snps or no GWAS peaks below significance
head(qtls[!qtls$tissue_qtl_id %in% summary$element_cond,]$tissue_qtl_id)

## **rerun GWAS colocalization**

In [36]:
input_file = "analyses/jennifer/gwas_coloc/input/qtls_to_coloc_with_gwas_v02.txt"
manifest   = "/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_independent/subset_manifest.txt"
script     = "/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/notebooks/08.03.run_qtl_gwas_coloc.sh"
outdir     = "/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls"
log_out    = "analyses/jennifer/logs/coloc_gwas_v02.out"
log_err    = "analyses/jennifer/logs/coloc_gwas_v02.err"

cmd = paste("qsub -N gwas_coloc", 
            "-t", paste0("1-", nrow(fread(input_file, data.table = F)), ":1"), 
            "-tc 300 -pe smp 2 -V -cwd",
            "-o", log_out,
            "-e", log_err, 
            "-l short",
            script, 
            input_file, 
            manifest, 
            outdir)

message(cmd)
system(cmd)

qsub -N gwas_coloc -t 1-292:1 -tc 300 -pe smp 2 -V -cwd -o analyses/jennifer/logs/coloc_gwas_v02.out -e analyses/jennifer/logs/coloc_gwas_v02.err -l short /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/notebooks/08.03.run_qtl_gwas_coloc.sh analyses/jennifer/gwas_coloc/input/qtls_to_coloc_with_gwas_v02.txt /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_independent/subset_manifest.txt /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls



## **check files**

In [10]:
files = as.data.frame(rbindlist(lapply(analyses, function(a)
{
    as.data.frame(rbindlist(lapply(tissues, function(t)
    {
        files = list.files(paste("analyses/jennifer/gwas_coloc/regressed_qtls", a, t, sep = "/"))
        if (length(files) > 0)
        {
            data.frame(file = files) %>% 
                mutate(analysis = a, tissue = t,
                       element_id = gsub(".robj", "", unlist(lapply(file, function(x) { unlist(strsplit(x, "-"))[2] }))),
                       cond = unlist(lapply(files, function(x) { unlist(strsplit(x, "-"))[1] })))
        } 
    })))
}))) %>% mutate(element_cond = ifelse(element_id %like% "ENSG", paste(tolower(tissue), element_id, cond, sep = "_"), paste(element_id, cond, sep = "_")))

In [11]:
all_qtls %>% filter(!element_cond %in% files$element_cond) 

element_id,type,new_egene,tissue,qtl_id,regressed,regressed_signif,regressed_primary,updated_primary,datatype,path_to_qtl,element_cond
<chr>,<int>,<lgl>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<lgl>,<chr>,<chr>,<chr>
ENSG00000234665.9,0,TRUE,iPSC,ENSG00000234665.9_0,FALSE,,FALSE,TRUE,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000234665.9.txt,ipsc_ENSG00000234665.9_0
ENSG00000263513.6,0,TRUE,iPSC,ENSG00000263513.6_0,FALSE,,FALSE,TRUE,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000263513.6.txt,ipsc_ENSG00000263513.6_0
ENSG00000266338.7,0,TRUE,iPSC,ENSG00000266338.7_0,FALSE,,FALSE,TRUE,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000266338.7.txt,ipsc_ENSG00000266338.7_0
ENSG00000270872.2,0,TRUE,iPSC,ENSG00000270872.2_0,TRUE,TRUE,TRUE,TRUE,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_5/qtl_by_element/qtl/qtl.ENSG00000270872.2.0.txt,ipsc_ENSG00000270872.2_0
ENSG00000289318.2,0,TRUE,iPSC,ENSG00000289318.2_0,TRUE,TRUE,TRUE,TRUE,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_5/qtl_by_element/qtl/qtl.ENSG00000289318.2.0.txt,ipsc_ENSG00000289318.2_0
ENSG00000291181.1,0,TRUE,iPSC,ENSG00000291181.1_0,FALSE,,FALSE,TRUE,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000291181.1.txt,ipsc_ENSG00000291181.1_0
ipsc_atac_peak_10677,0,TRUE,iPSC,ipsc_atac_peak_10677_0,FALSE,,FALSE,TRUE,caqtls,/projects/CARDIPS/analysis/epigenome_resource/caqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ipsc_atac_peak_10677.txt,ipsc_atac_peak_10677_0
ipsc_atac_peak_11059,0,TRUE,iPSC,ipsc_atac_peak_11059_0,FALSE,,FALSE,TRUE,caqtls,/projects/CARDIPS/analysis/epigenome_resource/caqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ipsc_atac_peak_11059.txt,ipsc_atac_peak_11059_0
ipsc_atac_peak_11063,0,TRUE,iPSC,ipsc_atac_peak_11063_0,FALSE,,FALSE,TRUE,caqtls,/projects/CARDIPS/analysis/epigenome_resource/caqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ipsc_atac_peak_11063.txt,ipsc_atac_peak_11063_0
ipsc_atac_peak_11064,0,TRUE,iPSC,ipsc_atac_peak_11064_0,FALSE,,FALSE,TRUE,caqtls,/projects/CARDIPS/analysis/epigenome_resource/caqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ipsc_atac_peak_11064.txt,ipsc_atac_peak_11064_0


## **Test run the script**

In [50]:
setwd("/projects/CARDIPS/analysis/epigenome_resource")
suppressMessages(source("analyses/jennifer/notebooks/functions.R"))
set.seed(5366)
library(coloc)

# option_list = list(make_option("--taskid", type = "integer", default = NA, help = "taskid", metavar = "integer"),
#                    make_option("--input_file", type = "character", default = NA, help = "analysis", metavar = "character"),
#                    make_option("--manifest", type = "character", default = NA, help = "manifest file", metavar = "character"),
#                    make_option("--outdir", type = "character", default = NA, help = "${out_dir}/${analysis}", metavar = "character"))

# # parse arguments
# opt_parser    = OptionParser(option_list = option_list)
# opt           = parse_args(opt_parser)

# set arguments
# task_id       = opt$taskid
# input_file    = opt$input_file # list of qtls to colocalization GWAS with (require columns: type, element_id, qtl_id, tissue, analysis)
# manifest_file = opt$manifest
# outdir        = opt$outdir

# message(paste("Input file:"   , input_file   ))
# message(paste("Manifest file:", manifest_file))
# message(paste("Taskid:"       , task_id      ))

In [54]:
head(totest,2)

Unnamed: 0_level_0,element_id,id,chrom,pos,ref,alt,rsid,genotyped,af,beta,⋯,type,new_egene,tissue,tissue_qtl_id,tissue_element,test,datatype,path,taskid,path_exists
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,⋯,<int>,<lgl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<lgl>
1,ENSG00000000460.17,VAR_1_169804722_A_C,chr1,169804722,A,C,rs12142505,1,0.0805861,0.8196584,⋯,0,True,iPSC,iPSC_ENSG00000000460.17_0,iPSC_ENSG00000000460.17,original,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000000460.17.txt,1,True
2,ENSG00000000938.13,VAR_1_27564022_A_G,chr1,27564022,A,G,rs12146138,1,0.0567766,1.5833789,⋯,0,True,iPSC,iPSC_ENSG00000000938.13_0,iPSC_ENSG00000000938.13,original,eqtls,/projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000000938.13.txt,2,True


In [83]:
task_id = 1
totest = fread("analyses/jennifer/gwas_coloc/input/qtls_to_coloc_with_gwas.txt", data.table = F)
# totest = totest %>% filter(qtl_id == "ENSG00000140968.12_0" & tissue == "CVPC")
# input  = totest

# task_id       = totest$taskid
task_id = 1
input_file    = "/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/input/qtls_to_coloc_with_gwas.txt"
manifest_file = "/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_independent/subset_manifest.txt"
outdir        = "/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls"

In [90]:
# read gwas manifest
manifest = fread(manifest_file, data.table = F)

# read list of qtls to process
# filter for the qtl to process
input    = fread(input_file   , data.table = F)
input    = input[input$taskid == task_id,]

# get info about the qtl
element_id      = input$element_id
discovery_order = input$type
analysis        = input$datatype
tissue          = input$tissue
qtl_file        = input[,colnames(input)[which(colnames(input) %like% "path")]]

message(paste("Element ID:"     , element_id     ))
message(paste("Discovery Order:", discovery_order))
message(paste("Analysis:"       , analysis       ))
message(paste("Tissue:"         , tissue         ))
message(paste("QTL file:"       , qtl_file       ))


Element ID: ENSG00000000460.17

Discovery Order: 0

Analysis: eqtls

Tissue: iPSC

QTL file: /projects/CARDIPS/analysis/epigenome_resource/eqtls/iPSC/step_4/qtl_by_element/qtl/qtl.ENSG00000000460.17.txtQTL file: TRUE



In [85]:
suppressWarnings(dir.create(paste(outdir, sep = "/")))
suppressWarnings(dir.create(paste(outdir, analysis, sep = "/")))
suppressWarnings(dir.create(paste(outdir, analysis, tissue, sep = "/")))

In [86]:
# set file path for output
outdir = paste(outdir, analysis, tissue, sep = "/")
outfile = paste(outdir, paste(paste(discovery_order, element_id, sep = "-"), "robj", sep = "."), sep = "/")

message(outfile)

/projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/eqtls/iPSC/0-ENSG00000000460.17.robj



In [87]:
# read phenotype info
pheninfo = fread(paste(analysis, tissue, "input/phenotype_info.txt", sep = "/"), data.table = F)
pheninfo = pheninfo[pheninfo[,4] == element_id,]

# set sample size for each analysis type
# number corresponds to the number of samples
ipscore_sample_size = list("iPSC eqtls"  = 220, 
                           "iPSC caqtls" = 142, 
                           "iPSC haqtls" = 43, 
                           "CVPC eqtls"  = 178, 
                           "CVPC caqtls" = 140, 
                           "CVPC haqtls" = 101, 
                           "PPC eqtls"   = 107, 
                           "PPC caqtls"  = 109)

# read qtldata
qtldata        = fread(qtl_file, data.table = F)
qtldata$chrpos = paste(qtldata$chrom, qtldata$pos, sep = "_") # set chrom and pos
qtldata        = qtldata %>% dplyr::rename(a1 = ref, a2 = alt, p = pval) # rename columns to match those in gwas
qtldata$maf    = ifelse(qtldata$af > 0.5, 1-qtldata$af, qtldata$af) # set maf
qtldata        = qtldata[qtldata$type == discovery_order, c("chrpos", "a1", "a2", "beta", "se", "maf", "p", "bonferroni")] # filter for the type

#fdrdata        = fread(paste(analysis, tissue, "step_4/qtl_by_element/qtl", paste("fdr", element_id, "txt", sep = "."), sep = "/"), data.table = F)
#qtldata$tests  = fdrdata[fdrdata$type == discovery_order,]$tests

#if (file.exists(outfile) == T)
#{
#    load(outfile, verbose = T)
#} else
#{
outlist = list()
#}

# iterate through each row in GWAS manifest
for (gwas_row in c(1:nrow(manifest)))
{
    gwas_file = manifest$filename[gwas_row] # path to GWAS sumstats
    gwas_type = manifest$trait_type[gwas_row] # trait type (continuous or case-control)
    trait_id = manifest$full_trait_id[gwas_row] # trait-id
    description = manifest$description[gwas_row] # trait-id
    
    # check if the trait has already been colocalized
    if (trait_id %in% names(outlist) & length(outlist[[trait_id]]) == 4)
    {
        message(paste("Skipping", gwas_row, trait_id, description, ". Already exists."))
    } else
    {
        message(paste(Sys.time(), gwas_row, trait_id, description))

        # get association statistics for the region of interest (1 MB from element coordinates)
        coord = paste0(pheninfo$chrom, ":", pheninfo$start - 1e6, "-", pheninfo$end + 1e6)
        cmd1 = paste("tabix", gwas_file, coord)
        cmd2 = paste("zcat", gwas_file, "| head -1")

        message(cmd1)
        message(cmd2)

        gwasdata = suppressWarnings(fread(cmd = cmd1, data.table = F, header = F))
        header   = suppressWarnings(fread(cmd = cmd2, data.table = F))

        # if there are GWAS variants in the region of interest, continue
        if (nrow(gwasdata) > 0)
        {
            colnames(gwasdata) = colnames(header)

            # check if there are GWAS variants that are above genome-wide significance, otherwise, do not continue
            if (nrow(gwasdata[gwasdata$p < 5e-05,]) > 0 & "maf" %in% colnames(header))
            {
                if (gwas_type == "case_control")
                {
                    cols = c("chrpos", "a1", "a2", "beta", "se", "p", "cases_fr", "maf", "total")
                    
                    print(colnames(gwasdata))
                    
                    missing_cols = cols[which(!cols %in% colnames(gwasdata))]
                    
                    if (length(missing_cols) > 0)
                    {
                        stop(paste("Missing", paste(cols, collase = ", ")))
                    } 
                    
                    if (!"cases_fr" %in% colnames(gwasdata)) { gwasdata$cases_fr = gwasdata$n_case / gwasdata$total }
                    
                    gwasdata = gwasdata[,c("chrpos", "a1", "a2", "beta", "se", "p", "cases_fr", "maf", "total")]
                } else
                {
                    if (!"n" %in% colnames(gwasdata))
                    {
                        gwasdata$n = gwasdata$n_case 
                        gwasdata = gwasdata[,c("chrpos", "a1", "a2", "beta", "se", "p", "n", "maf")]
                    } else
                    {
                        gwasdata = gwasdata[,c("chrpos", "a1", "a2", "beta", "se", "p", "n", "maf")]
                    }
                }

                merge = merge(qtldata, gwasdata, by = "chrpos")

                # fix opp. alleles
                tmp1 = merge[merge$a1.x == merge$a1.y & merge$a2.x == merge$a2.y,]
                tmp2 = merge[merge$a1.x == merge$a2.y & merge$a2.x == merge$a1.y,]
                tmp2$beta.y = -1 * tmp2$beta.y
                merge = rbind(tmp1, tmp2) 

                # remove multi-allelic snps
                fq = data.frame(table(merge$chrpos)) %>% filter(Freq != 1)
                merge = merge[!merge$chrpos %in% fq$Var1,]
                message(paste("Multi-allelic snps:", nrow(fq)))

                # update id
                merge = merge %>% mutate(chrpos = gsub("chr", "VAR_", paste(chrpos, a1.x, a2.x, sep = "_"))) %>% dplyr::rename(id = chrpos)

                # remove empty entries
                merge = merge[!is.na(merge$beta.x) & 
                              !is.na(merge$beta.y) & 
                              !is.na(merge$maf.x) & 
                              !is.na(merge$maf.y) & 
                              complete.cases(merge) & 
                              merge$maf.x > 0 & 
                              merge$maf.x < 1 & 
                              merge$maf.y > 0 & 
                              merge$maf.y < 1,]
                merge$maf.x  = as.double(merge$maf.x)
                merge$maf.y  = as.double(merge$maf.y)
                merge$beta.x = as.double(merge$beta.x)
                merge$beta.y = as.double(merge$beta.y)
                merge$se.x   = as.double(merge$se.x)
                merge$se.y   = as.double(merge$se.y)

                if (nrow(merge) > 50)
                {
                    dataset1 = list(type = "quant", snp = merge$id, beta = merge$beta.x, varbeta = merge$se.x^2, MAF = merge$maf.x, N = ipscore_sample_size[[paste(tissue, analysis)]])

                    if (gwas_type == "case_control")
                    {
                        dataset2 = list(type = "cc", snp = merge$id, beta = merge$beta.y, varbeta = merge$se.y^2, MAF = merge$maf.y, s = merge$cases_fr, N = merge$total)
                    } else
                    {
                        dataset2 = list(type = "quant", snp = merge$id, beta = merge$beta.y, varbeta = merge$se.y^2, MAF = merge$maf.y, N = merge$n)
                    }

                    coloc = suppressWarnings(coloc.abf(dataset1 = dataset1, dataset2 = dataset2))
                    coloc = process_coloc(coloc)
                    coloc$input = merge
                    outlist[[trait_id]] = coloc

                } else
                {
                    message("Overlapping SNPs < 50")
                }
            } else
            {
                if (!"maf" %in% colnames(header))
                {
                    message("No MAF")
                } else if (nrow(gwasdata[gwasdata$p < 5e-05,]) == 0)
                {
                    message("No GWAS variants passed significance (p < 5e-05)")
                } else
                {
                    message("Unknown error")
                }
            }
        } else
        {
            message("No GWAS variants in window")
        }
    }  
}

if (length(outlist) > 0)
{
    suppressWarnings(dir.create(outdir))
    save(outlist, file = outfile)
    message(paste("Saved:", outfile))
} else
{
    message("Empty results. No overlap with GWAS")
}





2024-06-11 15:23:59 1 healthspan_summary Healthspan

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/tim/gwas_liftover/hg38_summary_statistics/healthspan_summary.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/tim/gwas_liftover/hg38_summary_statistics/healthspan_summary.hg38.tsv.gz | head -1

No GWAS variants passed significance (p < 5e-05)

2024-06-11 15:24:00 2 lifegen_phase2_bothpl_alldr_2017_09_18 Longevity

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/tim/gwas_liftover/hg38_summary_statistics/lifegen_phase2_bothpl_alldr_2017_09_18.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/tim/gwas_liftover/hg38_summary_statistics/lifegen_phase2_bothpl_alldr_2017_09_18.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.22000   0.74800   0.00657   0.02230   0.00281 
[1] "PP abf for shared variant: 0.281%"


2024-06-11 15:24:00 3 timmers2020_healthspan_lifespan_longevity Multivariate Longevity

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/tim/gwas_liftover/hg38_summary_statistics/timmers2020_healthspan_lifespan_longevity.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/tim/gwas_liftover/hg38_summary_statistics/timmers2020_healthspan_lifespan_longevity.hg38.tsv.gz | head -1

No GWAS variants passed significance (p < 5e-05)

2024-06-11 15:24:00 4 biomarkers-30760-both_sexes-irnt.meta HDL cholesterol

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/biomarkers-30760-both_sexes-irnt.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/biomarkers-30760-both_sexes-irnt.meta.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.0878    0.3070    0.0715    0.2500    0.2840 
[1] "PP abf for shared variant: 28.4%"


2024-06-11 15:24:01 5 biomarkers-30780-both_sexes-irnt.meta LDL direct

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/biomarkers-30780-both_sexes-irnt.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/biomarkers-30780-both_sexes-irnt.meta.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.2040    0.7120    0.0122    0.0426    0.0299 
[1] "PP abf for shared variant: 2.99%"


2024-06-11 15:24:01 6 BW3_EUR_summary_stats birth weight (eur)

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/BW3_EUR_summary_stats.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/BW3_EUR_summary_stats.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
    0.072     0.251     0.148     0.517     0.012 
[1] "PP abf for shared variant: 1.2%"


2024-06-11 15:24:01 7 CHILDHOOD_OBESITY.eur childhood obesity (eur)

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/CHILDHOOD_OBESITY.eur.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/CHILDHOOD_OBESITY.eur.hg38.tsv.gz | head -1

No GWAS variants passed significance (p < 5e-05)

2024-06-11 15:24:02 8 continuous-12336-both_sexes-irnt.meta Ventricular rate

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-12336-both_sexes-irnt.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-12336-both_sexes-irnt.meta.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.20600   0.72000   0.01460   0.05080   0.00821 
[1] "PP abf for shared variant: 0.821%"


2024-06-11 15:24:02 9 continuous-12340-both_sexes-irnt.meta QRS duration

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-12340-both_sexes-irnt.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-12340-both_sexes-irnt.meta.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.1810    0.6320    0.0381    0.1330    0.0157 
[1] "PP abf for shared variant: 1.57%"


2024-06-11 15:24:02 10 continuous-20022-both_sexes-irnt.meta Birth weight

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-20022-both_sexes-irnt.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-20022-both_sexes-irnt.meta.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.1610    0.5640    0.0588    0.2050    0.0110 
[1] "PP abf for shared variant: 1.1%"


2024-06-11 15:24:03 11 continuous-23104-both_sexes-irnt.meta Body mass index (BMI)

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-23104-both_sexes-irnt.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-23104-both_sexes-irnt.meta.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.18900   0.66200   0.03210   0.11200   0.00434 
[1] "PP abf for shared variant: 0.434%"


2024-06-11 15:24:03 12 continuous-4194-both_sexes-irnt.meta Pulse rate

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-4194-both_sexes-irnt.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/continuous-4194-both_sexes-irnt.meta.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.2100    0.7330    0.0119    0.0416    0.0038 
[1] "PP abf for shared variant: 0.38%"


2024-06-11 15:24:04 13 GCST90014023_buildGRCh38 type_1_diabetes

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/GCST90014023_buildGRCh38.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/GCST90014023_buildGRCh38.hg38.tsv.gz | head -1



 [1] "chr"      "pos"      "a1"       "a2"       "beta"     "se"      
 [7] "p"        "af"       "maf"      "total"    "chrpos"   "cases_fr"
[13] "rsid"    


Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.1230    0.4290    0.0873    0.3050    0.0554 
[1] "PP abf for shared variant: 5.54%"


2024-06-11 15:24:04 14 icd10-I20-both_sexes.meta I20 Angina pectoris

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/icd10-I20-both_sexes.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/icd10-I20-both_sexes.meta.hg38.tsv.gz | head -1



 [1] "chr"       "pos"       "a1"        "a2"        "af"        "beta"     
 [7] "se"        "p"         "het_pval"  "maf"       "n_case"    "n_control"
[13] "chrpos"    "cases_fr"  "total"    


Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.0747    0.2610    0.1450    0.5060    0.0143 
[1] "PP abf for shared variant: 1.43%"


2024-06-11 15:24:04 15 icd10-I21-both_sexes.meta I21 Acute myocardial infarction

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/icd10-I21-both_sexes.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/icd10-I21-both_sexes.meta.hg38.tsv.gz | head -1

No GWAS variants passed significance (p < 5e-05)

2024-06-11 15:24:05 16 icd10-I25-both_sexes.meta I25 Chronic ischaemic heart disease

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/icd10-I25-both_sexes.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/icd10-I25-both_sexes.meta.hg38.tsv.gz | head -1



 [1] "chr"       "pos"       "a1"        "a2"        "af"        "beta"     
 [7] "se"        "p"         "het_pval"  "maf"       "n_case"    "n_control"
[13] "chrpos"    "cases_fr"  "total"    


Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.0133    0.0463    0.2060    0.7210    0.0135 
[1] "PP abf for shared variant: 1.35%"


2024-06-11 15:24:05 17 icd10-I48-both_sexes.meta I48 Atrial fibrillation and flutter

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/icd10-I48-both_sexes.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/icd10-I48-both_sexes.meta.hg38.tsv.gz | head -1



 [1] "chr"       "pos"       "a1"        "a2"        "af"        "beta"     
 [7] "se"        "p"         "het_pval"  "maf"       "n_case"    "n_control"
[13] "chrpos"    "cases_fr"  "total"    


Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 5.37e-29  1.88e-28  2.19e-01  7.63e-01  1.82e-02 
[1] "PP abf for shared variant: 1.82%"


2024-06-11 15:24:05 18 MAGIC1000G_FG_EUR fasting glucose

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/MAGIC1000G_FG_EUR.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/MAGIC1000G_FG_EUR.hg38.tsv.gz | head -1

Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.20900   0.72400   0.01350   0.04680   0.00629 
[1] "PP abf for shared variant: 0.629%"


2024-06-11 15:24:06 19 Mahajan.NatGenet2018b.T2D.European_sorted type 2 diabetes

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/Mahajan.NatGenet2018b.T2D.European_sorted.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/Mahajan.NatGenet2018b.T2D.European_sorted.hg38.tsv.gz | head -1



 [1] "chr"      "pos"      "id"       "a2"       "a1"       "eaf"     
 [7] "beta"     "se"       "p"        "n_case"   "total"    "maf"     
[13] "chrpos"   "cases_fr"


Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.19700   0.67500   0.02700   0.09220   0.00876 
[1] "PP abf for shared variant: 0.876%"


2024-06-11 15:24:06 20 phecode-747-both_sexes Cardiac and circulatory congenital anomalies

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/phecode-747-both_sexes.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/phecode-747-both_sexes.meta.hg38.tsv.gz | head -1

No GWAS variants passed significance (p < 5e-05)

2024-06-11 15:24:06 21 phecode-747.11-both_sexes Cardiac shunt/ heart septal defect

tabix /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/phecode-747.11-both_sexes.meta.hg38.tsv.gz chr1:168662007-170854080

zcat /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_liftover/hg38_summary_statistics/phecode-747.11-both_sexes.meta.hg38.tsv.gz | head -1



 [1] "chr"       "pos"       "a1"        "a2"        "id"        "beta"     
 [7] "se"        "p"         "n_case"    "n_control" "af"        "maf"      
[13] "total"     "cases_fr"  "chrpos"   


Multi-allelic snps: 0



PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
   0.1640    0.5730    0.0502    0.1750    0.0379 
[1] "PP abf for shared variant: 3.79%"


Saved: /projects/CARDIPS/analysis/epigenome_resource/analyses/jennifer/gwas_coloc/regressed_qtls/eqtls/iPSC/0-ENSG00000000460.17.robj

