# Module QTL and GWAS Colocalization

**Created**: 2 June 2022

## Environment

In [1]:
library(tidyverse)
library(data.table)
library(coloc)
library(susieR)

setwd("~/eQTL_pQTL_Characterization/")

source("01_Colocalization/scripts/utils/ggplot_theme.R")
source("01_Colocalization/scripts/utils/iupac.R")

── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.8
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’

## Load Module QTL Data

In [2]:
geno.bim <- fread("/nfs/users/nfs_n/nm18/gains_team282/Genotyping/All_genotyping_merged_filtered_b38_refiltered_rsID.bim")

colnames(geno.bim) <- c("chr", "snp", "cM", "pos", "minor_allele", "major_allele")

In [3]:
head(geno.bim)

chr,snp,cM,pos,minor_allele,major_allele
<chr>,<chr>,<int>,<int>,<chr>,<chr>
1,rs3131972,0,817341,A,G
1,rs546843995,0,818053,0,G
1,rs553916047,0,818359,0,A
1,1:818740_T_C,0,818740,T,C
1,rs145604921,0,819378,0,C
1,rs535256652,0,821053,0,T


In [4]:
module.ss.dir = "/nfs/users/nfs_n/nm18/gains_team282/nikhil/expression/eigengene_sva/wgcna_summary_statistics/"
module.qtl.sum <- do.call(rbind, lapply(list.files(module.ss.dir, pattern="ME_[0-9]+_[0-9]+-.*\\.tsv"), function(file.name) {

    fread(paste0(module.ss.dir, file.name)) %>%
    as.data.frame() %>%
    dplyr::select(snp=1, beta=2, se=3, t=4, p=5) %>%
    dplyr::mutate(module.qtl=gsub("\\.tsv", "", file.name)) %>%
    dplyr::mutate(module=gsub("_[0-9]+-.*$", "", module.qtl)) %>%
    dplyr::mutate(pc=gsub("-.*$", "", gsub("ME_[0-9]+_", "", module.qtl))) %>%
    dplyr::mutate(qtl.locus=gsub("ME_[0-9]+_[0-9]+-", "", module.qtl)) %>%
    dplyr::mutate(qtl.locus.chr=gsub("\\:.*", "", qtl.locus)) %>%
    dplyr::mutate(qtl.locus.start=as.numeric(gsub(".*\\:", "", gsub("-.*$", "", qtl.locus)))) %>%
    dplyr::mutate(qtl.locus.end=as.numeric(gsub(".*-", "", qtl.locus)))
})) %>%
    merge(., geno.bim, by="snp")

In [5]:
head(module.qtl.sum)

Unnamed: 0_level_0,snp,beta,se,t,p,module.qtl,module,pc,qtl.locus,qtl.locus.chr,qtl.locus.start,qtl.locus.end,chr,cM,pos,minor_allele,major_allele
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<int>,<int>,<chr>,<chr>
1,rs10000031,0.0019267651,0.001094589,1.7602633,0.07874832,ME_87_1-4:67472593-69475768,ME_87,1,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A
2,rs10000031,-0.0021730771,0.002620205,-0.8293539,0.40721463,ME_87_5-4:67472593-69475768,ME_87,5,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A
3,rs10000031,0.0003155103,0.002484468,0.1269931,0.89895231,ME_87_2-4:67472593-69475768,ME_87,2,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A
4,rs10000031,0.0010224658,0.002528719,0.4043415,0.68610817,ME_87_3-4:67472593-69475768,ME_87,3,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A
5,rs10000031,0.0025630543,0.002457255,1.0430561,0.29739909,ME_87_4-4:67472593-69475768,ME_87,4,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A
6,rs10000051,-0.0016114074,0.001729396,-0.9317744,0.35160254,ME_86_5-4:117564190-120834939,ME_86,5,4:117564190-120834939,4,117564190,120834939,4,0,120370537,T,C


In [6]:
mqtl.geno <- fread("~/gains_team282/nikhil/data/genotypes/eigengene_sva_ss_genotypes.raw", sep=" ", drop=2:6) %>%
    as.data.frame()

rownames(mqtl.geno) <- mqtl.geno$FID
mqtl.geno$FID <- NULL
colnames(mqtl.geno) <- gsub("_.*$", "", colnames(mqtl.geno))

## Load Module Eigengenes

In [7]:
eigengenes <- read.csv("~/gains_team282/nikhil/expression/gene_expression/eigengenes.multiple.csv", row.names=1)

In [8]:
head(eigengenes)

Unnamed: 0_level_0,ME_1_1,ME_1_2,ME_1_3,ME_1_4,ME_1_5,ME_2_1,ME_2_2,ME_2_3,ME_2_4,ME_2_5,⋯,ME_105_1,ME_105_2,ME_105_3,ME_105_4,ME_105_5,ME_106_1,ME_106_2,ME_106_3,ME_106_4,ME_106_5
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
UK02270173_3,0.008640296,-0.04165815,0.027430556,-0.020437463,0.018138657,0.007926439,-0.03881513,0.03058366,-0.014329184,0.009126178,⋯,0.033258706,0.010337824,0.031572946,0.0308363822,-0.01556043,-0.003317747,0.01850385,0.001776021,0.034254655,0.002439359
UK15130120_3,-0.037073495,-0.01795641,-0.007283072,-0.026494085,0.003339612,-0.03317947,-0.01991901,-0.01688432,-0.014852805,0.049310271,⋯,0.051184001,-0.001611603,0.005624913,0.027353894,-0.03670228,-0.012327128,0.04405671,0.013144923,0.017987682,-0.001779326
UK58000006_3,0.029670524,-0.03481715,0.02849114,-0.009483861,-0.005862293,0.02149011,-0.03480085,0.03014656,-0.003961906,-0.032946862,⋯,-0.00662284,-0.045791646,0.033491934,-0.0420056572,0.02749077,-0.004740631,0.04638229,-0.043309799,0.028730908,-0.024051766
UK47010004_3,0.011954974,0.01091971,0.038665347,0.003726632,0.052019153,0.007360515,0.01763746,0.03490663,-0.005573984,-0.016190548,⋯,-0.003072117,-0.010005728,0.019263388,0.0009626484,0.06116524,0.012014706,0.01637633,-0.003045472,-0.018223178,-0.020637855
UK42020088_5,0.009818957,0.02100233,-0.01985218,-0.013052613,-0.030223051,0.008322535,0.01712902,-0.02394729,0.003193689,0.019716419,⋯,-0.032697222,-0.001257679,-0.01976095,-0.0079000979,0.02988006,0.007915598,-0.0164968,0.01325058,-0.031445321,-0.01416897
UK47490007_3,-0.021930377,0.03807246,0.042329128,-0.025160071,0.012732846,-0.024756322,0.03899339,0.03987212,-0.030754242,0.038550679,⋯,-0.00360727,0.043079589,0.025888343,0.041338696,0.0436034,0.024871961,-0.03349057,0.029665102,-0.009251542,-0.052908888


## Load EBI SNPs that are Module QTL

In [9]:
ebi.mqtl <- read.csv("~/gains_team282/nikhil/expression/eigengene_sva/mqtl_all_pcs_ebi_snps.csv", row.names=1)

## Neutrophil Counts

**Study**: [GCST90002398](https://www.ebi.ac.uk/gwas/studies/GCST90002398)

In [37]:
ntr.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002398_buildGRCh37.tsv", sep="\t")

In [38]:
ntr.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002398")

ntr.snps <- unique(ntr.me$snp)

In [39]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% ntr.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [40]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

ntr.ct <- ntr.ct %>%
    dplyr::filter(variant_id %in% all.snps)

In [41]:
ntr.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele)
    
    all.info = ntr.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "other_allele", "effect_allele"),
            by.y=c("m.snp", "major_allele", "minor_allele")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [77]:
ntr.coloc  %>%
    dplyr::filter(Colocalise)

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
25004,2.180803e-192,9.549674e-13,3.560213e-182,0.0154326,0.9845674,ME_84_5-6:28836283-33995104,1,0.9845674,True
3367,7.347e-321,1.28517e-302,5.836141e-22,2.099098e-05,0.999979,ME_103_3-12:68275923-70376531,1,0.999979,True
3367,1.6636949999999998e-250,2.910454e-232,5.836141e-22,2.099098e-05,0.999979,ME_103_2-12:68275923-70376531,1,0.999979,True
3367,3.5465570000000005e-69,6.204319e-51,5.836141e-22,2.099099e-05,0.999979,ME_103_4-12:68275923-70376531,1,0.999979,True
3367,1.139342e-46,1.993155e-28,2.545045e-21,0.003455738,0.9965443,ME_103_1-12:68275923-70376531,1,0.9965443,True


In [43]:
rm(ntr.ct)

## Platelet Counts

**Study**: [GCST90002402](https://www.ebi.ac.uk/gwas/studies/GCST90002402)

In [44]:
plt.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002402_buildGRCh37.tsv", sep="\t")

In [45]:
plt.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002402")

plt.snps <- unique(plt.me$snp)

In [46]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% plt.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [47]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

plt.ct <- plt.ct %>%
    dplyr::filter(variant_id %in% all.snps)

In [48]:
plt.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele)
    
    all.info = plt.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "other_allele", "effect_allele"),
            by.y=c("m.snp", "major_allele", "minor_allele")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [76]:
plt.coloc %>%
    dplyr::filter(Colocalise)

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
2318,3.120899e-31,4.264181e-07,1.550307e-26,0.02020253,0.979797,ME_63_4-11:46459981-48629440,0.9999996,0.9797975,True


In [50]:
rm(plt.ct)

## Monocyte Counts

**Study**: [GCST90002393](https://www.ebi.ac.uk/gwas/studies/GCST90002393)

In [51]:
mnc.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002393_buildGRCh37.tsv", sep="\t")

In [52]:
mnc.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002393")

mnc.snps <- unique(mnc.me$snp)

In [53]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% mnc.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [54]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

mnc.ct <- mnc.ct %>%
    dplyr::filter(variant_id %in% all.snps)

In [55]:
mnc.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele)
    
    all.info = mnc.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "other_allele", "effect_allele"),
            by.y=c("m.snp", "major_allele", "minor_allele")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [75]:
mnc.coloc %>%
    dplyr::filter(Colocalise)

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>


In [57]:
rm(mnc.ct)

## Lymphocyte Counts

**Study**: [GCST90002388](https://www.ebi.ac.uk/gwas/studies/GCST90002388)

In [58]:
lym.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002388_buildGRCh37.tsv", sep="\t")

In [59]:
lym.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002388")

lym.snps <- unique(lym.me$snp)

In [60]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% lym.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [61]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

lym.ct <- lym.ct %>%
    dplyr::filter(variant_id %in% all.snps)

In [62]:
lym.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele)
    
    all.info = lym.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "other_allele", "effect_allele"),
            by.y=c("m.snp", "major_allele", "minor_allele")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [74]:
lym.coloc %>%
    dplyr::filter(Colocalise)

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1692,4.376502e-15,3.42874e-11,3.110362e-06,0.02339131,0.9766056,ME_101_4-12:54985643-57100037,0.9999969,0.9766086,True


In [66]:
rm(lym.ct)

“object 'lym.ct' not found”


## Eosinophil Counts

**Study**: [GCST90002381](https://www.ebi.ac.uk/gwas/studies/GCST90002381)

In [65]:
eos.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002388_buildGRCh37.tsv", sep="\t")

In [67]:
eos.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002388")

eos.snps <- unique(eos.me$snp)

In [68]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% eos.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [69]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

eos.ct <- eos.ct %>%
    dplyr::filter(variant_id %in% all.snps)

In [70]:
eos.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele)
    
    all.info = eos.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "other_allele", "effect_allele"),
            by.y=c("m.snp", "major_allele", "minor_allele")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [73]:
eos.coloc %>%
    dplyr::filter(Colocalise)

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1692,4.376502e-15,3.42874e-11,3.110362e-06,0.02339131,0.9766056,ME_101_4-12:54985643-57100037,0.9999969,0.9766086,True


In [72]:
rm(eos.ct)

## Basophil Counts

**Study**: [GCST004618](https://www.ebi.ac.uk/gwas/studies/GCST004618)

In [79]:
bas.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/baso_N171846_narrow_form.tsv", sep="\t")

In [80]:
head(bas.ct)

VARIANT,ID_dbSNP49,CHR,BP,REF,ALT,ALT_MINOR,DIRECTION,EFFECT,SE,P,MLOG10P,ALT_FREQ,MA_FREQ
<chr>,<chr>,<int>,<int>,<chr>,<chr>,<lgl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1:10177_A_AC,rs367896724,1,10177,A,AC,True,-,-4.836126e-05,0.00483081,0.992,0.003482852,0.3923,0.3923
1:10352_T_TA,rs555500075,1,10352,T,TA,True,+,0.005007469,0.004966583,0.3133,0.5039799,0.3855,0.3855
1:10616_CCGCCGTTGCAAAGGCGCGCCG_C,rs376342519,1,10616,CCGCCGTTGCAAAGGCGCGCCG,C,False,-,-0.00138152,0.03171538,0.9653,0.01535786,0.9942,0.0058
1:11008_C_G,rs575272151,1,11008,C,G,True,-,-0.006713662,0.008351495,0.4215,0.3752416,0.0853,0.0853
1:11012_C_G,rs544419019,1,11012,C,G,True,-,-0.007852701,0.008403985,0.3501,0.4558122,0.0846,0.0846
1:13110_G_A,rs540538026,1,13110,G,A,True,-,-0.003841496,0.01066627,0.7187,0.1434328,0.0592,0.0592


In [81]:
bas.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST004618")

bas.snps <- unique(bas.me$snp)

In [82]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% bas.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [83]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

bas.ct <- bas.ct %>%
    dplyr::filter(ID_dbSNP49 %in% all.snps)

In [84]:
bas.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele)
    
    all.info = bas.ct %>%
        dplyr::filter(ID_dbSNP49 %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("ID_dbSNP49"),
            by.y=c("m.snp")
        ) %>%
        dplyr::select(snp=ID_dbSNP49, m.beta, m.se, pos=m.pos, g.beta=EFFECT, g.se=SE, g.maf=MA_FREQ)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [85]:
bas.coloc %>%
    dplyr::filter(Colocalise)

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>


In [86]:
rm(bas.ct)

## Erythrocyte Counts

**Study**: [GCST90002403](https://www.ebi.ac.uk/gwas/studies/GCST90002403)

In [88]:
ert.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002403_buildGRCh37.tsv", sep="\t")

“Previous fread() session was not cleaned up properly. Cleaned up ok at the beginning of this fread() call.”


In [89]:
ert.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002403")

ert.snps <- unique(ert.me$snp)

In [90]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% ert.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [91]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

ert.ct <- ert.ct %>%
    dplyr::filter(variant_id %in% all.snps)

In [92]:
ert.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele)
    
    all.info = ert.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "other_allele", "effect_allele"),
            by.y=c("m.snp", "major_allele", "minor_allele")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [93]:
ert.coloc %>%
    dplyr::filter(Colocalise)

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>


In [94]:
rm(ert.ct)