# Module QTL and GWAS Colocalization

**Created**: 2 June 2022

## Environment

In [68]:
library(tidyverse)
library(data.table)
library(GenomicRanges)
library(rtracklayer)
library(coloc)
library(susieR)

setwd("~/eQTL_pQTL_Characterization/")

source("01_Colocalization/scripts/utils/ggplot_theme.R")
source("01_Colocalization/scripts/utils/iupac.R")

## Load Module QTL Data

In [2]:
geno.bim <- fread("/nfs/users/nfs_n/nm18/gains_team282/Genotyping/All_genotyping_merged_filtered_b38_refiltered_rsID.bim")

colnames(geno.bim) <- c("chr", "snp", "cM", "pos", "minor_allele", "major_allele")

In [3]:
head(geno.bim)

chr,snp,cM,pos,minor_allele,major_allele
<chr>,<chr>,<int>,<int>,<chr>,<chr>
1,rs3131972,0,817341,A,G
1,rs546843995,0,818053,0,G
1,rs553916047,0,818359,0,A
1,1:818740_T_C,0,818740,T,C
1,rs145604921,0,819378,0,C
1,rs535256652,0,821053,0,T


In [4]:
module.ss.dir = "/nfs/users/nfs_n/nm18/gains_team282/nikhil/expression/eigengene_sva/wgcna_summary_statistics/"
module.qtl.sum <- do.call(rbind, lapply(list.files(module.ss.dir, pattern="ME_[0-9]+_[0-9]+-.*\\.tsv"), function(file.name) {

    fread(paste0(module.ss.dir, file.name)) %>%
    as.data.frame() %>%
    dplyr::select(snp=1, beta=2, se=3, t=4, p=5) %>%
    dplyr::mutate(module.qtl=gsub("\\.tsv", "", file.name)) %>%
    dplyr::mutate(module=gsub("_[0-9]+-.*$", "", module.qtl)) %>%
    dplyr::mutate(pc=gsub("-.*$", "", gsub("ME_[0-9]+_", "", module.qtl))) %>%
    dplyr::mutate(qtl.locus=gsub("ME_[0-9]+_[0-9]+-", "", module.qtl)) %>%
    dplyr::mutate(qtl.locus.chr=gsub("\\:.*", "", qtl.locus)) %>%
    dplyr::mutate(qtl.locus.start=as.numeric(gsub(".*\\:", "", gsub("-.*$", "", qtl.locus)))) %>%
    dplyr::mutate(qtl.locus.end=as.numeric(gsub(".*-", "", qtl.locus)))
})) %>%
    merge(., geno.bim, by="snp") %>%
    dplyr::mutate(IUPAC = alleles.iupac(minor_allele, major_allele))

In [5]:
head(module.qtl.sum)

Unnamed: 0_level_0,snp,beta,se,t,p,module.qtl,module,pc,qtl.locus,qtl.locus.chr,qtl.locus.start,qtl.locus.end,chr,cM,pos,minor_allele,major_allele,IUPAC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<int>,<int>,<chr>,<chr>,<chr>
1,rs10000031,0.0019267651,0.001094589,1.7602633,0.07874832,ME_87_1-4:67472593-69475768,ME_87,1,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A,R
2,rs10000031,-0.0021730771,0.002620205,-0.8293539,0.40721463,ME_87_5-4:67472593-69475768,ME_87,5,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A,R
3,rs10000031,0.0003155103,0.002484468,0.1269931,0.89895231,ME_87_2-4:67472593-69475768,ME_87,2,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A,R
4,rs10000031,0.0010224658,0.002528719,0.4043415,0.68610817,ME_87_3-4:67472593-69475768,ME_87,3,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A,R
5,rs10000031,0.0025630543,0.002457255,1.0430561,0.29739909,ME_87_4-4:67472593-69475768,ME_87,4,4:67472593-69475768,4,67472593,69475768,4,0,68683231,G,A,R
6,rs10000051,-0.0016114074,0.001729396,-0.9317744,0.35160254,ME_86_5-4:117564190-120834939,ME_86,5,4:117564190-120834939,4,117564190,120834939,4,0,120370537,T,C,Y


In [6]:
mqtl.geno <- fread("~/gains_team282/nikhil/data/genotypes/eigengene_sva_ss_genotypes.raw", sep=" ", drop=2:6) %>%
    as.data.frame()

rownames(mqtl.geno) <- mqtl.geno$FID
mqtl.geno$FID <- NULL
colnames(mqtl.geno) <- gsub("_.*$", "", colnames(mqtl.geno))

## Load Module Eigengenes

In [7]:
eigengenes <- read.csv("~/gains_team282/nikhil/expression/gene_expression/eigengenes.multiple.csv", row.names=1)

In [8]:
head(eigengenes)

Unnamed: 0_level_0,ME_1_1,ME_1_2,ME_1_3,ME_1_4,ME_1_5,ME_2_1,ME_2_2,ME_2_3,ME_2_4,ME_2_5,⋯,ME_105_1,ME_105_2,ME_105_3,ME_105_4,ME_105_5,ME_106_1,ME_106_2,ME_106_3,ME_106_4,ME_106_5
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
UK02270173_3,0.008640296,-0.04165815,0.027430556,-0.020437463,0.018138657,0.007926439,-0.03881513,0.03058366,-0.014329184,0.009126178,⋯,0.033258706,0.010337824,0.031572946,0.0308363822,-0.01556043,-0.003317747,0.01850385,0.001776021,0.034254655,0.002439359
UK15130120_3,-0.037073495,-0.01795641,-0.007283072,-0.026494085,0.003339612,-0.03317947,-0.01991901,-0.01688432,-0.014852805,0.049310271,⋯,0.051184001,-0.001611603,0.005624913,0.027353894,-0.03670228,-0.012327128,0.04405671,0.013144923,0.017987682,-0.001779326
UK58000006_3,0.029670524,-0.03481715,0.02849114,-0.009483861,-0.005862293,0.02149011,-0.03480085,0.03014656,-0.003961906,-0.032946862,⋯,-0.00662284,-0.045791646,0.033491934,-0.0420056572,0.02749077,-0.004740631,0.04638229,-0.043309799,0.028730908,-0.024051766
UK47010004_3,0.011954974,0.01091971,0.038665347,0.003726632,0.052019153,0.007360515,0.01763746,0.03490663,-0.005573984,-0.016190548,⋯,-0.003072117,-0.010005728,0.019263388,0.0009626484,0.06116524,0.012014706,0.01637633,-0.003045472,-0.018223178,-0.020637855
UK42020088_5,0.009818957,0.02100233,-0.01985218,-0.013052613,-0.030223051,0.008322535,0.01712902,-0.02394729,0.003193689,0.019716419,⋯,-0.032697222,-0.001257679,-0.01976095,-0.0079000979,0.02988006,0.007915598,-0.0164968,0.01325058,-0.031445321,-0.01416897
UK47490007_3,-0.021930377,0.03807246,0.042329128,-0.025160071,0.012732846,-0.024756322,0.03899339,0.03987212,-0.030754242,0.038550679,⋯,-0.00360727,0.043079589,0.025888343,0.041338696,0.0436034,0.024871961,-0.03349057,0.029665102,-0.009251542,-0.052908888


## Load EBI SNPs that are Module QTL

In [9]:
ebi.mqtl <- read.csv("~/gains_team282/nikhil/expression/eigengene_sva/mqtl_all_pcs_ebi_snps.csv", row.names=1)

## C-Reactive Proteins

**Study**: [GCST009777](https://www.ebi.ac.uk/gwas/studies/GCST009777)

In [112]:
c.prot <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/CRP_all_SNPs.txt", sep=",")

In [113]:
c.prot.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST009777")

c.prot.snps <- unique(c.prot.me$snp)

In [114]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% c.prot.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [115]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

c.prot <- c.prot %>%
    dplyr::filter(SNP %in% all.snps) %>%
    dplyr::mutate(IUPAC = alleles.iupac(ALLELE1, ALLELE0))

In [120]:
c.prot.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = c.prot %>%
        dplyr::filter(SNP %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("SNP", "IUPAC"),
            by.y=c("m.snp", "IUPAC"),
        ) %>%
        dplyr::select(snp=SNP, m.beta, m.se, pos=m.pos, g.beta=BETA, g.se=SE, g.maf=A1FREQ) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 418642
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [121]:
c.prot.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,28239,4.33264e-73,4.45837e-73,0.410815,0.4227156,0.16646937,ME_84_4-6:28836283-33995104,0.589185,0.2825418,False
2,28239,1.78993e-92,1.841873e-92,0.412024,0.42396,0.164016,ME_84_3-6:28836283-33995104,0.587976,0.2789502,False
3,28239,2.906262e-11,2.990599e-11,0.4201078,0.4322805,0.14761167,ME_84_5-6:28836283-33995104,0.5798922,0.2545502,False
4,28239,6.756476e-76,6.952543e-76,0.4290605,0.4414953,0.12944417,ME_84_1-6:28836283-33995104,0.5709395,0.2267214,False
5,28239,1.463887e-75,1.5063680000000002e-75,0.4291153,0.4415517,0.12933302,ME_84_2-6:28836283-33995104,0.5708847,0.2265484,False
6,3923,4.111779e-18,1.6094090000000001e-18,0.6704128,0.2623418,0.06724538,ME_103_5-12:68275923-70376531,0.3295872,0.2040291,False


In [122]:
rm(c.prot)

## Alanine Aminotransferase

**Study**: [GCST90013405](https://www.ebi.ac.uk/gwas/studies/GCST90013405)

In [123]:
ala <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90013405_buildGRCh37.tsv", sep="\t") %>%
    as.data.frame()

In [124]:
ala.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90013405")

ala.snps <- unique(ala.me$snp)

In [125]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% ala.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [126]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set) %>%
    dplyr::mutate(snpid = paste0(chr, ":", pos, "-", IUPAC)) %>%
    dplyr::select(snp, snpid)

ala.ranges <- ala %>%
    dplyr::mutate(chromosome = paste0("chr", chromosome)) %>%
    makeGRangesFromDataFrame(
        .,
        seqnames.field="chromosome", start.field="base_pair_location", end.field="base_pair_location",
        keep.extra.columns=TRUE
    )

chain = import.chain("03_Functional_Interpretation/data/hg19ToHg38.over.chain")
seqlevelsStyle(ala.ranges) = "UCSC"
ala.ranges.hg38 = liftOver(ala.ranges, chain)

overlaps <- module.qtl.sum %>%
    dplyr::filter(snp %in% ala.snps) %>%
    dplyr::select(qtl.locus.chr, qtl.locus.start, qtl.locus.end) %>%
    dplyr::mutate(qtl.locus.chr = paste0("chr", qtl.locus.chr)) %>%
    unique() %>%
    makeGRangesFromDataFrame(., seqnames.field="qtl.locus.chr", start.field="qtl.locus.start", end.field="qtl.locus.end") %>%
    findOverlaps(ala.ranges.hg38, .)

ala <- ala.ranges.hg38[overlaps@from, ] %>%
    as.data.frame() %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele)) %>%
    dplyr::mutate(seqnames = gsub("^chr", "", seqnames)) %>%
    dplyr::mutate(snpid = paste0(seqnames, ":", start, "-", IUPAC)) %>%
    merge(., all.snps, by="snpid") %>%
    dplyr::select(snp, beta, standard_error, effect_allele_frequency, IUPAC) %>%
    unique()

In [127]:
ala.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = ala %>%
        dplyr::filter(snp %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("snp", "IUPAC"),
            by.y=c("m.snp", "IUPAC"),
        ) %>%
        dplyr::select(snp=snp, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=effect_allele_frequency) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 437267
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [130]:
ala.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,2192,2.247531e-192,4.782423e-187,1.559584e-07,0.03221793,0.9677819,ME_101_3-12:54985643-57100037,0.9999998,0.96778207,True
2,2192,8.292256e-33,1.764473e-27,2.047348e-07,0.04260723,0.9573926,ME_101_1-12:54985643-57100037,0.9999998,0.95739276,True
3,2192,1.162068e-18,2.472713e-13,2.192277e-07,0.04569418,0.9543056,ME_101_4-12:54985643-57100037,0.9999998,0.95430581,True
4,2192,3.386816e-42,7.206657e-37,3.166564e-07,0.06644637,0.9335533,ME_101_2-12:54985643-57100037,0.9999997,0.93355361,True
5,2192,2.428823e-06,0.5168185,1.134332e-06,0.24112752,0.2420504,ME_101_5-12:54985643-57100037,0.4831779,0.50095503,False
6,2318,3.89018e-06,0.7386509,1.24392e-06,0.23616501,0.025179,ME_96_3-12:54681479-56681478,0.261344,0.09634428,False


In [133]:
rm(ala)

## Alkaline Phosphatase

**Study**: [GCST90013406](https://www.ebi.ac.uk/gwas/studies/GCST90013406)

In [144]:
akp <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90013406_buildGRCh37.tsv", sep="\t") %>%
    as.data.frame()

In [146]:
akp.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90013406")

akp.snps <- unique(akp.me$snp)

In [147]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% akp.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [148]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set) %>%
    dplyr::mutate(snpid = paste0(chr, ":", pos, "-", IUPAC)) %>%
    dplyr::select(snp, snpid)

akp.ranges <- akp %>%
    dplyr::mutate(chromosome = paste0("chr", chromosome)) %>%
    makeGRangesFromDataFrame(
        .,
        seqnames.field="chromosome", start.field="base_pair_location", end.field="base_pair_location",
        keep.extra.columns=TRUE
    )

chain = import.chain("03_Functional_Interpretation/data/hg19ToHg38.over.chain")
seqlevelsStyle(akp.ranges) = "UCSC"
akp.ranges.hg38 = liftOver(akp.ranges, chain)

overlaps <- module.qtl.sum %>%
    dplyr::filter(snp %in% akp.snps) %>%
    dplyr::select(qtl.locus.chr, qtl.locus.start, qtl.locus.end) %>%
    dplyr::mutate(qtl.locus.chr = paste0("chr", qtl.locus.chr)) %>%
    unique() %>%
    makeGRangesFromDataFrame(., seqnames.field="qtl.locus.chr", start.field="qtl.locus.start", end.field="qtl.locus.end") %>%
    findOverlaps(akp.ranges.hg38, .)

akp <- akp.ranges.hg38[overlaps@from, ] %>%
    as.data.frame() %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele)) %>%
    dplyr::mutate(seqnames = gsub("^chr", "", seqnames)) %>%
    dplyr::mutate(snpid = paste0(seqnames, ":", start, "-", IUPAC)) %>%
    merge(., all.snps, by="snpid") %>%
    dplyr::select(snp, beta, standard_error, effect_allele_frequency, IUPAC) %>%
    unique()

In [149]:
akp.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = akp %>%
        dplyr::filter(snp %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("snp", "IUPAC"),
            by.y=c("m.snp", "IUPAC"),
        ) %>%
        dplyr::select(snp=snp, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=effect_allele_frequency) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 437438
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [150]:
akp.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,6241,3.696639e-14,0.4202837,3.778903e-14,0.4294864,0.1502299,ME_69_5-17:44378976-48159837,0.5797163,0.2591438,False
2,6241,3.7995940000000002e-28,4.319891e-15,7.016067e-14,0.7974785,0.2025215,ME_69_4-17:44378976-48159837,1.0,0.2025215,False
3,6241,0.0,0.0,8.499708e-14,0.9663277,0.0336723,ME_69_1-17:44378976-48159837,1.0,0.0336723,False
4,6241,2.397737e-59,2.726071e-46,8.795579e-14,1.0,5.183392e-10,ME_69_2-17:44378976-48159837,1.0,5.183392e-10,False
5,6241,1.14679e-47,1.303825e-34,8.795579e-14,1.0,5.1036e-10,ME_69_3-17:44378976-48159837,1.0,5.1036e-10,False


In [151]:
rm(akp)

## Interleukin 18

**Study**: [GCST90012024](https://www.ebi.ac.uk/gwas/studies/GCST90012024)

In [171]:
il18 <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90012024_buildGRCh37.tsv", sep="\t") %>%
    as.data.frame() %>%
    dplyr::mutate(effect_allele=str_to_upper(effect_allele)) %>%
    dplyr::mutate(other_allele=str_to_upper(other_allele))

In [172]:
il18.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90012024")

il18.snps <- unique(il18.me$snp)

In [173]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% il18.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [174]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set) %>%
    dplyr::mutate(snpid = paste0(chr, ":", pos, "-", IUPAC)) %>%
    dplyr::select(snp, snpid)

il18.ranges <- il18 %>%
    dplyr::mutate(chromosome = paste0("chr", chromosome)) %>%
    makeGRangesFromDataFrame(
        .,
        seqnames.field="chromosome", start.field="base_pair_location", end.field="base_pair_location",
        keep.extra.columns=TRUE
    )

chain = import.chain("03_Functional_Interpretation/data/hg19ToHg38.over.chain")
seqlevelsStyle(il18.ranges) = "UCSC"
il18.ranges.hg38 = liftOver(il18.ranges, chain)

overlaps <- module.qtl.sum %>%
    dplyr::filter(snp %in% il18.snps) %>%
    dplyr::select(qtl.locus.chr, qtl.locus.start, qtl.locus.end) %>%
    dplyr::mutate(qtl.locus.chr = paste0("chr", qtl.locus.chr)) %>%
    unique() %>%
    makeGRangesFromDataFrame(., seqnames.field="qtl.locus.chr", start.field="qtl.locus.start", end.field="qtl.locus.end") %>%
    findOverlaps(il18.ranges.hg38, .)

il18 <- il18.ranges.hg38[overlaps@from, ] %>%
    as.data.frame() %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele)) %>%
    dplyr::mutate(seqnames = gsub("^chr", "", seqnames)) %>%
    dplyr::mutate(snpid = paste0(seqnames, ":", start, "-", IUPAC)) %>%
    merge(., all.snps, by="snpid") %>%
    dplyr::select(snp, beta, standard_error, effect_allele_frequency, IUPAC) %>%
    unique()

duplicated.snps <- il18$snp[which(duplicated(il18$snp))]

il18 <- il18 %>%
    dplyr::filter(!(snp %in% duplicated.snps))

In [176]:
il18.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = il18 %>%
        dplyr::filter(snp %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("snp", "IUPAC"),
            by.y=c("m.snp", "IUPAC"),
        ) %>%
        dplyr::select(snp=snp, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=effect_allele_frequency) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 21758
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [177]:
il18.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,2598,5.038684e-15,1.83651e-08,2.743574e-07,0.9999839,1.582601e-05,ME_89_3-8:142513074-144574583,0.9999997,1.582602e-05,False
2,2598,1.6314419999999998e-19,5.946315e-13,2.743617e-07,0.9999995,1.816956e-07,ME_89_4-8:142513074-144574583,0.9999997,1.816956e-07,False
3,2598,2.168345e-137,7.903229e-131,2.743618e-07,0.9999997,9.575173e-09,ME_89_1-8:142513074-144574583,0.9999997,9.575176e-09,False
4,2598,7.441225999999999e-30,2.712194e-23,2.743618e-07,0.9999997,9.133277e-09,ME_89_2-8:142513074-144574583,0.9999997,9.133279e-09,False
5,2598,1.7566819999999998e-111,6.402794e-105,2.743618e-07,0.9999997,8.102239e-09,ME_89_5-8:142513074-144574583,0.9999997,8.102242e-09,False


In [178]:
rm(il18)

## Lymphocyte Counts

**Study**: [GCST90002388](https://www.ebi.ac.uk/gwas/studies/GCST90002388)

In [179]:
lym.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002388_buildGRCh37.tsv", sep="\t")

In [180]:
lym.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002388")

lym.snps <- unique(lym.me$snp)

In [181]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% lym.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [182]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

lym.ct <- lym.ct %>%
    dplyr::filter(variant_id %in% all.snps) %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele))

In [184]:
lym.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = lym.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [185]:
lym.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,2182,2.969159e-192,5.0872850000000005e-188,2.060329e-07,0.002532651,0.9974671,ME_101_3-12:54985643-57100037,0.9999998,0.9974673,True
2,2182,1.217712e-32,2.0863990000000003e-28,3.006518e-07,0.00415545,0.9958442,ME_101_1-12:54985643-57100037,0.9999997,0.9958445,True
3,2182,1.8289050000000002e-18,3.133601e-14,3.450285e-07,0.004916552,0.9950831,ME_101_4-12:54985643-57100037,0.9999997,0.9950834,True
4,2182,4.168121e-42,7.141557e-38,3.897058e-07,0.005682808,0.9943168,ME_101_2-12:54985643-57100037,0.9999996,0.9943172,True
5,2182,9.125561e-06,0.1563552,4.255334e-06,0.072138366,0.7714931,ME_101_5-12:54985643-57100037,0.8436315,0.9144907,True
6,15841,0.0,0.239125,0.0,0.278142784,0.4827322,ME_92_1-6:29991224-32464002,0.760875,0.6344435,False


In [186]:
rm(lym.ct)

## Neutrophil Counts

**Study**: [GCST90002398](https://www.ebi.ac.uk/gwas/studies/GCST90002398)

In [187]:
ntr.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002398_buildGRCh37.tsv", sep="\t")

In [188]:
ntr.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002398")

ntr.snps <- unique(ntr.me$snp)

In [189]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% ntr.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [190]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

ntr.ct <- ntr.ct %>%
    dplyr::filter(variant_id %in% all.snps) %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele))

In [191]:
ntr.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = ntr.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [192]:
ntr.coloc  %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,3940,7.347e-321,1.285174e-302,5.836537000000001e-22,2.099423e-05,0.999979,ME_103_3-12:68275923-70376531,1.0,0.999979,True
2,3940,1.663807e-250,2.9104639999999997e-232,5.836537000000001e-22,2.099423e-05,0.999979,ME_103_2-12:68275923-70376531,1.0,0.999979,True
3,3940,3.5467970000000004e-69,6.204338e-51,5.836720000000001e-22,2.102631e-05,0.999979,ME_103_4-12:68275923-70376531,1.0,0.999979,True
4,3940,1.139418e-46,1.9931600000000001e-28,2.545606e-21,0.003456432,0.9965436,ME_103_1-12:68275923-70376531,1.0,0.9965436,True
5,29066,1.975376e-192,7.610087e-12,2.774216e-182,0.1067703,0.8932297,ME_84_5-6:28836283-33995104,1.0,0.8932297,True
6,19700,2.6835220000000003e-159,0.2836089,5.713878000000001e-159,0.6038441,0.112547,ME_97_2-6:28558600-31905730,0.7163911,0.1571027,False


In [199]:
rm(ntr.ct)

## Monocyte Counts

**Study**: [GCST90002393](https://www.ebi.ac.uk/gwas/studies/GCST90002393)

In [193]:
mnc.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002393_buildGRCh37.tsv", sep="\t")

In [194]:
mnc.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002393")

mnc.snps <- unique(mnc.me$snp)

In [195]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% mnc.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [196]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

mnc.ct <- mnc.ct %>%
    dplyr::filter(variant_id %in% all.snps) %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele))

In [197]:
mnc.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = mnc.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [198]:
mnc.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,19700,4.0825000000000004e-169,0.3054576,5.623349e-169,0.4206753,0.27386705,ME_97_3-6:28558600-31905730,0.6945424,0.39431294,False
2,19700,6.285794000000001e-169,0.4703108,5.654876e-169,0.4230773,0.10661189,ME_97_5-6:28558600-31905730,0.5296892,0.20127255,False
3,15841,5.272284e-169,0.4905629,4.7879030000000003e-169,0.4454679,0.06396916,ME_92_3-6:29991224-32464002,0.5094371,0.12556833,False
4,19700,3.9322100000000005e-169,0.2942127,8.372641e-169,0.6264308,0.07935645,ME_97_2-6:28558600-31905730,0.7057873,0.11243678,False
5,19700,6.769981e-169,0.5065383,6.016767e-169,0.4501708,0.04329093,ME_97_1-6:28558600-31905730,0.4934617,0.08772906,False
6,15841,6.104748000000001e-169,0.5680201,4.2863740000000005e-169,0.3988151,0.03316477,ME_92_4-6:29991224-32464002,0.4319799,0.07677386,False


In [200]:
rm(mnc.ct)

## Eosinophil Counts

**Study**: [GCST90002381](https://www.ebi.ac.uk/gwas/studies/GCST90002381)

In [201]:
eos.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002388_buildGRCh37.tsv", sep="\t")

In [202]:
eos.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002388")

eos.snps <- unique(eos.me$snp)

In [203]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% eos.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [204]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

eos.ct <- eos.ct %>%
    dplyr::filter(variant_id %in% all.snps) %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele))

In [205]:
eos.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = eos.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [206]:
eos.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,2182,2.969159e-192,5.0872850000000005e-188,2.060329e-07,0.002532651,0.9974671,ME_101_3-12:54985643-57100037,0.9999998,0.9974673,True
2,2182,1.217712e-32,2.0863990000000003e-28,3.006518e-07,0.00415545,0.9958442,ME_101_1-12:54985643-57100037,0.9999997,0.9958445,True
3,2182,1.8289050000000002e-18,3.133601e-14,3.450285e-07,0.004916552,0.9950831,ME_101_4-12:54985643-57100037,0.9999997,0.9950834,True
4,2182,4.168121e-42,7.141557e-38,3.897058e-07,0.005682808,0.9943168,ME_101_2-12:54985643-57100037,0.9999996,0.9943172,True
5,2182,9.125561e-06,0.1563552,4.255334e-06,0.072138366,0.7714931,ME_101_5-12:54985643-57100037,0.8436315,0.9144907,True
6,15841,0.0,0.239125,0.0,0.278142784,0.4827322,ME_92_1-6:29991224-32464002,0.760875,0.6344435,False


In [207]:
rm(eos.ct)

## Basophil Counts

**Study**: [GCST004618](https://www.ebi.ac.uk/gwas/studies/GCST004618)

In [208]:
bas.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/baso_N171846_narrow_form.tsv", sep="\t")

In [210]:
bas.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST004618")

bas.snps <- unique(bas.me$snp)

In [211]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% bas.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [212]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

bas.ct <- bas.ct %>%
    dplyr::filter(ID_dbSNP49 %in% all.snps) %>%
    dplyr::mutate(IUPAC = alleles.iupac(REF, ALT))

In [213]:
bas.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = bas.ct %>%
        dplyr::filter(ID_dbSNP49 %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("ID_dbSNP49", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=ID_dbSNP49, m.beta, m.se, pos=m.pos, g.beta=EFFECT, g.se=SE, g.maf=MA_FREQ) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 171846
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [214]:
bas.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,19603,7.75273e-18,0.4852941,6.972840000000001e-18,0.4364553,0.07825068,ME_97_5-6:28558600-31905730,0.5147059,0.15202987,False
2,19603,7.990045e-18,0.5001492,7.103347e-18,0.4446306,0.05522025,ME_97_1-6:28558600-31905730,0.4998508,0.11047346,False
3,15761,7.19554e-18,0.5607556,5.052486e-18,0.3937269,0.0455175,ME_92_4-6:29991224-32464002,0.4392444,0.10362682,False
4,15761,5.622427e-18,0.4381613,6.553215e-18,0.5106779,0.05116079,ME_92_1-6:29991224-32464002,0.5618387,0.09105956,False
5,19603,6.3855220000000005e-18,0.3997116,8.798445e-18,0.5507393,0.04954914,ME_97_3-6:28558600-31905730,0.6002884,0.08254223,False
6,15761,6.4629380000000004e-18,0.5036632,5.869566e-18,0.4574054,0.03893135,ME_92_3-6:29991224-32464002,0.4963368,0.07843737,False


In [215]:
rm(bas.ct)

## Platelet Counts

**Study**: [GCST90002402](https://www.ebi.ac.uk/gwas/studies/GCST90002402)

In [216]:
plt.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002402_buildGRCh37.tsv", sep="\t")

In [217]:
plt.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002402")

plt.snps <- unique(plt.me$snp)

In [218]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% plt.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [219]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

plt.ct <- plt.ct %>%
    dplyr::filter(variant_id %in% all.snps) %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele))

In [220]:
plt.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = plt.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [221]:
plt.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,3269,3.112417e-31,4.251469e-07,1.7643459999999998e-26,0.02312357,0.976876,ME_63_4-11:46459981-48629440,0.9999996,0.97687642,True
2,4243,3.108423e-24,0.3946298,1.39636e-24,0.17684627,0.42852393,ME_88_1-14:64291924-66448148,0.6053702,0.70787087,True
3,4243,9.063324e-25,0.1150634,4.059625e-24,0.51501964,0.36991696,ME_88_4-14:64291924-66448148,0.8849366,0.41801521,False
4,15841,2.042592e-89,0.2035843,6.1195169999999996e-89,0.60985546,0.18656022,ME_92_5-6:29991224-32464002,0.7964157,0.23424981,False
5,4243,5.692669e-25,0.07227127,6.515799e-24,0.82711254,0.10061619,ME_88_3-14:64291924-66448148,0.9277287,0.10845432,False
6,15841,5.690077e-89,0.5671277,3.995218e-89,0.39818791,0.03468443,ME_92_4-6:29991224-32464002,0.4328723,0.08012623,False


In [50]:
rm(plt.ct)

## Erythrocyte Counts

**Study**: [GCST90002403](https://www.ebi.ac.uk/gwas/studies/GCST90002403)

In [222]:
ert.ct <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/GCST90002403_buildGRCh37.tsv", sep="\t")

In [223]:
ert.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST90002403")

ert.snps <- unique(ert.me$snp)

In [224]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% ert.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [225]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

ert.ct <- ert.ct %>%
    dplyr::filter(variant_id %in% all.snps) %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele))

In [226]:
ert.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = ert.ct %>%
        dplyr::filter(variant_id %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("variant_id", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=variant_id, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=standard_error, g.maf=MA_FREQ) %>%
        dplyr::mutate(g.maf = sapply(g.maf, function(p) { min(p, 1 - p) }))
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    N = 408112
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "quant"
    gwas.assoc$N = N
    gwas.assoc$MAF = all.info$g.maf
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [227]:
ert.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,15841,4.07367e-69,0.335936,3.6994100000000003e-69,0.3049294,0.35913458,ME_92_3-6:29991224-32464002,0.664064,0.54081321,False
2,19700,7.273595e-69,0.48233185,6.543529000000001e-69,0.4338976,0.08377056,ME_97_5-6:28558600-31905730,0.5176681,0.16182289,False
3,15841,6.759267e-69,0.55740419,4.745936e-69,0.3913541,0.0512417,ME_92_4-6:29991224-32464002,0.4425958,0.11577539,False
4,2261,1.236089e-07,0.09355238,1.073041e-06,0.8120277,0.09441875,ME_102_2-17:17145294-19676395,0.9064464,0.10416363,False
5,19700,7.665088e-69,0.50829277,6.812286000000001e-69,0.4517309,0.03997636,ME_97_1-6:28558600-31905730,0.4917072,0.08130114,False
6,19700,6.054653e-69,0.40150044,8.339848000000001e-69,0.5530263,0.0454733,ME_97_3-6:28558600-31905730,0.5984996,0.07597884,False


In [228]:
rm(ert.ct)

## Rheumatoid Arthritis

**Study**: [GCST005569](https://www.ebi.ac.uk/gwas/studies/GCST005569)

In [286]:
ra <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/eyre_2012_23143596_ra_efo0000685_1_ichip.sumstats.tsv", sep="\t")

In [287]:
head(ra)

chrom,pos,rsid,other_allele,effect_allele,p,beta,se,OR,OR_lower,OR_upper
<chr>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1118275,rs61733845,G,A,0.3552,0.03979747,0.04304523,1.0406,0.9564075,1.132204
1,1120431,rs1320571,G,A,0.9195,-0.004309272,0.04263922,0.9957,0.9158688,1.08249
1,1135242,rs9729550,A,C,0.1089,0.037969931,0.02368447,1.0387,0.991584,1.088055
1,1218086,rs6603788,G,A,0.37,0.0329511,0.03675636,1.0335,0.9616628,1.110703
1,1335790,rs1240708,A,G,0.6963,-0.009343515,0.02393809,0.9907,0.9452913,1.03829
1,1493727,rs880051,G,A,0.5926,-0.021325787,0.03985591,0.9789,0.9053412,1.058435


In [289]:
ra.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST005569")

ra.snps <- unique(ra.me$snp)

In [290]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% ra.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [292]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

ra <- ra %>%
    dplyr::filter(rsid %in% all.snps) %>%
    dplyr::filter(se > 0) %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele))

In [306]:
ra.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = ra %>%
        dplyr::filter(rsid %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("rsid", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=rsid, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=se)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "cc"
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [307]:
ra.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor))

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>


In [308]:
rm(ra)

## Inflammatory Bowel Disease

**Study**: [GCST004131](https://www.ebi.ac.uk/gwas/studies/GCST004131)

In [328]:
ibd <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/ibd_build37_59957_20161107_corrected.txt", sep="\t")

In [329]:
head(ibd)

chr,pos,iupac,snp,MarkerName,Allele1,Allele2,Effect,StdErr,P.value,Direction,HetISq,HetChiSq,HetDf,HetPVal,Pval_IBDseq,Pval_IIBDGC,Pval_GWAS3,Min_single_cohort_pval
<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,100000012,K,rs10875231,1:100000012_G_T,t,g,-0.0078,0.0147,0.595,+-+,82.7,11.592,2,0.00304,0.23959,0.2746,0.0022914,0.0022914
1,10000006,R,rs186077422,1:10000006_G_A,a,g,0.0155,0.1109,0.8886,+--,54.3,4.373,2,0.1123,0.0546277,0.9403,0.404895,0.0546277
1,100000827,Y,rs6678176,1:100000827_C_T,t,c,-0.0144,0.0136,0.2915,+-+,72.4,7.253,2,0.02661,0.499568,0.647,0.00551736,0.00551736
1,100000843,Y,rs78286437,1:100000843_T_C,t,c,0.0374,0.0289,0.1954,+++,0.0,0.575,2,0.7503,0.387836,0.2238,0.869664,0.2238
1,100001138,R,rs144406489,1:100001138_A_G,a,g,-0.0212,0.0458,0.6435,--+,28.3,2.791,2,0.2478,0.296882,0.3558,0.302438,0.296882
1,100001201,K,rs76909621,1:100001201_G_T,t,g,0.0087,0.0211,0.6805,+-+,79.0,9.539,2,0.008485,0.0241856,0.303,0.0590515,0.0241856


In [330]:
ibd.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST004131")

ibd.snps <- unique(ibd.me$snp)

In [331]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% ibd.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [332]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

ibd <- ibd %>%
    dplyr::filter(snp %in% all.snps) %>%
    dplyr::filter(StdErr > 0) %>%
    dplyr::mutate(Allele1 = str_to_upper(Allele1), Allele2 = str_to_upper(Allele2)) %>%
    dplyr::mutate(IUPAC = alleles.iupac(Allele1, Allele2))

In [334]:
ibd.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = ibd %>%
        dplyr::filter(snp %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("snp", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=snp, m.beta, m.se, pos=m.pos, g.beta=Effect, g.se=StdErr)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "cc"
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [335]:
ibd.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor))

nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
3082,5.209767e-11,7.24791e-09,0.000534452,0.07342777,0.9260378,ME_71_1-3:100304904-102512523,0.9994655,0.926533,True
2936,8.230776e-10,1.145055e-07,0.001048622,0.14502894,0.8539223,ME_47_1-3:100415852-102512523,0.9989513,0.8548188,True
2936,8.868164e-05,0.01233727,0.001287713,0.17833701,0.8079493,ME_47_3-3:100415852-102512523,0.9862863,0.8191833,True
2936,3.377202e-05,0.004698319,0.001470389,0.20376857,0.790029,ME_47_4-3:100415852-102512523,0.9937975,0.7949597,True
3082,0.0003866537,0.05379188,0.00160015,0.22189313,0.7223282,ME_71_2-3:100304904-102512523,0.9442213,0.7649988,True
2936,7.492296e-11,1.042318e-08,0.001808375,0.25083136,0.7473603,ME_47_5-3:100415852-102512523,0.9981916,0.7487142,True
2541,2.298172e-30,0.6451091,7.813241000000001e-31,0.21918608,0.1357048,ME_94_2-3:47689679-50854451,0.3548909,0.3823846,False
2541,2.2530759999999998e-30,0.6324503,1.0330039999999999e-30,0.28989186,0.07765782,ME_94_1-3:47689679-50854451,0.3675497,0.2112852,False
3082,0.001143117,0.1590322,0.004770635,0.66352675,0.1715273,ME_71_3-3:100304904-102512523,0.835054,0.2054086,False
2541,2.261531e-39,6.348236e-10,3.562455e-30,0.99999996,3.783689e-08,ME_94_4-3:47689679-50854451,1.0,3.783689e-08,False


In [336]:
rm(ibd)

## Systemic Lupus Erythromatus

**Study**: [GCST003156](https://www.ebi.ac.uk/gwas/studies/GCST003156)

In [277]:
sle <- fread("~/gains_team282/nikhil/data/EBI_GWAS_Catalog/bentham_2015_26502338_sle_efo0002690_1_gwas.sumstats.tsv", sep="\t")

In [278]:
head(sle)

chrom,pos,rsid,other_allele,effect_allele,p,beta,se,OR,OR_lower,OR_upper
<chr>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,768253,rs2977608,C,A,0.623366,0.019802627,0.0403242,1.02,0.9424873,1.103888
1,781845,rs61768199,A,G,0.889415,0.009950331,0.07156221,1.01,0.8778219,1.162081
1,787606,rs3863622,G,T,0.864496,-0.010050336,0.05889313,0.99,0.8820726,1.111133
1,787844,rs2905053,T,C,0.987826,0.0,0.0,1.0,1.0,1.0
1,790465,rs61768207,G,A,0.753144,0.019802627,0.06296614,1.02,0.9015759,1.153979
1,796375,rs12083781,T,C,0.348501,0.048790164,0.0520428,1.05,0.9481774,1.162757


In [279]:
sle.me <- ebi.mqtl %>%
    dplyr::filter(accession == "GCST003156")

sle.snps <- unique(sle.me$snp)

In [280]:
module.qtl.set <- module.qtl.sum %>%
    dplyr::filter(snp %in% sle.snps)

module.qtl.set <- unique(module.qtl.set$module.qtl)

In [281]:
all.snps <- module.qtl.sum %>%
    dplyr::filter(module.qtl %in% module.qtl.set)

all.snps <- all.snps$snp

sle <- sle %>%
    dplyr::filter(rsid %in% all.snps) %>%
    dplyr::filter(se > 0) %>%
    dplyr::mutate(IUPAC = alleles.iupac(other_allele, effect_allele))

In [282]:
sle.coloc <- lapply(module.qtl.set, function(module.qtl.id) {
 
    module.id = gsub("-.*", "", module.qtl.id)
    
    mqtl.locus.info = module.qtl.sum %>%
        dplyr::filter(module.qtl == module.qtl.id) %>%
        dplyr::select(m.snp = snp, m.beta=beta, m.se=se, m.pos=pos, major_allele, minor_allele, IUPAC)
    
    all.info = sle %>%
        dplyr::filter(rsid %in% mqtl.locus.info$m.snp) %>%
        merge(
            ., mqtl.locus.info, 
            by.x=c("rsid", "IUPAC"),
            by.y=c("m.snp", "IUPAC")
        ) %>%
        dplyr::select(snp=rsid, m.beta, m.se, pos=m.pos, g.beta=beta, g.se=se)
    
    module.qtl = list()
    module.qtl$beta = all.info$m.beta
    module.qtl$varbeta = all.info$m.se^2
    module.qtl$snp = all.info$snp
    module.qtl$position = all.info$pos
    module.qtl$type = "quant"
    module.qtl$sdY = sd(eigengenes[, module.id], na.rm=TRUE)
    
    gwas.assoc = list()
    gwas.assoc$beta = all.info$g.beta
    gwas.assoc$varbeta = all.info$g.se^2
    gwas.assoc$snp = all.info$snp
    gwas.assoc$position = all.info$pos
    gwas.assoc$type = "cc"
    
    log = capture.output({
        abf.res = suppressWarnings(coloc.abf(gwas.assoc, module.qtl))
    })
    
    data.frame(t(abf.res$summary)) %>%
        dplyr::mutate(Module.QTL = module.qtl.id)
}) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(PP3plusPP4 = PP.H3.abf + PP.H4.abf) %>%
    dplyr::mutate(COLOC.Factor = PP.H4.abf / PP3plusPP4) %>%
    dplyr::mutate(Colocalise = (PP3plusPP4 > 0.25) & (COLOC.Factor > 0.7))

In [283]:
sle.coloc %>%
    dplyr::filter(PP3plusPP4 > 0.25) %>%
    dplyr::arrange(desc(COLOC.Factor)) %>%
    head()

Unnamed: 0_level_0,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,Module.QTL,PP3plusPP4,COLOC.Factor,Colocalise
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<lgl>
1,5470,1.623247e-167,6.7539899999999995e-77,1.767132e-93,0.006359025,0.993641,ME_84_1-6:28836283-33995104,1.0,0.993641,True
2,5470,8.396429e-166,3.4935780000000004e-75,1.95098e-93,0.007124746,0.9928753,ME_84_2-6:28836283-33995104,1.0,0.9928753,True
3,2434,1.694771e-91,0.7051586,4.940225e-92,0.205463052,0.08937838,ME_92_5-6:29991224-32464002,0.2948414,0.3031405,False
4,5470,8.164083999999999e-100,3.396904e-09,2.403389e-91,0.999999789,2.071334e-07,ME_84_5-6:28836283-33995104,1.0,2.071334e-07,False
5,2434,1.933386e-103,8.044414e-13,2.403389e-91,1.0,2.846134e-14,ME_92_2-6:29991224-32464002,1.0,2.846134e-14,False
6,5470,4.086501e-157,1.700308e-66,2.403389e-91,1.0,3.7561440000000005e-60,ME_84_4-6:28836283-33995104,1.0,3.7561440000000005e-60,False


In [288]:
rm(sle)