# Review candidate colocalizing eQTLs detected in the OneK1K dataset

In [1]:
library(scales)
library(xtable)
library(coloc)
# coloc returns one posterior probability per hypothesis:
#   H0 - no causal variant
#   H1 - causal variant for trait 1 only
#   H2 - causal variant for trait 2 only
#   H3 - two distinct causal variants
#   H4 - one common causal variant (this is the value we report)

# Output locations for figures and tables
fig_dir <- "/data/srlab/lrumker/MCSC_Project/cna-qtl/figs/"
tab_dir <- "/data/srlab/lrumker/MCSC_Project/cna-qtl/tables/"

This is a new update to coloc.



In [2]:
# Compute the posterior probability that a csaQTL and a pseudobulk cis-eQTL
# share a single causal variant (coloc hypothesis H4).
#
# Arguments:
#   lead_snp         Colon-separated lead SNP ID of the csaQTL
#                    (e.g. "12:10583611:C:T"); used to build input file names
#                    and to locate the phenotype column.
#   csaQTL_celltype  Cell-type label of the csaQTL analysis; used in file names.
#   sel_gene         Gene whose eQTL summary statistics are tested.
#   expr_celltype    Cell-type label of the expression data; used in file names.
#   N_eqtl_samples   Sample size of the eQTL analysis, passed to coloc as N.
#
# Returns:
#   The "PP.H4.abf" element of the coloc.abf() summary (a named numeric of
#   length one).
#
# Stops with an error if no eQTL summary statistics exist for `sel_gene`.
eqtl_coloc <- function(lead_snp, csaQTL_celltype, sel_gene, expr_celltype,
                       N_eqtl_samples) {
    # Import pseudobulk eQTL summary statistics and keep the selected gene
    eqtl_path <- paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/results/pseudobulk/",
                        csaQTL_celltype, "_", lead_snp, "_csaQTL_test_", expr_celltype,
                        "_eQTLs_pseudobulk_eQTLs.csv")
    eqtls <- read.csv(eqtl_path, row.names = 1)
    eqtls <- eqtls[eqtls$gene == sel_gene, ]
    if (nrow(eqtls) == 0) {
        stop("No pseudobulk eQTL summary statistics found for gene '", sel_gene,
             "' in ", eqtl_path, call. = FALSE)
    }

    # Add MAF from the cis-SNP VCF.
    # skip = 15 drops the leading lines of the file, presumably the VCF
    # meta-information header -- confirm if the VCF layout changes.
    vcf_path <- paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/",
                       csaQTL_celltype, "_", lead_snp, "_cis.vcf.gz")
    G_info <- read.delim(vcf_path, stringsAsFactors = FALSE, header = TRUE, skip = 15)
    rownames(G_info) <- G_info$ID
    # Reorder (and subset) the VCF records so they line up with the eQTL rows
    G_info <- G_info[eqtls$variant, ]

    # Take the second ";"-separated INFO entry and return the number after "=".
    # NOTE(review): assumes the MAF is always the second INFO entry -- confirm.
    get_MAF <- function(info_str) {
        maf_str <- strsplit(info_str, ";")[[1]][2]
        as.numeric(strsplit(maf_str, "=")[[1]][2])
    }
    eqtls$MAF <- vapply(G_info$INFO, get_MAF, numeric(1), USE.NAMES = FALSE)

    # csaQTL (GWAS-style) summary statistics for the same cis window
    gwas_res <- read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                                  csaQTL_celltype, "_", lead_snp, "_cis.qassoc"),
                           header = TRUE, stringsAsFactors = FALSE)

    # Standard deviation of the lead SNP's phenotype. The column name is the
    # SNP ID prefixed with "X" and with ":" replaced by ".".
    all_phenos <- read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_",
                                    csaQTL_celltype, "/spheno.tsv"),
                             header = TRUE, row.names = 1)
    pheno_col <- paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse = "."))
    # Single-bracket indexing errors out if the phenotype column is absent
    lead_pheno_trait_sd <- sd(all_phenos[pheno_col][, 1])

    # run coloc
    # csaQTL dataset: sdY is supplied directly
    our_gwas_coloc <- list(beta = gwas_res$BETA, varbeta = gwas_res$SE^2,
                           sdY = lead_pheno_trait_sd, type = "quant",
                           snp = gwas_res$SNP)
    # eQTL dataset: no sdY available, so N and MAF are supplied instead
    ref_gwas_coloc <- list(beta = eqtls$beta, varbeta = eqtls$stderr^2,
                           N = N_eqtl_samples, MAF = eqtls$MAF, type = "quant",
                           snp = eqtls$variant)
    coloc_res <- coloc.abf(dataset1 = our_gwas_coloc, dataset2 = ref_gwas_coloc)

    # Extract by name rather than by position so the result does not depend
    # on the layout of the summary vector
    coloc_res$summary["PP.H4.abf"]
}

## Evaluate colocalization for cis-eQTLs

Genes with eQTLs detected in the pseudobulk analysis, which were subsequently tested for eQTLs using the single-cell model, are evaluated here for colocalization between the eQTL and the csaQTL.

For the csaQTL on chromosome 12 associated with expansion of activated NK cells, the candidate eQTLs from our OneK1K analyses are:
- Expression of KLRC1, KLRC2 and KLRC3 in NK cells
- Expression of KLRK1 in B cells

For the csaQTL on chromosome 15 associated with depletion of CD16+ monocytes, the candidate eQTLs from our OneK1K analyses are:
- Expression of BCL2A1 in B cells

In [3]:
# Results table: one row per candidate eGene
eqtl_res <- data.frame(matrix(ncol = 7, nrow = 0))
colnames(eqtl_res) <- c("Cell Type", "Lead SNP", "eGene", "Beta", "P", "csaQTL", "Colocalization")

lead_snp <- "12:10583611:C:T"
csaQTL_celltype <- "NK"
expr_celltype <- "NK"
N_eqtl_samples <- 935

for (sel_gene in c("KLRC1", "KLRC2", "KLRC3")) {
    # Posterior probability of a shared causal variant (PP.H4.abf)
    PPH4 <- eqtl_coloc(lead_snp, csaQTL_celltype, sel_gene,
                       expr_celltype, N_eqtl_samples)

    # lead SNP from single-cell eQTL analysis
    res <- read.csv(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/results/sceQTL/",
                           csaQTL_celltype, "_", lead_snp, "_csaQTL_test_", expr_celltype,
                           "_sceQTLs_", sel_gene, ".csv"))

    # which.min() always selects a single row: it takes the first of any tied
    # minima and ignores NA p-values, so the 7-column row below stays well-formed
    top_hit <- res[which.min(res$P), ]
    if (nrow(top_hit) != 1) {
        stop("No usable single-cell eQTL p-values for gene '", sel_gene, "'",
             call. = FALSE)
    }

    # Values are combined through a character matrix, so every column of the
    # results table is stored as text
    new <- data.frame(matrix(c(expr_celltype, top_hit$SNP, sel_gene,
                               top_hit$BETA,
                               top_hit$P,
                               paste0(csaQTL_celltype, "_", lead_snp),
                               PPH4), nrow = 1))
    colnames(new) <- colnames(eqtl_res)
    eqtl_res <- rbind(eqtl_res, new)
}

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.91e-67  7.16e-55  9.80e-15  3.58e-02  9.64e-01 
[1] "PP abf for shared variant: 96.4%"


“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 3.74e-69  1.40e-56  2.67e-13  1.00e+00  2.89e-14 
[1] "PP abf for shared variant: 2.89e-12%"


“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
1.93e-174 7.25e-162  2.67e-13  1.00e+00  6.65e-05 
[1] "PP abf for shared variant: 0.00665%"


In [4]:
# Same csaQTL as the previous cell (lead_snp and csaQTL_celltype are reused),
# now tested against expression in B cells
expr_celltype <- "B"
N_eqtl_samples <- 910

for (sel_gene in c("KLRK1")) {
    # Posterior probability of a shared causal variant (PP.H4.abf)
    PPH4 <- eqtl_coloc(lead_snp, csaQTL_celltype, sel_gene,
                       expr_celltype, N_eqtl_samples)

    # lead SNP from single-cell eQTL analysis
    res <- read.csv(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/results/sceQTL/",
                           csaQTL_celltype, "_", lead_snp, "_csaQTL_test_", expr_celltype,
                           "_sceQTLs_", sel_gene, ".csv"))

    # which.min() always selects a single row: it takes the first of any tied
    # minima and ignores NA p-values, so the 7-column row below stays well-formed
    top_hit <- res[which.min(res$P), ]
    if (nrow(top_hit) != 1) {
        stop("No usable single-cell eQTL p-values for gene '", sel_gene, "'",
             call. = FALSE)
    }

    # Values are combined through a character matrix, so every column of the
    # results table is stored as text
    new <- data.frame(matrix(c(expr_celltype, top_hit$SNP, sel_gene,
                               top_hit$BETA,
                               top_hit$P,
                               paste0(csaQTL_celltype, "_", lead_snp),
                               PPH4), nrow = 1))
    colnames(new) <- colnames(eqtl_res)
    eqtl_res <- rbind(eqtl_res, new)
}

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.13e-61  4.23e-49  2.67e-13  1.00e+00  3.98e-05 
[1] "PP abf for shared variant: 0.00398%"


In [5]:
# Chromosome 15 csaQTL (Myeloid analysis), tested against expression in B cells
lead_snp <- "15:80263217:C:T"
csaQTL_celltype <- "Myeloid"
expr_celltype <- "B"
N_eqtl_samples <- 910

for (sel_gene in c("BCL2A1")) {
    # Posterior probability of a shared causal variant (PP.H4.abf)
    PPH4 <- eqtl_coloc(lead_snp, csaQTL_celltype, sel_gene,
                       expr_celltype, N_eqtl_samples)

    # lead SNP from single-cell eQTL analysis
    res <- read.csv(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/results/sceQTL/",
                           csaQTL_celltype, "_", lead_snp, "_csaQTL_test_", expr_celltype,
                           "_sceQTLs_", sel_gene, ".csv"))

    # which.min() always selects a single row: it takes the first of any tied
    # minima and ignores NA p-values, so the 7-column row below stays well-formed
    top_hit <- res[which.min(res$P), ]
    if (nrow(top_hit) != 1) {
        stop("No usable single-cell eQTL p-values for gene '", sel_gene, "'",
             call. = FALSE)
    }

    # Values are combined through a character matrix, so every column of the
    # results table is stored as text
    new <- data.frame(matrix(c(expr_celltype, top_hit$SNP, sel_gene,
                               top_hit$BETA,
                               top_hit$P,
                               paste0(csaQTL_celltype, "_", lead_snp),
                               PPH4), nrow = 1))
    colnames(new) <- colnames(eqtl_res)
    eqtl_res <- rbind(eqtl_res, new)
}

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 9.63e-90  1.13e-85  8.49e-05  1.00e+00  6.46e-06 
[1] "PP abf for shared variant: 0.000646%"


In [6]:
# reformat results for display and save results
# Tidy the numeric columns for display, then export the table as LaTeX

# Effect sizes: two decimal places, stored as text
eqtl_res$Beta <- as.character(round(as.numeric(eqtl_res$Beta), 2))

# P-values: scientific notation with two significant digits
eqtl_res$P <- format(as.numeric(eqtl_res$P), digits = 2, scientific = TRUE)

# Colocalization probabilities: scientific notation by default; values above
# 0.01 are switched to plain two-decimal numbers
coloc_fmt <- format(as.numeric(eqtl_res$Colocalization),
                    digits = 2, scientific = TRUE)
non_sci <- as.numeric(coloc_fmt) > 0.01
coloc_fmt[non_sci] <- round(as.numeric(coloc_fmt[non_sci]), 2)
eqtl_res$Colocalization <- coloc_fmt

# Write the LaTeX table to the tables directory and display the data frame
tex_path <- paste0(tab_dir, "OneK1K_eQTLs.tex")
print(xtable(eqtl_res, type = "latex"), file = tex_path)
eqtl_res

Cell Type,Lead SNP,eGene,Beta,P,csaQTL,Colocalization
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NK,12:10594848:C:A,KLRC1,-0.4,1e-41,NK_12:10583611:C:T,0.96
NK,12:10580062:C:T,KLRC2,-0.86,5.2e-64,NK_12:10583611:C:T,2.9e-14
NK,12:10574001:T:C,KLRC3,-0.59,1.1e-117,NK_12:10583611:C:T,6.6e-05
B,12:10561279:C:G,KLRK1,-0.6,2.3e-56,NK_12:10583611:C:T,4e-05
B,15:80311721:T:C,BCL2A1,0.31,2.6e-64,Myeloid_15:80263217:C:T,6.5e-06
