# Review published eQTL and pQTL candidates for csaQTL colocalization

In [1]:
library(scales)
library(xtable)
library(coloc)
library(dplyr)
# Output of coloc is posterior probabilities of 
# H0 (no causal variant)
# H1 (causal variant for trait 1 only) 
# H2 (causal variant for trait 2 only)
# H3 (two distinct causal variants)
# H4 (one common causal variant) -- we report

fig_dir = "/data/srlab/lrumker/MCSC_Project/cna-qtl/figs/"
tab_dir = "/data/srlab/lrumker/MCSC_Project/cna-qtl/tables/"

This is a new update to coloc.


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
saved_sources = c()
saved_genes = c()
LD_thresh=0.8 # LD threshold (LD to csaQTL lead SNP) for displaying SNPs with published QTL associations

## Review published QTLs for those with some LD to csaQTL lead SNP

In [3]:
lead_snps=c("15:80263217:C:T", "2:111851212:C:T", "11:128070535:A:G", "12:10583611:C:T", "19:16441973:G:A")
csaQTL_celltypes=c("Myeloid", "NK", "NK", "NK", "NK")
for(i_snp in c(1:5)){
    csaQTL_celltype=csaQTL_celltypes[i_snp]
    lead_snp = lead_snps[i_snp]
    
    geno <- read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/",
                          csaQTL_celltype,"_",lead_snp, "_cis.DS.vcf.gz"), row.names=1)
    LD = cor(t(geno))**2

    # Gilchrist NK eQTLs
    i_ref=115 
    infile = paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/Gilchrist/",
                    csaQTL_celltype,"_",lead_snp,"_QTD000",i_ref,"_cis.hg19.renamed.csv")
    # Files are absent if no SNPs in the cis window have nominal associations
    res = suppressWarnings(tryCatch(read.table(infile), error=function(e) NULL))
    if(!is.null(res)){
        res$ID = paste0(res$CHR, rep(":", dim(res)[1]), res$BP, rep(":", dim(res)[1]), 
               res$other_allele, rep(":", dim(res)[1]), res$effect_allele)
        LD_snps = colnames(LD)[which(LD[lead_snp,]>LD_thresh)]
        i_LD_snps = match(LD_snps, res$ID)
        i_LD_snps = i_LD_snps[which(!is.na(i_LD_snps))]
        ld_res = res[i_LD_snps,]
        if(dim(ld_res)[1]>0){ # If any SNPs with LD above the threshold passed the p-value threshold
            for(gene_id in unique(ld_res$gene_id)){ # For a given candidate eGene
                gene_res = res[res$gene_id==gene_id,]
                gene_res = gene_res[gene_res$ID %in% colnames(LD),]
                eQTL_lead_snp = gene_res$ID[which(gene_res$P==min(gene_res$P))]
                if(LD[lead_snp, eQTL_lead_snp]>LD_thresh){ # If eQTL lead snp and csaQTL lead snp in LD
                    saved_sources = c(saved_sources, infile)
                    saved_genes = c(saved_genes, gene_id)
                }
            }
        }
    }
    
    # Sun pQTLs
    i_ref=584 
    infile = paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/Sun/",
                    csaQTL_celltype,"_",lead_snp,"_QTD000",i_ref,"_cis.hg19.renamed.csv")
    res = suppressWarnings(tryCatch(read.table(infile), error=function(e) NULL))
    if(!is.null(res)){
        res$ID = paste0(res$CHR, rep(":", dim(res)[1]), res$BP, rep(":", dim(res)[1]), 
               res$other_allele, rep(":", dim(res)[1]), res$effect_allele)
        LD_snps = colnames(LD)[which(LD[lead_snp,]>LD_thresh)]
        i_LD_snps = match(LD_snps, res$ID)
        i_LD_snps = i_LD_snps[which(!is.na(i_LD_snps))]
        ld_res = res[i_LD_snps,]
        if(dim(ld_res)[1]>0){ # If any SNPs with LD above the threshold passed the p-value threshold
            for(gene_id in unique(ld_res$gene_id)){ # For a given candidate protein
                gene_res = res[res$gene_id==gene_id,]
                gene_res = gene_res[gene_res$ID %in% colnames(LD),]
                eQTL_lead_snp = gene_res$ID[which(gene_res$P==min(gene_res$P))]
                if(LD[lead_snp, eQTL_lead_snp]>LD_thresh){ # If pQTL lead snp and csaQTL lead snp in LD
                    saved_sources = c(saved_sources, infile)
                    saved_genes = c(saved_genes, gene_id)
                }
            }
        }
    } 
                   
    # BLUEPRINT eQTLs — several cell type specific analyses
    # Downloaded all quantification methods but just consider gene expression 'ge'
    for(i_ref in c(21, 26, 31)){
        infile = paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/BLUEPRINT/",
                        csaQTL_celltype,"_",lead_snp,"_QTD0000",i_ref,"_cis.hg19.renamed.csv")
        res = suppressWarnings(tryCatch(read.csv(infile), error=function(e) NULL))
        if(!is.null(res)){
            res$ID = paste0(res$CHR, rep(":", dim(res)[1]), res$BP, rep(":", dim(res)[1]), 
                   res$other_allele, rep(":", dim(res)[1]), res$effect_allele)
            LD_snps = colnames(LD)[which(LD[lead_snp,]>LD_thresh)]
            i_LD_snps = match(LD_snps, res$ID)
            i_LD_snps = i_LD_snps[which(!is.na(i_LD_snps))]
            ld_res = res[i_LD_snps,]
            if(dim(ld_res)[1]>0){ # If any SNPs with LD above the threshold passed the p-value threshold
                for(gene_id in unique(ld_res$gene_id)){ # For a given candidate eGene
                    gene_res = res[res$gene_id==gene_id,]
                    gene_res = gene_res[gene_res$ID %in% colnames(LD),]
                    eQTL_lead_snp = gene_res$ID[which(gene_res$P==min(gene_res$P))]
                    if(LD[lead_snp, eQTL_lead_snp]>LD_thresh){ # If eQTL lead snp and csaQTL lead snp in LD
                        saved_sources = c(saved_sources, infile)
                        saved_genes = c(saved_genes, gene_id)
                    }
                }
            }  
        }                                   
    }
                                        
    # Schmiedel eQTLs — NK
    infile = paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/Schmiedel/",
                    csaQTL_celltype,"_",lead_snp,"_NK_cis.hg19.renamed.csv")
    res = suppressWarnings(tryCatch(read.csv(infile), error=function(e) NULL))
    if(!is.null(res)){
        res$ID = paste0(res$CHR, rep(":", dim(res)[1]), res$BP, rep(":", dim(res)[1]), 
               res$other_allele, rep(":", dim(res)[1]), res$effect_allele)
        LD_snps = colnames(LD)[which(LD[lead_snp,]>LD_thresh)]
        i_LD_snps = match(LD_snps, res$ID)
        i_LD_snps = i_LD_snps[which(!is.na(i_LD_snps))]
        ld_res = res[i_LD_snps,]
        if(dim(ld_res)[1]>0){ # If any SNPs with LD above the threshold passed the p-value threshold
            for(gene_id in unique(ld_res$gene_id)){ # For a given candidate eGene
                gene_res = res[res$gene_id==gene_id,]
                gene_res = gene_res[gene_res$ID %in% colnames(LD),]
                eQTL_lead_snp = gene_res$ID[which(gene_res$P==min(gene_res$P))]
                if(LD[lead_snp, eQTL_lead_snp]>LD_thresh){ # If eQTL lead snp and csaQTL lead snp in LD
                    saved_sources = c(saved_sources, infile)
                    saved_genes = c(saved_genes, gene_id)
                }
            }
        }
    }                                
                                        
    # eQTLgen — aggregate blood
    infile = paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/eQTLgen/",
                    csaQTL_celltype,"_",lead_snp,"_cis.csv")
    res = suppressWarnings(tryCatch(read.table(infile, header = TRUE), error=function(e) NULL))
    if(!is.null(res)){
        res$ID = paste0(res$SNPChr, rep(":", dim(res)[1]), res$SNPPos, rep(":", dim(res)[1]), 
               res$OtherAllele, rep(":", dim(res)[1]), res$AssessedAllele)
        LD_snps = colnames(LD)[which(LD[lead_snp,]>LD_thresh)]
        i_LD_snps = match(LD_snps, res$ID)
        i_LD_snps = i_LD_snps[which(!is.na(i_LD_snps))]
        ld_res = res[i_LD_snps,]
        if(dim(ld_res)[1]>0){ # If any SNPs with LD above the threshold passed the p-value threshold
            for(gene_id in unique(ld_res$GeneSymbol)){ # For a given candidate eGene
                gene_res = res[res$GeneSymbol==gene_id,] 
                gene_res = gene_res[gene_res$ID %in% colnames(LD),]
                eQTL_lead_snp = gene_res$ID[which(gene_res$Pvalue==min(gene_res$Pvalue))]
                if(LD[lead_snp, eQTL_lead_snp]>LD_thresh){ # If eQTL lead snp and csaQTL lead snp in LD
                    saved_sources = c(saved_sources, infile)
                    saved_genes = c(saved_genes, gene_id)
                }
            }
        }  
    }
                                    
    # DICE eQTLs — several cell type specific analyses
    for(expr_celltype in c("MONOCYTES", "NK", "B_CELL_NAIVE", "CD4_NAIVE", "CD4_STIM", "CD8_NAIVE", "CD8_STIM", 
                     "M2", "TFH", "TH17", "TH1", "TH2", "THSTAR", "TREG_MEM", "TREG_NAIVE")){
        infile=paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/DICE/cis_res/",
                          expr_celltype,".renamed.",csaQTL_celltype,"_", lead_snp,"_cis.vcf.gz")
        res = suppressWarnings(tryCatch(read.table(infile), error=function(e) NULL))
        
        if(!is.null(res)){
            get_symbol <-function(info_str){
                return(strsplit(strsplit(info_str,";")[[1]][2],"=")[[1]][2])
            }
            colnames(res) = c("CHR", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO")
            res['gene_ID'] = apply(as.matrix(res$INFO, ncol=1), 1, get_symbol)
            get_pval <-function(info_str){
                return(as.numeric(strsplit(strsplit(info_str,";")[[1]][3],"=")[[1]][2]))
            }
            res['p_val'] = apply(as.matrix(res$INFO, ncol=1), 1, get_pval)

            LD_snps = colnames(LD)[which(LD[lead_snp,]>LD_thresh)]
            i_LD_snps = match(LD_snps, res$ID)
            i_LD_snps = i_LD_snps[which(!is.na(i_LD_snps))]
            ld_res = res[i_LD_snps,]
            if(dim(ld_res)[1]>0){ # If any SNPs with LD above the threshold passed the p-value threshold
                for(gene_id in unique(ld_res$gene_ID)){ # For a given candidate eGene
                    gene_res = res[res$gene_ID==gene_id,]
                    gene_res = gene_res[gene_res$ID %in% colnames(LD),]
                    eQTL_lead_snp = gene_res$ID[which(gene_res$p_val==min(gene_res$p_val))]
                    if(LD[lead_snp, eQTL_lead_snp]>LD_thresh){ # If eQTL lead snp and csaQTL lead snp in LD
                        saved_sources = c(saved_sources, infile)
                        saved_genes = c(saved_genes, gene_id)
                    }
                }
            }  
        }
    }
}

“the condition has length > 1 and only the first element will be used”
“the condition has length > 1 and only the first element will be used”
“the condition has length > 1 and only the first element will be used”
“the condition has length > 1 and only the first element will be used”
“the condition has length > 1 and only the first element will be used”
“the condition has length > 1 and only the first element will be used”


In [4]:
# Reformat table of published QTL candidates for colocalization with csaQTL
candidates = data.frame("infile"= saved_sources, "gene_id"= saved_genes)
get_study <- function(in_str) strsplit(in_str, "/")[[1]][9]
get_csaQTL <- function(in_str) paste0(strsplit(strsplit(in_str, "/")[[1]][10], "_")[[1]][1:2], collapse="_")
get_csaQTL_DICE <- function(in_str) gsub('_cis', '', strsplit(strsplit(in_str, "/")[[1]][11], "\\.")[[1]][3])
get_expr_celltype <- function(in_str) strsplit(strsplit(in_str, "/")[[1]][10], "_")[[1]][3]
get_expr_celltype_DICE <- function(in_str) strsplit(strsplit(in_str, "/")[[1]][11], "\\.")[[1]][1]
candidates$study = apply(as.matrix(candidates$infile, ncol=1), 1, get_study)
candidates$csaQTL = apply(as.matrix(candidates$infile, ncol=1), 1, get_csaQTL)
candidates$csaQTL[which(candidates$study=="DICE")] = apply(as.matrix(candidates$infile[which(candidates$study=="DICE")], 
                                                        ncol=1), 1, get_csaQTL_DICE)
candidates$celltype = apply(as.matrix(candidates$infile, ncol=1), 1, get_expr_celltype)
candidates$celltype[which(candidates$study=="DICE")] = apply(as.matrix(candidates$infile[which(candidates$study=="DICE")], 
                                                        ncol=1), 1, get_expr_celltype_DICE)
candidates$celltype[which(candidates$study=="eQTLgen")] = "Blood"
export = candidates[c(1,2,4),c("csaQTL", "gene_id","celltype", "study")] # Ignore lncRNA (RP11-277P12.9) and pseudogene (RN7SL823P)
candidates[,c("csaQTL", "gene_id","celltype", "study")]

csaQTL,gene_id,celltype,study
<chr>,<chr>,<chr>,<chr>
Myeloid_15:80263217:C:T,BCL2A1,Blood,eQTLgen
NK_11:128070535:A:G,ENSG00000134954,NK,Schmiedel
NK_12:10583611:C:T,RP11-277P12.9,B_CELL_NAIVE,DICE
NK_12:10583611:C:T,KLRC4,CD8_STIM,DICE
NK_19:16441973:G:A,RN7SL823P,TH2,DICE


## Test colocalization

### eQTLgen BCL2A1

In [5]:
i_locus = 1
lead_snp = strsplit(candidates$csaQTL[i_locus], "_")[[1]][2]
csaQTL_celltype = strsplit(candidates$csaQTL[i_locus], "_")[[1]][1]
sel_gene = candidates$gene_id[i_locus]
study=candidates$study[i_locus]

# Import published eQTL summary statistics
eqtls =read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/", study, "/",
                        csaQTL_celltype,"_",lead_snp,"_cis_",sel_gene,".csv"), header = TRUE)
eqtls$ID = paste0(eqtls$SNPChr, ":", eqtls$SNPPos, ":", eqtls$OtherAllele, ":", 
                 eqtls$AssessedAllele)

# Add MAF using genotypes observed in OneK1K cohort
G_info = read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/", csaQTL_celltype,
                          "_",lead_snp,"_cis.vcf.gz"), stringsAsFactors = FALSE, header = TRUE, skip=15)
rownames(G_info) = G_info$ID
eqtls = eqtls[eqtls$ID %in% rownames(G_info),]
G_info = G_info[eqtls$ID,]
G_info = G_info[,c(1:9)]
get_MAF <-function(info_str){
    maf_str = strsplit(info_str,";")[[1]][2]
    return(as.numeric(strsplit(maf_str,"=")[[1]][2]))
}
G_info$MAF = apply(as.matrix(G_info$INFO, ncol=1),1,get_MAF)
eqtls$MAF = G_info$MAF

# Compute beta from available metrics in sumstats
eqtls$beta = eqtls$Zscore / (2*eqtls$MAF*(1 - eqtls$MAF)*(eqtls$NrSamples + eqtls$Zscore**2))**(1/2)

In [6]:
# run coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             csaQTL_celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        csaQTL_celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(pvalues = eqtls$Pvalue,
                    N=eqtls$NrSamples, MAF=eqtls$MAF, type = "quant", snp = eqtls$ID)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.17e-50  1.11e-46  1.93e-06  1.73e-02  9.83e-01 
[1] "PP abf for shared variant: 98.3%"


In [7]:
# Store information
eqtls[order(eqtls$Pvalue),][1,] 
eQTL_lead_snps = c("15:80260274:A:T")
eQTL_pvalues = c(9.2612e-52)
eQTL_betas = c(-0.239934)
coloc_prob = c(0.98)

Unnamed: 0_level_0,Pvalue,SNP,SNPChr,SNPPos,AssessedAllele,OtherAllele,Zscore,Gene,GeneSymbol,GeneChr,GenePos,NrCohorts,NrSamples,FDR,BonferroniP,ID,MAF,beta
Unnamed: 0_level_1,<dbl>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,<chr>,<chr>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
1,9.261199999999999e-52,rs8025805,15,80260274,T,A,-15.1368,ENSG00000140379,BCL2A1,15,80258509,6,9922,0,1.1793e-43,15:80260274:A:T,0.2677,-0.2399349


### Schmiedel ETS1

In [8]:
i_locus = 2
lead_snp = strsplit(candidates$csaQTL[i_locus], "_")[[1]][2]
csaQTL_celltype = strsplit(candidates$csaQTL[i_locus], "_")[[1]][1]
sel_gene = candidates$gene_id[i_locus]
study=candidates$study[i_locus]

# Import published eQTL summary statistics
# Checked for SNP flips, but no additional SNPs are captured when considering flips
eqtls =read.csv(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/", study, "/",
                        csaQTL_celltype,"_",lead_snp,"_NK_cis_",sel_gene,".hg19.renamed.csv"), header = TRUE)
eqtls$ID = paste0(eqtls$CHR, ":", eqtls$BP, ":", eqtls$other_allele, ":", 
                 eqtls$effect_allele)
# estimate total samples using reported ma_samples, MAF and assuming HWE
eqtls$N = 1/((eqtls$maf**2+2*eqtls$maf*(1-eqtls$maf))/eqtls$ma_samples)

In [9]:
# run coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             csaQTL_celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% eqtls$ID,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        csaQTL_celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
eqtls = eqtls[eqtls$ID %in% gwas_res$SNP,]
ref_gwas_coloc = list(beta = eqtls$BETA, varbeta = eqtls$SE**2,
                    N=eqtls$N, MAF=eqtls$maf, type = "quant", snp = eqtls$ID)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“minimum p value is: 0.00022998
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.”
“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 3.20e-18  3.89e-01  1.77e-18  2.14e-01  3.97e-01 
[1] "PP abf for shared variant: 39.7%"


In [10]:
# Store information
eqtls[order(eqtls$P),][1,] # lead SNP
eQTL_lead_snps = c(eQTL_lead_snps, "11:128085408:C:T")
eQTL_pvalues = c(eQTL_pvalues, 0.000431584)
eQTL_betas = c(eQTL_betas, 0.391952)
coloc_prob = c(coloc_prob, 0.40)

Unnamed: 0_level_0,CHR,BP,other_allele,effect_allele,maf,ma_samples,P,BETA,SE,gene_id,ID,N
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<dbl>
3065,11,128085408,C,T,0.255556,36,0.000431584,0.391952,0.106405,ENSG00000134954,11:128085408:C:T,80.75313


### DICE KLRC4

In [11]:
i_locus = 3
lead_snp = strsplit(candidates$csaQTL[i_locus], "_")[[1]][2]
csaQTL_celltype = strsplit(candidates$csaQTL[i_locus], "_")[[1]][1]
sel_gene = candidates$gene_id[i_locus]
study=candidates$study[i_locus]

In [12]:
# Import published eQTL summary statistics
eqtls =read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/sumstats/", study,
                       "/cis_res/CD8_STIM.renamed.NK_12:10583611:C:T_cis.vcf.gz"),
                skip = 0, header = FALSE)
colnames(eqtls) = c("CHR", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO")
eqtls$N = rep(91, dim(eqtls)[1]) # Schmiedel et al Cell 2018 indicates N=91 donors total
get_beta <-function(info_str){
    return(as.numeric(strsplit(strsplit(info_str, split=";")[[1]][4], split="=")[[1]][2]))
}
eqtls$BETA = apply(as.matrix(eqtls$INFO, ncol=1), 1, get_beta)

get_pval <-function(info_str){
    return(as.numeric(strsplit(strsplit(info_str, split=";")[[1]][3], split="=")[[1]][2]))
}
eqtls$P = apply(as.matrix(eqtls$INFO, ncol=1), 1, get_pval)

get_gene <-function(info_str){
    return(strsplit(strsplit(info_str, split=";")[[1]][2], split="=")[[1]][2])
}
eqtls$Gene = apply(as.matrix(eqtls$INFO, ncol=1), 1, get_gene)
eqtls = eqtls[eqtls$Gene=='KLRC4',] # Refine to candidate gene

# Add MAF using genotypes observed in OneK1K cohort
# NOTE that DICE donors represent multiple genetic ancestries (Table S1), ~50% described as "White"
G_info = read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/", csaQTL_celltype,
                          "_",lead_snp,"_cis.vcf.gz"), stringsAsFactors = FALSE, header = TRUE, skip=15)
rownames(G_info) = G_info$ID
eqtls = eqtls[eqtls$ID %in% rownames(G_info),]
G_info = G_info[eqtls$ID,]
G_info = G_info[,c(1:9)]
get_MAF <-function(info_str){
    maf_str = strsplit(info_str,";")[[1]][2]
    return(as.numeric(strsplit(maf_str,"=")[[1]][2]))
}
G_info$MAF = apply(as.matrix(G_info$INFO, ncol=1),1,get_MAF)
eqtls$maf = G_info$MAF

In [13]:
# run coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             csaQTL_celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% eqtls$ID,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        csaQTL_celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
eqtls = eqtls[eqtls$ID %in% gwas_res$SNP,]
ref_gwas_coloc = list(beta = eqtls$BETA, pvalues = eqtls$P,
                    N=eqtls$N, MAF=eqtls$maf, type = "quant", snp = eqtls$ID)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)


“minimum p value is: 4.877e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.67e-12  2.05e-01  1.67e-13  1.98e-02  7.75e-01 
[1] "PP abf for shared variant: 77.5%"


In [14]:
# Store information
eqtls[order(eqtls$P),][1,] # lead SNP
eQTL_lead_snps = c(eQTL_lead_snps, "12:10591281:G:A")
eQTL_pvalues = c(eQTL_pvalues, 4.88e-6)
eQTL_betas = c(eQTL_betas, 0.889)
coloc_prob = c(coloc_prob, 0.78)

Unnamed: 0_level_0,CHR,POS,ID,REF,ALT,QUAL,FILTER,INFO,N,BETA,P,Gene,maf
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>
351,12,10591281,12:10591281:G:A,G,A,.,PASS,Gene=ENSG00000183542;GeneSymbol=KLRC4;Pvalue=4.87699e-06;Beta=0.888609,91,0.888609,4.87699e-06,KLRC4,0.17344


In [17]:
# Reformat for display and export results table
export['Cell Type'] = c("Whole blood", "NK", "Stim. CD8+ T")
export['Lead SNP'] = eQTL_lead_snps
export['eGene'] = c("BCL2A1", "ETS1", "KLRC4")
export['Beta'] = eQTL_betas
export['P'] = eQTL_pvalues
export['Coloc.'] = coloc_prob

In [18]:
# reformat for display
export$Beta = as.character(round(as.numeric(export$Beta),2))
export$P = format(as.numeric(export$P), scientific = TRUE, digits = 2)
export = export[,c("Cell Type", "Lead SNP", "eGene", "Beta", "P", "csaQTL", "Coloc.")]
print(xtable(export, type = "latex"), file = paste0(tab_dir,"supptable.published_eQTLs.tex"),
     include.rownames=FALSE)
export

Unnamed: 0_level_0,Cell Type,Lead SNP,eGene,Beta,P,csaQTL,Coloc.
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
1,Whole blood,15:80260274:A:T,BCL2A1,-0.24,9.3e-52,Myeloid_15:80263217:C:T,0.98
2,NK,11:128085408:C:T,ETS1,0.39,0.00043,NK_11:128070535:A:G,0.4
4,Stim. CD8+ T,12:10591281:G:A,KLRC4,0.89,4.9e-06,NK_12:10583611:C:T,0.78
