# eQTL and csaQTL testing with cell state abundance and eGene expression covariates

In [1]:
# Put the R library of the `py3_clone` conda environment on the library search
# path so the packages loaded below are resolved from that environment.
# NOTE(review): this is a user-specific absolute path; adjust when running elsewhere.
.libPaths("/PHShome/lar24/anaconda3/envs/py3_clone/lib/R/library")

In [2]:
# lme4 supplies glmer()/glmerControl() for the Poisson mixed models fitted in test_eQTL
library(lme4)
# Matrix is a dependency of lme4 (it is also attached automatically when lme4 loads)
library(Matrix)
# xtable is used at the end of the notebook to write the results table as LaTeX
library(xtable)
# Fix the random number generator seed.
# NOTE(review): no explicitly random step is visible in this notebook — confirm whether the seed is needed.
set.seed(0)

“package ‘lme4’ was built under R version 4.1.0”
Loading required package: Matrix

“package ‘Matrix’ was built under R version 4.1.0”
“package ‘xtable’ was built under R version 4.1.1”


In [3]:
# Test a lead SNP for an eQTL effect on one gene using a single-cell Poisson
# mixed-effects model, optionally adjusting for the csaQTL sample-level phenotype.
#
# Arguments:
#   lead_snp       SNP identifier. It is used to build input file names and must
#                  match a row name of the cis-SNP genotype table exactly.
#   chr            Chromosome label used to locate the genotype sample-label file.
#   celltype       Cell-type label used to build input file names.
#   gene           Name of the expression column (raw UMI counts) used as response.
#   csa_covariate  Logical. If TRUE, the GeNA sample-level phenotype for
#                  `lead_snp` (read from spheno.tsv) is added as a fixed effect
#                  to both the full and the null model.
#
# Returns:
#   A one-row data.frame with columns Cell.type, SNP, GENE, BETA and SE (estimate
#   and standard error of the genotype term G in the full model), P (likelihood
#   ratio test of the full model against the model without G), Tested.Effect and
#   Added.Covariate.
#
# Stops with an explicit message if the gene column, the lead SNP, a donor's
# genotype, or the G coefficient cannot be found.
test_eQTL<-function(lead_snp, chr, celltype, gene, csa_covariate){
    src_filepath <- "/data/srlab/lrumker/MCSC_Project/cna-qtl/eqtls/results/sceQTL/inputs/"

    # load genotype data (rows = SNPs, columns = donors)
    geno <- read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/",
                              celltype, "_", lead_snp, "_", "cis.DS.vcf.gz"), row.names=1)
    geno_ids <- read.table(paste0("/data/srlab/lrumker/datasets/onek1k/geno/sample_labels/",
                                  "sample_list_chr", chr, ".txt"))
    # the first row of the sample-label file holds the donor ids, in column order
    colnames(geno) <- as.character(geno_ids[1,])

    # load phenotype and covariate data
    expr_file <- paste0(src_filepath, celltype, '_', lead_snp, '_csaQTL_test_', celltype,
                        '_eQTLs_selgene_exp.csv')
    exprs_raw <- read.csv(expr_file, row.names = 1) # raw UMI counts
    pca_res <- read.csv(paste0(src_filepath, celltype, "_ePCs.csv"), row.names = 1) # gene expression PCs
    cell_meta <- read.csv(paste0(src_filepath, celltype, "_cellmeta.csv")) # cell and donor covariates

    # assemble data object
    # NOTE(review): the three tables are bound column-wise, which assumes their
    # rows are already in the same order — confirm upstream.
    data <- cbind(exprs_raw, pca_res, cell_meta)
    if (!gene %in% colnames(data)) {
        stop("Gene '", gene, "' not found in ", expr_file, call. = FALSE)
    }
    data[,gene] <- as.numeric(data[,gene])
    data$id <- factor(data$id)
    data$age <- scale(data$age)
    data$nCount_RNA <- scale(log(data$nCount_RNA)) # nUMI
    data['E'] <- data[,gene]

    # only donors that passed QC; drop = FALSE keeps a data.frame even if a
    # single donor remains
    geno <- geno[, colnames(geno) %in% unique(data$id), drop = FALSE]

    # Look the lead SNP up by exact row name and fail loudly if it is absent
    snp_row <- which(rownames(geno) == lead_snp)
    if (length(snp_row) != 1L) {
        stop("Lead SNP '", lead_snp, "' not found in the cis-SNP genotype table",
             call. = FALSE)
    }
    donor_idx <- match(data$id, colnames(geno))
    if (anyNA(donor_idx)) {
        stop("Some donors in the expression data have no genotype for chr", chr,
             call. = FALSE)
    }
    # Pull the per-donor genotype once, then expand it to one value per
    # cell; this avoids building a data.frame with one column per cell.
    donor_dosage <- as.numeric(unlist(geno[snp_row, , drop = FALSE], use.names = FALSE))
    mod_data <- cbind(data, data.frame("G" = donor_dosage[donor_idx]))

    # Covariates shared by the full and the null model, kept in one place so the
    # two models cannot drift apart
    covariate_terms <- c("(1|id)", "(1|batch)", "age", "sex", "nCount_RNA", "percent.mt",
                         paste0("gPC", 1:6), paste0("ePC", 1:5))
    if (csa_covariate) {
        spheno <- read.csv(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_",
                                  celltype, "/spheno.tsv"), sep="\t")
        # read.csv mangles the SNP id into a syntactic column name: "X" prefix, ":" -> "."
        pheno_col <- gsub(":", ".", paste0("X", lead_snp))
        mod_data['csa_pheno'] <- spheno[,pheno_col][match(mod_data$id, spheno$id)]
        covariate_terms <- c(covariate_terms, "csa_pheno")
        added_covariate <- "GeNA csaQTL sample-level phenotype"
    } else {
        added_covariate <- "None"
    }

    rhs <- paste(covariate_terms, collapse = "+")
    full_formula <- as.formula(paste0("E~G+", rhs))
    null_formula <- as.formula(paste0("E~", rhs))

    # nAGQ = 0 selects lme4's faster, less exact estimation mode (see lme4 docs)
    full_model <- lme4::glmer(formula = full_formula, family = "poisson", nAGQ = 0,
                              data = mod_data,
                              control = lme4::glmerControl(optimizer = "nloptwrap"))
    null_model <- lme4::glmer(formula = null_formula, family = "poisson", nAGQ = 0,
                              data = mod_data,
                              control = lme4::glmerControl(optimizer = "nloptwrap"))

    # Likelihood ratio test of the genotype term
    model_lrt <- anova(null_model, full_model)

    # Read the genotype coefficient by name: a positional lookup would silently
    # report another covariate if G were dropped from the fit.
    coef_table <- summary(full_model)$coefficients
    if (!"G" %in% rownames(coef_table)) {
        stop("Genotype term 'G' is missing from the fitted model for ", lead_snp,
             call. = FALSE)
    }
    g_coef <- coef_table["G", ]

    res <- data.frame("Cell type" = celltype, "SNP" = lead_snp, "GENE" = gene,
                      "BETA" = g_coef["Estimate"],   # G beta
                      "SE" = g_coef["Std. Error"],   # G se
                      "P" = model_lrt$`Pr(>Chisq)`[2],
                      "Tested Effect" = paste0("\\textit{", gene, "} eQTL"),
                      "Added Covariate" = added_covariate)
    return(res)
}

In [4]:
# Fit the single-cell eQTL model for each locus, first without and then with the
# csaQTL sample-level phenotype as an added covariate, and stack the one-row
# results in that order.
all_res <- do.call(rbind, list(
    test_eQTL(lead_snp = "15:80263217:C:T", chr = "15", celltype = "Myeloid",
              gene = 'BCL2A1', csa_covariate = FALSE),
    test_eQTL(lead_snp = "15:80263217:C:T", chr = "15", celltype = "Myeloid",
              gene = 'BCL2A1', csa_covariate = TRUE),
    test_eQTL(lead_snp = "12:10583611:C:T", chr = "12", celltype = "NK",
              gene = 'KLRC1', csa_covariate = FALSE),
    test_eQTL(lead_snp = "12:10583611:C:T", chr = "12", celltype = "NK",
              gene = 'KLRC1', csa_covariate = TRUE)
))

In [5]:
# Keep only the columns reported in the table and switch from the syntactic
# names produced by data.frame() to the display names used in the output.
all_res <- setNames(
    all_res[, c("Cell.type", "SNP", "Tested.Effect", "Added.Covariate", "P")],
    c("Cell Type", "SNP", "Tested Effect", "Added Covariate", "P")
)

## Load results from parallel testing with GeNA

In [6]:
# Myeloid locus: append the GeNA csaQTL P values for the lead SNP, without and
# with the eGene's mean expression as an added covariate.
celltype <- "Myeloid"
lead_snp <- "15:80263217:C:T"
eGene <- "BCL2A1"

# csaQTL test with no added covariate
GeNA_sumstats <- read.delim(
    paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_",
           celltype, "/gwas_loci.tsv")
)
res <- data.frame(
    "Cell Type" = celltype,
    "SNP" = lead_snp,
    "Tested Effect" = "GeNA csaQTL",
    "Added Covariate" = "None",
    "P" = GeNA_sumstats[GeNA_sumstats$ID == lead_snp, "P"],
    check.names = FALSE  # keep the display names with spaces as written
)
all_res <- rbind(all_res, res)

# csaQTL test with eGene mean expression as covariate
GeNA_sumstats <- read.delim(
    paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_",
           celltype, "_eGene_covs/GeNA_sumstats.txt")
)
res <- data.frame(
    "Cell Type" = celltype,
    "SNP" = lead_snp,
    "Tested Effect" = "GeNA csaQTL",
    "Added Covariate" = paste0("\\textit{", eGene, "} mean expression across ",
                               celltype, " cells"),
    "P" = GeNA_sumstats[GeNA_sumstats$ID == lead_snp, "P"],
    check.names = FALSE
)
all_res <- rbind(all_res, res)

In [7]:
# NK locus: append the GeNA csaQTL P values for the lead SNP, without and with
# the eGene's mean expression as an added covariate.
celltype <- "NK"
lead_snp <- "12:10583611:C:T"
eGene <- "KLRC1"

# csaQTL test with no added covariate
GeNA_sumstats <- read.delim(
    paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_",
           celltype, "/gwas_loci.tsv")
)
res <- data.frame(
    "Cell Type" = celltype,
    "SNP" = lead_snp,
    "Tested Effect" = "GeNA csaQTL",
    "Added Covariate" = "None",
    "P" = GeNA_sumstats[GeNA_sumstats$ID == lead_snp, "P"],
    check.names = FALSE  # keep the display names with spaces as written
)
all_res <- rbind(all_res, res)

# csaQTL test with eGene mean expression as covariate
GeNA_sumstats <- read.delim(
    paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_",
           celltype, "_eGene_covs/GeNA_sumstats.txt")
)
res <- data.frame(
    "Cell Type" = celltype,
    "SNP" = lead_snp,
    "Tested Effect" = "GeNA csaQTL",
    "Added Covariate" = paste0("\\textit{", eGene, "} mean expression across ",
                               celltype, " cells"),
    "P" = GeNA_sumstats[GeNA_sumstats$ID == lead_snp, "P"],
    check.names = FALSE
)
all_res <- rbind(all_res, res)

In [8]:
# Convert the P column to strings in scientific notation with one decimal
# (format '%.1e') for the output table. sprintf() is vectorised, so the whole
# column is formatted in one call; this also behaves correctly when the table
# has zero rows, where iterating over 1:nrow(all_res) would not.
p_str <- sprintf('%.1e', all_res$P)
all_res$P <- p_str

In [9]:
# Display the combined table: the eQTL model rows followed by the GeNA csaQTL rows
all_res

Unnamed: 0_level_0,Cell Type,SNP,Tested Effect,Added Covariate,P
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>
Estimate,Myeloid,15:80263217:C:T,\textit{BCL2A1} eQTL,,0.0011
Estimate1,Myeloid,15:80263217:C:T,\textit{BCL2A1} eQTL,GeNA csaQTL sample-level phenotype,0.0026
Estimate2,NK,12:10583611:C:T,\textit{KLRC1} eQTL,,3.5e-41
Estimate3,NK,12:10583611:C:T,\textit{KLRC1} eQTL,GeNA csaQTL sample-level phenotype,1.4e-32
1,Myeloid,15:80263217:C:T,GeNA csaQTL,,2.6e-08
11,Myeloid,15:80263217:C:T,GeNA csaQTL,\textit{BCL2A1} mean expression across Myeloid cells,0.012
12,NK,12:10583611:C:T,GeNA csaQTL,,2e-11
13,NK,12:10583611:C:T,GeNA csaQTL,\textit{KLRC1} mean expression across NK cells,5e-16


In [10]:
# Write the results table as LaTeX, without row names.
table_dir <- "/data/srlab/lrumker/MCSC_Project/cna-qtl/tables/"
print(
    xtable(all_res),
    file = paste0(table_dir, "supptable.csaQTL_eQTL_covariates.tex"),
    include.rownames = FALSE,
    # pass cell text through unchanged so the \textit{...} markup is not escaped
    sanitize.text.function = identity
)