# eQTL in Epigenetic Data

**Created**: 18 February 2022

The goal here is to perform a simple comparison of *cis*-eQTL detected in GAinS with existing epigenomic datasets. I will be using published results (the supplementary tables of each paper) rather than data I have processed myself. These include differentially accessible (DA) peaks from Calderon *et al.* and Ram-Mohan *et al.* and chromatin accessibility QTL (caQTL) from Calderon *et al.*

## Environment

In [1]:
# Install the Bioconductor packages used in this notebook if they are missing.
# Every check uses quietly=TRUE (spelled out, since `T` can be reassigned) so
# the three checks behave the same way and do not print load messages.
if (!requireNamespace("GenomicRanges", quietly=TRUE)) {
    BiocManager::install("GenomicRanges")
}

if (!requireNamespace("biomaRt", quietly=TRUE)) {
    BiocManager::install("biomaRt")
}

if (!requireNamespace("rtracklayer", quietly=TRUE)) {
    BiocManager::install("rtracklayer")
}

Loading required namespace: GenomicRanges

Loading required namespace: biomaRt



In [2]:
# Data wrangling (tidyverse, data.table) and genomic interval handling
# (GenomicRanges, rtracklayer). biomaRt is attached here as well.
library(tidyverse)
library(data.table)
library(GenomicRanges)
library(biomaRt)
library(rtracklayer)

# The relative paths used below (03_Functional_Interpretation/...) are resolved
# against this project root.
setwd("~/eQTL_pQTL_Characterization/")

# NOTE(review): presumably defines the shared ggplot theme; nothing from it is
# visibly used in this section - confirm whether it is still needed here.
source("03_Functional_Interpretation/scripts/utils/ggplot_theme.R")

── [1mAttaching packages[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.4     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘data.table’


The following objects are masked from ‘package:dply

## Load Data

In [3]:
# Read the Calderon et al. supplementary file holding the differential
# accessibility results (tidied in the next cell).
calderon.da.peaks <- fread("03_Functional_Interpretation/data/41588_2019_505_MOESM6_ESM")

In [4]:
# Tidy the Calderon et al. DA peak table.
#   - peak_id arrives as "chr<N>_<start>_<end>"; it is split into chr/start/end
#     and rewritten as "<N>:<start>-<end>".
#   - cell_type is the contrast with everything from "_S-" onwards removed.
#   - Only autosomes (1-22) are kept.
# The autosome filter runs on the character chromosome *before* the numeric
# conversion, so non-numeric chromosomes are dropped without as.numeric()
# coercing them to NA with a warning (same order as the Ram-Mohan et al. cell).
calderon.da.peaks <- calderon.da.peaks %>%
    as.data.frame() %>%
    dplyr::mutate(
        chr=gsub("chr", "", gsub("_.*", "", peak_id)),
        # vapply() guarantees one character value per peak, even for empty input
        start=as.numeric(vapply(strsplit(peak_id, "_"), function(x) { x[2] }, character(1))),
        end=as.numeric(vapply(strsplit(peak_id, "_"), function(x) { x[3] }, character(1))),
        cell_type=gsub("_S-.*", "", contrast),
        # Uses the chr/start/end columns computed just above
        peak_id=paste0(chr, ":", start, "-", end)
    ) %>%
    dplyr::filter(chr %in% as.character(1:22)) %>%
    dplyr::mutate(chr=as.numeric(chr)) %>%
    dplyr::select(peak_id, chr, start, end, cell_type, logFC, pval=adj.P.Val)

In [5]:
head(calderon.da.peaks)

Unnamed: 0_level_0,peak_id,chr,start,end,cell_type,logFC,pval
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
1,1:101414421-101416252,1,101414421,101416252,Bulk_B,-1.588574,5.706523e-06
2,1:101553654-101554433,1,101553654,101554433,Bulk_B,2.756835,4.359877e-06
3,1:101875468-101877095,1,101875468,101877095,Bulk_B,1.924191,0.0001650887
4,1:105307605-105308001,1,105307605,105308001,Bulk_B,2.668101,5.036673e-05
5,1:108073851-108074235,1,108073851,108074235,Bulk_B,-3.099729,0.002662072
6,1:108475984-108478550,1,108475984,108478550,Bulk_B,1.297943,0.006296819


In [6]:
# Read the Calderon et al. supplementary file holding the chromatin
# accessibility QTL (caQTL) results.
calderon.ca.qtl <- fread("03_Functional_Interpretation/data/41588_2019_505_MOESM8_ESM")

In [7]:
# Tidy the Calderon et al. caQTL table: strip the "chr" prefix, keep autosomes
# (1-22) only, and move chr/pos to the front.
# The filter runs on the character chromosome before the numeric conversion so
# that non-numeric chromosomes are dropped without an NA-coercion warning.
calderon.ca.qtl <- calderon.ca.qtl %>%
    as.data.frame() %>%
    dplyr::mutate(chr=gsub("chr", "", chr)) %>%
    dplyr::filter(chr %in% as.character(1:22)) %>%
    dplyr::mutate(chr=as.numeric(chr)) %>%
    dplyr::select(chr, pos, everything())

In [8]:
head(calderon.ca.qtl)

Unnamed: 0_level_0,chr,pos,Phenotype,dbSNP134_id,gwas_pvalue,PMID,TotalDiscoverySamples,donor,cell,stim,⋯,peak_id_atac,contrast_atac,logFC_atac,adj.P.Val_atac,nearby_de_gene_id,contrast_rna,logFC_rna,adj.P.Val_rna,tested_TF,ref_minus_alt_match
Unnamed: 0_level_1,<dbl>,<int>,<chr>,<int>,<dbl>,<int>,<int>,<int>,<chr>,<lgl>,⋯,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>
1,1,204434927,Transmission distortion,12092943,9.141e-10,22377632,4728,1002,Mem_B,True,⋯,chr1_204434103_204436273,Mem_B_S-Mem_B_U,1.606535,0.0002127197,,,,,ENSG00000008196_LINE2_TFAP2B_D_N1,-5.1125152
2,1,204434927,Maternal transmission distortion,12092943,3.252e-09,22377632,4728,1002,Mem_B,True,⋯,chr1_204434103_204436273,Mem_B_S-Mem_B_U,1.606535,0.0002127197,,,,,ENSG00000008196_LINE2_TFAP2B_D_N1,-5.1125152
3,1,204434927,Transmission distortion,12092943,9.141e-10,22377632,4728,1002,Mem_B,True,⋯,chr1_204434103_204436273,Mem_B_S-Mem_B_U,1.606535,0.0002127197,,,,,ENSG00000008196_LINE2_TFAP2B_D_N1,-5.1125152
4,1,204434927,Maternal transmission distortion,12092943,3.252e-09,22377632,4728,1002,Mem_B,True,⋯,chr1_204434103_204436273,Mem_B_S-Mem_B_U,1.606535,0.0002127197,,,,,ENSG00000008196_LINE2_TFAP2B_D_N1,-5.1125152
5,15,45740392,Serum creatinine,9806699,2.56e-13,20383146,67093,1001,Myeloid_DCs,False,⋯,chr15_45739896_45742558,pDCs_U-Myeloid_DCs_U,-2.623013,6.295777e-06,,,,,ENSG00000008196_LINE2_TFAP2B_D_N1,0.6583003
6,15,45740392,Serum creatinine,9806699,2.56e-13,20383146,67093,1002,Myeloid_DCs,False,⋯,chr15_45739896_45742558,pDCs_U-Myeloid_DCs_U,-2.623013,6.295777e-06,,,,,ENSG00000008196_LINE2_TFAP2B_D_N1,0.6583003


The supplementary table from Ram-Mohan *et al.* is in Excel format. The Excel spreadsheet has 9 sheets.

In [9]:
# List the sheets of Table S1, read each one, label its rows with the sheet
# name (with any "-DR" removed) in a Stimulation column, and stack the sheets
# row-wise into a single table.
sheets <- readxl::excel_sheets("03_Functional_Interpretation/data/Ram_Mohan_et_al_2022_Table_S1.xlsx")

ram.mohan.da.peaks <- do.call(
    rbind,
    lapply(sheets, function(sheet) {
        dplyr::mutate(
            readxl::read_xlsx("03_Functional_Interpretation/data/Ram_Mohan_et_al_2022_Table_S1.xlsx", sheet=sheet),
            Stimulation=gsub("-DR", "", sheet)
        )
    })
)

In [10]:
# Tidy the Ram-Mohan et al. DA peaks: build a "<chr>:<start>-<end>" peak id,
# keep autosomes (1-22) only, and rename the columns to match the layout of
# the Calderon et al. DA peak table.
ram.mohan.da.peaks <- ram.mohan.da.peaks %>%
    dplyr::mutate(
        chr=gsub("chr", "", Chromosome),
        peak_id=paste0(chr, ":", Start, "-", End)
    ) %>%
    dplyr::filter(chr %in% as.character(1:22)) %>%
    dplyr::mutate(chr=as.numeric(chr)) %>%
    dplyr::select(
        peak_id,
        chr,
        start=Start,
        end=End,
        stimulation=Stimulation,
        logFC=log2FC,
        pval=`p-value`
    )

In [11]:
head(ram.mohan.da.peaks)

peak_id,chr,start,end,stimulation,logFC,pval
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
1:756651-756892,1,756651,756892,BGP,1.35,0.032
1:1430944-1431234,1,1430944,1431234,BGP,2.48,0.00022
1:2090706-2090915,1,2090706,2090915,BGP,1.52,0.0358
1:2480184-2480571,1,2480184,2480571,BGP,1.15,0.0213
1:3593886-3594335,1,3593886,3594335,BGP,1.28,0.02
1:3658517-3658765,1,3658517,3658765,BGP,1.48,0.00517


In [12]:
# Lead eSNPs with their LD tags; TAGS is a "|"-separated list of SNP ids.
# A TAGS value of "NONE" is blanked so that it later splits into zero tags.
# stringsAsFactors=FALSE keeps SNP/TAGS as character regardless of R version
# (the long-form conversion below concatenates them with c()).
lead.snps <- read.table("~/gains_team282/nikhil/colocalization/cis_eqtl/fine_mapping/LD/lead_snps.80r2.tags.tsv", header=TRUE, stringsAsFactors=FALSE) %>%
    dplyr::mutate(TAGS=gsub("NONE", "", TAGS))

In [13]:
head(lead.snps)

Unnamed: 0_level_0,SNP,CHR,BP,NTAG,LEFT,RIGHT,KBSPAN,TAGS
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<dbl>,<chr>
1,rs3131972,1,817341,0,817341,817341,0.001,
2,rs2272757,1,946247,0,946247,946247,0.001,
3,rs13303056,1,953778,33,941119,966227,25.109,rs4372192|rs6605067|rs2839|rs3748592|rs2340582|rs4246503|rs4970377|rs4970452|rs4970376|rs4970375|rs3748595|rs3828047|rs3748596|rs3748597|rs13302945|rs3828049|rs13303206|rs13303051|rs13302957|rs13303227|rs4970371|rs6605069|rs4970445|rs28393498|rs4970441|rs13303229|rs6696971|rs41285808|rs4970435|rs4970434|rs9697711|rs13303351|rs13302996
4,rs13303327,1,960326,4,951408,966179,14.772,rs10465242|rs13303010|rs3935066|rs13303160
5,rs13303033,1,979560,3,979560,983193,3.634,rs6694632|rs13303118|rs2341354
6,rs3128117,1,1009184,0,1009184,1009184,0.001,


In [14]:
# Conditional eSNPs with their LD tags; TAGS is a "|"-separated list of SNP ids.
# A TAGS value of "NONE" is blanked so that it later splits into zero tags.
# stringsAsFactors=FALSE keeps SNP/TAGS as character regardless of R version
# (the long-form conversion below concatenates them with c()).
conditional.snps <- read.table("~/gains_team282/nikhil/colocalization/cis_eqtl/fine_mapping/LD/conditional_snps.80r2.tags.tsv", header=TRUE, stringsAsFactors=FALSE) %>%
    dplyr::mutate(TAGS=gsub("NONE", "", TAGS))

In [15]:
head(conditional.snps)

Unnamed: 0_level_0,SNP,CHR,BP,NTAG,LEFT,RIGHT,KBSPAN,TAGS
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<dbl>,<chr>
1,rs3131972,1,817341,0,817341,817341,0.001,
2,rs28731045,1,901149,7,898547,903175,4.629,rs28593608|rs28385272|rs4422949|rs28570054|rs72890788|rs4970384|rs4970383
3,rs142336952,1,918014,0,918014,918014,0.001,
4,rs2272757,1,946247,0,946247,946247,0.001,
5,rs4970377,1,950296,33,941119,966227,25.109,rs4372192|rs6605067|rs2839|rs3748592|rs2340582|rs4246503|rs4970452|rs4970376|rs4970375|rs3748595|rs3828047|rs3748596|rs3748597|rs13303056|rs13302945|rs3828049|rs13303206|rs13303051|rs13302957|rs13303227|rs4970371|rs6605069|rs4970445|rs28393498|rs4970441|rs13303229|rs6696971|rs41285808|rs4970435|rs4970434|rs9697711|rs13303351|rs13302996
6,rs13303056,1,953778,33,941119,966227,25.109,rs4372192|rs6605067|rs2839|rs3748592|rs2340582|rs4246503|rs4970377|rs4970452|rs4970376|rs4970375|rs3748595|rs3828047|rs3748596|rs3748597|rs13302945|rs3828049|rs13303206|rs13303051|rs13302957|rs13303227|rs4970371|rs6605069|rs4970445|rs28393498|rs4970441|rs13303229|rs6696971|rs41285808|rs4970435|rs4970434|rs9697711|rs13303351|rs13302996


In [16]:
# Variant table (.bim) of the genotyping data, loaded as a plain data frame
# and given descriptive column names.
geno.bim <- as.data.frame(
    fread("~/gains_team282/Genotyping/All_genotyping_merged_filtered_b38_refiltered_rsID.bim")
)
names(geno.bim) <- c(
    "chr",
    "snp",
    "cM",
    "Position",
    "minor_allele",
    "major_allele"
)

In [17]:
head(geno.bim)

Unnamed: 0_level_0,chr,snp,cM,Position,minor_allele,major_allele
Unnamed: 0_level_1,<chr>,<chr>,<int>,<int>,<chr>,<chr>
1,1,rs3131972,0,817341,A,G
2,1,rs546843995,0,818053,0,G
3,1,rs553916047,0,818359,0,A
4,1,1:818740_T_C,0,818740,T,C
5,1,rs145604921,0,819378,0,C
6,1,rs535256652,0,821053,0,T


## SNP Positions in hg19

Both Calderon *et al.* and Ram-Mohan *et al.* use hg19 coordinates. Our genotyping data (and therefore the cis-eQTL SNP positions) are in hg38, so we look up each SNP's position by its rsID and lift it over to hg19 for direct comparison.

In [18]:
# Convert LD blocks to long form: one row per (eSNP, tagging SNP) pair, where
# every eSNP is also listed as tagging itself. The pairs are then annotated
# with the genotyping positions (inner join on the tagging SNP id, so tags
# absent from geno.bim are dropped) and a UCSC-style chromosome name.
#
# The split is vectorised over the whole TAGS column rather than looping over
# 1:nrow(), which would iterate over c(1, 0) for an empty table.
tag.lists <- strsplit(lead.snps$TAGS, "\\|")

lead.snps.long <- data.frame(
    # Each eSNP is repeated once for itself plus once per tag
    eSNP=rep(lead.snps$SNP, lengths(tag.lists) + 1),
    # as.character() keeps the column present when there are no rows at all
    Tagging.SNP=as.character(unlist(Map(c, lead.snps$SNP, tag.lists), use.names=FALSE)),
    stringsAsFactors=FALSE
) %>%
    merge(., geno.bim, by.x="Tagging.SNP", by.y="snp") %>%
    dplyr::mutate(chr.ucsc=paste0("chr", chr))

In [19]:
head(lead.snps.long)

Unnamed: 0_level_0,Tagging.SNP,eSNP,chr,cM,Position,minor_allele,major_allele,chr.ucsc
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>
1,10:45393944_T_C,rs58167894,10,0,45393944,T,C,chr10
2,2:224478491_A_G,rs112040555,2,0,224478491,A,G,chr2
3,6:33096683_T_C,rs1431403,6,0,33096683,T,C,chr6
4,rs1000033,rs6675427,1,0,226392686,G,T,chr1
5,rs10000407,rs6821444,4,0,146134155,C,A,chr4
6,rs10000407,rs17021106,4,0,146134155,C,A,chr4


In [20]:
# Convert LD blocks to long form: one row per (eSNP, tagging SNP) pair, where
# every eSNP is also listed as tagging itself. The pairs are then annotated
# with the genotyping positions (inner join on the tagging SNP id, so tags
# absent from geno.bim are dropped) and a UCSC-style chromosome name.
#
# The split is vectorised over the whole TAGS column rather than looping over
# 1:nrow(), which would iterate over c(1, 0) for an empty table.
tag.lists <- strsplit(conditional.snps$TAGS, "\\|")

conditional.snps.long <- data.frame(
    # Each eSNP is repeated once for itself plus once per tag
    eSNP=rep(conditional.snps$SNP, lengths(tag.lists) + 1),
    # as.character() keeps the column present when there are no rows at all
    Tagging.SNP=as.character(unlist(Map(c, conditional.snps$SNP, tag.lists), use.names=FALSE)),
    stringsAsFactors=FALSE
) %>%
    merge(., geno.bim, by.x="Tagging.SNP", by.y="snp") %>%
    dplyr::mutate(chr.ucsc=paste0("chr", chr))

In [21]:
head(conditional.snps.long)

Unnamed: 0_level_0,Tagging.SNP,eSNP,chr,cM,Position,minor_allele,major_allele,chr.ucsc
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>
1,10:45393944_T_C,rs58167894,10,0,45393944,T,C,chr10
2,2:224478491_A_G,rs112040555,2,0,224478491,A,G,chr2
3,6:33096683_T_C,rs1431403,6,0,33096683,T,C,chr6
4,rs10000254,rs2178467,4,0,177495952,A,G,chr4
5,rs10000254,rs56844945,4,0,177495952,A,G,chr4
6,rs1000033,rs6675427,1,0,226392686,G,T,chr1


In [22]:
# Represent each tagging SNP as a width-1 genomic range (start == end ==
# Position) on its UCSC-named chromosome, keeping the remaining columns as
# metadata. Done once for the lead SNPs and once for the conditional SNPs.
lead.snps.ranges <- makeGRangesFromDataFrame(
    df=lead.snps.long,
    keep.extra.columns=TRUE,
    seqnames.field="chr.ucsc",
    start.field="Position",
    end.field="Position"
)

conditional.snps.ranges <- makeGRangesFromDataFrame(
    df=conditional.snps.long,
    keep.extra.columns=TRUE,
    seqnames.field="chr.ucsc",
    start.field="Position",
    end.field="Position"
)

In [23]:
# Load the hg38 -> hg19 chain file used by liftOver() below, then print it.
chain <- import.chain("03_Functional_Interpretation/data/hg38ToHg19.over.chain")
chain

Chain of length 25
names(25): chr22 chr21 chr19 chr20 chrY chr18 ... chr6 chr5 chr4 chr3 chr2 chr1

In [24]:
# Lift the lead/tag SNP positions from hg38 to hg19 and flatten the result to
# one row per lifted SNP. Ranges that do not lift over are absent from the
# output.
seqlevelsStyle(lead.snps.ranges) <- "UCSC"
lead.snps.hg19 <- liftOver(lead.snps.ranges, chain) %>%
    unlist() %>%
    as.data.frame() %>%
    # Take the chromosome from the lifted-over seqnames rather than from the
    # hg38 `chr` metadata column, so that a SNP which maps onto a different
    # hg19 sequence is not paired with its old chromosome label.
    dplyr::mutate(chr=gsub("^chr", "", as.character(seqnames))) %>%
    dplyr::select(eSNP, Tagging.SNP, chr, pos.hg19=start) %>%
    dplyr::arrange(eSNP)

In [25]:
# Lift the conditional/tag SNP positions from hg38 to hg19 and flatten the
# result to one row per lifted SNP. Ranges that do not lift over are absent
# from the output.
seqlevelsStyle(conditional.snps.ranges) <- "UCSC"
conditional.snps.hg19 <- liftOver(conditional.snps.ranges, chain) %>%
    unlist() %>%
    as.data.frame() %>%
    # Take the chromosome from the lifted-over seqnames rather than from the
    # hg38 `chr` metadata column, so that a SNP which maps onto a different
    # hg19 sequence is not paired with its old chromosome label.
    dplyr::mutate(chr=gsub("^chr", "", as.character(seqnames))) %>%
    dplyr::select(eSNP, Tagging.SNP, chr, pos.hg19=start) %>%
    dplyr::arrange(eSNP)

In [28]:
# Save the hg19 positions of the lead and conditional SNPs (with their tags)
# as tab-separated files. TRUE/FALSE are spelled out because T/F are ordinary
# variables that can be reassigned.
write.table(lead.snps.hg19, "~/gains_team282/epigenetics/calderon_et_al_hg19/lead_and_tag_snps_hg19.tsv", row.names=FALSE, sep="\t", quote=FALSE)
write.table(conditional.snps.hg19, "~/gains_team282/epigenetics/calderon_et_al_hg19/conditional_and_tag_snps_hg19.tsv", row.names=FALSE, sep="\t", quote=FALSE)

## Overlaps with DA Peaks

### Overlap with Calderon *et al.*

In [26]:
# Rebuild the lead/tag SNP ranges from the lifted-over table so that they are
# in hg19 (this overwrites the ranges built from the genotyping positions).
# No seqnames.field is given, so the chromosome column is found by the
# function's default column-name matching.
lead.snps.ranges <- makeGRangesFromDataFrame(lead.snps.hg19, keep.extra.columns=TRUE, start.field="pos.hg19", end.field="pos.hg19")

In [27]:
# Calderon et al. DA peaks as genomic ranges; peak_id, cell_type, logFC and
# pval are kept as metadata columns.
# NOTE(review): start is used as given (treated as 1-based) - confirm the
# coordinate convention of the published peak ids.
da.peaks <- makeGRangesFromDataFrame(calderon.da.peaks, keep.extra.columns=TRUE)

In [28]:
# Find every (lead/tag SNP, DA peak) pair where the SNP position falls inside
# the peak. SNPs are the query, peaks are the subject.
overlaps <- findOverlaps(lead.snps.ranges, da.peaks)

In [29]:
# One row per (lead/tag SNP, Calderon et al. DA peak) overlap.
# Hits are read through the queryHits()/subjectHits() accessors instead of the
# internal @from/@to slots, and columns are picked by name rather than by
# position, so the table does not depend on the column order of the inputs.
calderon.overlaps <- local({
    snp.df <- as.data.frame(lead.snps.ranges[queryHits(overlaps)])
    peak.df <- as.data.frame(da.peaks[subjectHits(overlaps)])

    # Selecting with a character vector fails loudly if a column is missing
    snp.cols <- snp.df[, c("eSNP", "Tagging.SNP", "seqnames", "start")]
    names(snp.cols) <- c("esnp", "tag_snp", "tag_snp_chr", "tag_snp_pos_hg19")

    peak.cols <- peak.df[, c("peak_id", "seqnames", "start", "end", "cell_type", "logFC", "pval")]
    names(peak.cols) <- c("peak_id", "peak_chr", "peak_start", "peak_end", "cell_type", "peak_logFC", "peak_pval")

    cbind(snp.cols, peak.cols)
})

In [30]:
head(calderon.overlaps)

Unnamed: 0_level_0,esnp,tag_snp,tag_snp_chr,tag_snp_pos_hg19,peak_id,peak_chr,peak_start,peak_end,cell_type,peak_logFC,peak_pval
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<int>,<chr>,<fct>,<int>,<int>,<chr>,<dbl>,<dbl>
1,rs10000507,rs1401358,4,185640057,4:185639912-185640263,4,185639912,185640263,Central_memory_CD8pos_T,-3.077595,0.0023233919
2,rs10000507,rs1401359,4,185640216,4:185639912-185640263,4,185639912,185640263,Central_memory_CD8pos_T,-3.077595,0.0023233919
3,rs10000507,rs2141257,4,185609463,4:185609285-185610052,4,185609285,185610052,Memory_Teffs,2.028882,0.0008247154
4,rs10000507,rs2141257,4,185609463,4:185609285-185610052,4,185609285,185610052,Th1_precursors,1.957103,0.0016682013
5,rs10000507,rs2141257,4,185609463,4:185609285-185610052,4,185609285,185610052,Th17_precursors,2.444759,0.0008956363
6,rs10000507,rs2141258,4,185609576,4:185609285-185610052,4,185609285,185610052,Memory_Teffs,2.028882,0.0008247154


In [31]:
# Save the lead SNP / Calderon et al. DA peak overlaps as a tab-separated file.
write.table(calderon.overlaps, "~/gains_team282/nikhil/functional_interpretation/lead_snps_80r2_calderon_et_al_da_peaks_overlaps.tsv", quote=FALSE, row.names=FALSE, sep="\t")

In [32]:
# Rebuild the conditional/tag SNP ranges from the lifted-over table so that
# they are in hg19 (this overwrites the ranges built from the genotyping
# positions).
conditional.snps.ranges <- makeGRangesFromDataFrame(conditional.snps.hg19, keep.extra.columns=TRUE, start.field="pos.hg19", end.field="pos.hg19")

In [33]:
# Same overlap search for the conditional SNPs; da.peaks still holds the
# Calderon et al. peaks at this point.
overlaps <- findOverlaps(conditional.snps.ranges, da.peaks)

In [34]:
# One row per (conditional/tag SNP, Calderon et al. DA peak) overlap.
# Hits are read through the queryHits()/subjectHits() accessors instead of the
# internal @from/@to slots, and columns are picked by name rather than by
# position, so the table does not depend on the column order of the inputs.
calderon.conditional.overlaps <- local({
    snp.df <- as.data.frame(conditional.snps.ranges[queryHits(overlaps)])
    peak.df <- as.data.frame(da.peaks[subjectHits(overlaps)])

    # Selecting with a character vector fails loudly if a column is missing
    snp.cols <- snp.df[, c("eSNP", "Tagging.SNP", "seqnames", "start")]
    names(snp.cols) <- c("esnp", "tag_snp", "tag_snp_chr", "tag_snp_pos_hg19")

    peak.cols <- peak.df[, c("peak_id", "seqnames", "start", "end", "cell_type", "logFC", "pval")]
    names(peak.cols) <- c("peak_id", "peak_chr", "peak_start", "peak_end", "cell_type", "peak_logFC", "peak_pval")

    cbind(snp.cols, peak.cols)
})

In [35]:
head(calderon.conditional.overlaps)

Unnamed: 0_level_0,esnp,tag_snp,tag_snp_chr,tag_snp_pos_hg19,peak_id,peak_chr,peak_start,peak_end,cell_type,peak_logFC,peak_pval
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<int>,<chr>,<fct>,<int>,<int>,<chr>,<dbl>,<dbl>
1,rs10000507,rs1401358,4,185640057,4:185639912-185640263,4,185639912,185640263,Central_memory_CD8pos_T,-3.077595,0.0023233919
2,rs10000507,rs1401359,4,185640216,4:185639912-185640263,4,185639912,185640263,Central_memory_CD8pos_T,-3.077595,0.0023233919
3,rs10000507,rs2141257,4,185609463,4:185609285-185610052,4,185609285,185610052,Memory_Teffs,2.028882,0.0008247154
4,rs10000507,rs2141257,4,185609463,4:185609285-185610052,4,185609285,185610052,Th1_precursors,1.957103,0.0016682013
5,rs10000507,rs2141257,4,185609463,4:185609285-185610052,4,185609285,185610052,Th17_precursors,2.444759,0.0008956363
6,rs10000507,rs2141258,4,185609576,4:185609285-185610052,4,185609285,185610052,Memory_Teffs,2.028882,0.0008247154


In [36]:
# Save the conditional SNP / Calderon et al. DA peak overlaps as a
# tab-separated file.
write.table(calderon.conditional.overlaps, "~/gains_team282/nikhil/functional_interpretation/conditional_snps_80r2_calderon_et_al_da_peaks_overlaps.tsv", quote=FALSE, row.names=FALSE, sep="\t")

### Overlap with Ram-Mohan *et al.*

In [37]:
# hg19 ranges for the lead/tag SNPs, rebuilt from lead.snps.hg19 so this
# section can be run on its own.
lead.snps.ranges <- makeGRangesFromDataFrame(lead.snps.hg19, keep.extra.columns=TRUE, start.field="pos.hg19", end.field="pos.hg19")

In [38]:
# Ram-Mohan et al. DA peaks as genomic ranges. This reuses the name da.peaks,
# which from here on no longer refers to the Calderon et al. peaks.
da.peaks <- makeGRangesFromDataFrame(ram.mohan.da.peaks, keep.extra.columns=TRUE)

In [39]:
# Find every (lead/tag SNP, Ram-Mohan et al. DA peak) pair where the SNP
# position falls inside the peak.
overlaps <- findOverlaps(lead.snps.ranges, da.peaks)

In [40]:
# One row per (lead/tag SNP, Ram-Mohan et al. DA peak) overlap.
# Hits are read through the queryHits()/subjectHits() accessors instead of the
# internal @from/@to slots, and columns are picked by name rather than by
# position, so the table does not depend on the column order of the inputs.
ram.mohan.overlaps <- local({
    snp.df <- as.data.frame(lead.snps.ranges[queryHits(overlaps)])
    peak.df <- as.data.frame(da.peaks[subjectHits(overlaps)])

    # Selecting with a character vector fails loudly if a column is missing
    snp.cols <- snp.df[, c("eSNP", "Tagging.SNP", "seqnames", "start")]
    names(snp.cols) <- c("esnp", "tag_snp", "tag_snp_chr", "tag_snp_pos_hg19")

    peak.cols <- peak.df[, c("peak_id", "seqnames", "start", "end", "stimulation", "logFC", "pval")]
    names(peak.cols) <- c("peak_id", "peak_chr", "peak_start", "peak_end", "stimulation", "peak_logFC", "peak_pval")

    cbind(snp.cols, peak.cols)
})

In [41]:
head(ram.mohan.overlaps)

Unnamed: 0_level_0,esnp,tag_snp,tag_snp_chr,tag_snp_pos_hg19,peak_id,peak_chr,peak_start,peak_end,stimulation,peak_logFC,peak_pval
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<int>,<chr>,<fct>,<int>,<int>,<chr>,<dbl>,<dbl>
1,rs1000137,rs1560955,7,157644534,7:157644410-157644721,7,157644410,157644721,HMGB,1.17,0.0121
2,rs1000137,rs1560955,7,157644534,7:157644483-157644721,7,157644483,157644721,EC1h,1.97,0.0136
3,rs1000137,rs1560957,7,157644701,7:157644410-157644721,7,157644410,157644721,HMGB,1.17,0.0121
4,rs1000137,rs1560957,7,157644701,7:157644483-157644721,7,157644483,157644721,EC1h,1.97,0.0136
5,rs10010544,rs3822001,4,57301750,4:57301645-57301882,4,57301645,57301882,SA,1.83,0.00711
6,rs10022384,rs12642711,4,39529146,4:39528844-39529709,4,39528844,39529709,EC1h,1.03,0.00112


In [42]:
# Save the lead SNP / Ram-Mohan et al. DA peak overlaps as a tab-separated file.
write.table(ram.mohan.overlaps, "~/gains_team282/nikhil/functional_interpretation/lead_snps_80r2_ram_mohan_et_al_da_peaks_overlaps.tsv", quote=FALSE, row.names=FALSE, sep="\t")

In [43]:
# hg19 ranges for the conditional/tag SNPs, rebuilt from conditional.snps.hg19
# so this section can be run on its own.
conditional.snps.ranges <- makeGRangesFromDataFrame(conditional.snps.hg19, keep.extra.columns=TRUE, start.field="pos.hg19", end.field="pos.hg19")

In [44]:
# Same overlap search for the conditional SNPs; da.peaks holds the Ram-Mohan
# et al. peaks at this point.
overlaps <- findOverlaps(conditional.snps.ranges, da.peaks)

In [45]:
# One row per (conditional/tag SNP, Ram-Mohan et al. DA peak) overlap.
# Hits are read through the queryHits()/subjectHits() accessors instead of the
# internal @from/@to slots, and columns are picked by name rather than by
# position, so the table does not depend on the column order of the inputs.
ram.mohan.conditional.overlaps <- local({
    snp.df <- as.data.frame(conditional.snps.ranges[queryHits(overlaps)])
    peak.df <- as.data.frame(da.peaks[subjectHits(overlaps)])

    # Selecting with a character vector fails loudly if a column is missing
    snp.cols <- snp.df[, c("eSNP", "Tagging.SNP", "seqnames", "start")]
    names(snp.cols) <- c("esnp", "tag_snp", "tag_snp_chr", "tag_snp_pos_hg19")

    peak.cols <- peak.df[, c("peak_id", "seqnames", "start", "end", "stimulation", "logFC", "pval")]
    names(peak.cols) <- c("peak_id", "peak_chr", "peak_start", "peak_end", "stimulation", "peak_logFC", "peak_pval")

    cbind(snp.cols, peak.cols)
})

In [46]:
head(ram.mohan.conditional.overlaps)

Unnamed: 0_level_0,esnp,tag_snp,tag_snp_chr,tag_snp_pos_hg19,peak_id,peak_chr,peak_start,peak_end,stimulation,peak_logFC,peak_pval
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<int>,<chr>,<fct>,<int>,<int>,<chr>,<dbl>,<dbl>
1,rs1000137,rs1560955,7,157644534,7:157644410-157644721,7,157644410,157644721,HMGB,1.17,0.0121
2,rs1000137,rs1560955,7,157644534,7:157644483-157644721,7,157644483,157644721,EC1h,1.97,0.0136
3,rs1000137,rs1560957,7,157644701,7:157644410-157644721,7,157644410,157644721,HMGB,1.17,0.0121
4,rs1000137,rs1560957,7,157644701,7:157644483-157644721,7,157644483,157644721,EC1h,1.97,0.0136
5,rs10007349,rs10021593,4,25235801,4:25235394-25235874,4,25235394,25235874,EC4h,1.23,0.0229
6,rs10010544,rs3822001,4,57301750,4:57301645-57301882,4,57301645,57301882,SA,1.83,0.00711


In [47]:
# Save the conditional SNP / Ram-Mohan et al. DA peak overlaps as a
# tab-separated file.
write.table(ram.mohan.conditional.overlaps, "~/gains_team282/nikhil/functional_interpretation/conditional_snps_80r2_ram_mohan_et_al_da_peaks_overlaps.tsv", quote=FALSE, row.names=FALSE, sep="\t")

## Overlaps with caQTL

In [48]:
# hg19 ranges for the lead/tag SNPs, rebuilt from lead.snps.hg19 so this
# section can be run on its own.
lead.snps.ranges <- makeGRangesFromDataFrame(lead.snps.hg19, keep.extra.columns=TRUE, start.field="pos.hg19", end.field="pos.hg19")

In [49]:
# Calderon et al. caQTL variants as width-1 ranges (start == end == pos), with
# all other columns kept as metadata.
# NOTE(review): assumes `pos` is an hg19 coordinate, matching the lifted-over
# SNP positions it is compared against - confirm against the publication.
ca.qtl <- makeGRangesFromDataFrame(calderon.ca.qtl, keep.extra.columns=TRUE, start.field="pos", end.field="pos")

In [50]:
# Both sides are single-base ranges, so a hit means a lead/tag SNP sits at
# exactly the same chromosome and position as a caQTL variant.
overlaps <- findOverlaps(lead.snps.ranges, ca.qtl)

In [51]:
# One row per distinct (lead/tag SNP, caQTL record) match.
# Hits are read through the queryHits()/subjectHits() accessors instead of the
# internal @from/@to slots, and columns are picked by name rather than by
# position. unique() drops rows that become identical once only the columns
# below are kept.
ca.qtl.overlaps <- local({
    snp.df <- as.data.frame(lead.snps.ranges[queryHits(overlaps)])
    qtl.df <- as.data.frame(ca.qtl[subjectHits(overlaps)])

    # Selecting with a character vector fails loudly if a column is missing
    snp.cols <- snp.df[, c("eSNP", "Tagging.SNP", "seqnames", "start")]
    names(snp.cols) <- c("esnp", "tag_snp", "tag_snp_chr", "tag_snp_pos_hg19")

    qtl.cols <- qtl.df[, c("cell", "refAllele", "altAllele", "refCount", "altCount", "p", "contrast_atac")]
    names(qtl.cols) <- c("cell_type", "ref", "alt", "ref_count", "alt_count", "p", "contrast_atac")

    unique(cbind(snp.cols, qtl.cols))
})

In [52]:
head(ca.qtl.overlaps)

Unnamed: 0_level_0,esnp,tag_snp,tag_snp_chr,tag_snp_pos_hg19,cell_type,ref,alt,ref_count,alt_count,p,contrast_atac
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<int>,<chr>,<chr>,<chr>,<int>,<int>,<dbl>,<chr>
1,rs10205219,rs4665969,2,27574953,Th1_precursors,T,C,0,5,0.0625,Th1_precursors_S-Th1_precursors_U
14785,rs10419627,rs9384,19,13010643,Immature_NK,G,T,9,2,0.06542969,Immature_NK_U-Mature_NK_U
14787,rs10419627,rs9384,19,13010643,Immature_NK,G,T,9,2,0.06542969,Immature_NK_U-Memory_NK_U
119665,rs10790519,rs11606532,11,122535611,Myeloid_DCs,G,A,0,5,0.0625,Monocytes_U-Myeloid_DCs_U
119714,rs10934592,rs6780306,3,122225454,Effector_CD4pos_T,T,C,0,5,0.0625,Effector_CD4pos_T_S-Effector_CD4pos_T_U
119830,rs116374652,rs9384,19,13010643,Immature_NK,G,T,9,2,0.06542969,Immature_NK_U-Mature_NK_U


In [53]:
# Save the lead SNP / Calderon et al. caQTL overlaps as a tab-separated file.
write.table(ca.qtl.overlaps, "~/gains_team282/nikhil/functional_interpretation/lead_snps_80r2_calderon_et_al_ca_qtl_overlaps.tsv", quote=FALSE, row.names=FALSE, sep="\t")

In [54]:
# hg19 ranges for the conditional/tag SNPs, rebuilt from conditional.snps.hg19
# so this section can be run on its own.
conditional.snps.ranges <- makeGRangesFromDataFrame(conditional.snps.hg19, keep.extra.columns=TRUE, start.field="pos.hg19", end.field="pos.hg19")

In [55]:
# Both sides are single-base ranges, so a hit means a conditional/tag SNP sits
# at exactly the same chromosome and position as a caQTL variant.
overlaps <- findOverlaps(conditional.snps.ranges, ca.qtl)

In [56]:
# One row per distinct (conditional/tag SNP, caQTL record) match.
# Hits are read through the queryHits()/subjectHits() accessors instead of the
# internal @from/@to slots, and columns are picked by name rather than by
# position. unique() drops rows that become identical once only the columns
# below are kept.
ca.qtl.conditional.overlaps <- local({
    snp.df <- as.data.frame(conditional.snps.ranges[queryHits(overlaps)])
    qtl.df <- as.data.frame(ca.qtl[subjectHits(overlaps)])

    # Selecting with a character vector fails loudly if a column is missing
    snp.cols <- snp.df[, c("eSNP", "Tagging.SNP", "seqnames", "start")]
    names(snp.cols) <- c("esnp", "tag_snp", "tag_snp_chr", "tag_snp_pos_hg19")

    qtl.cols <- qtl.df[, c("cell", "refAllele", "altAllele", "refCount", "altCount", "p", "contrast_atac")]
    names(qtl.cols) <- c("cell_type", "ref", "alt", "ref_count", "alt_count", "p", "contrast_atac")

    unique(cbind(snp.cols, qtl.cols))
})

In [57]:
head(ca.qtl.conditional.overlaps)

Unnamed: 0_level_0,esnp,tag_snp,tag_snp_chr,tag_snp_pos_hg19,cell_type,ref,alt,ref_count,alt_count,p,contrast_atac
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<int>,<chr>,<chr>,<chr>,<int>,<int>,<dbl>,<chr>
1,rs10205219,rs4665969,2,27574953,Th1_precursors,T,C,0,5,0.0625,Th1_precursors_S-Th1_precursors_U
14785,rs10419627,rs9384,19,13010643,Immature_NK,G,T,9,2,0.06542969,Immature_NK_U-Mature_NK_U
14787,rs10419627,rs9384,19,13010643,Immature_NK,G,T,9,2,0.06542969,Immature_NK_U-Memory_NK_U
119665,rs10521209,rs11647841,16,50743331,Monocytes,G,A,1,7,0.0703125,Monocytes_U-Myeloid_DCs_U
120853,rs10934592,rs6780306,3,122225454,Effector_CD4pos_T,T,C,0,5,0.0625,Effector_CD4pos_T_S-Effector_CD4pos_T_U
120969,rs11216949,rs488141,11,118571168,Effector_memory_CD8pos_T,A,G,10,1,0.01171875,Effector_memory_CD8pos_T_S-Effector_memory_CD8pos_T_U


In [58]:
# Save the conditional SNP / Calderon et al. caQTL overlaps as a tab-separated
# file.
write.table(ca.qtl.conditional.overlaps, "~/gains_team282/nikhil/functional_interpretation/conditional_snps_80r2_calderon_et_al_ca_qtl_overlaps.tsv", quote=FALSE, row.names=FALSE, sep="\t")