-
Notifications
You must be signed in to change notification settings - Fork 1
/
create_great_annotation.R
53 lines (49 loc) · 2.16 KB
/
create_great_annotation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=
# Article:
# Integrative meta-analysis of epigenome-wide association studies
# identifies genomic and
# epigenomics differences in the brain and the blood in Alzheimer’s disease
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=
# Authors:
# - Tiago C. Silva
# - Lily Wang
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=
# Date: 21 July 2021
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=
# Article section:
# Correlations between methylation levels of significant CpGs and DMRs in AD
# with expressions of nearby genes
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=
# Creates GREAT annotation for EPIC array
#-----------------------------------------------------------------------------
# Libs
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=
library(rGREAT)
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
library(S4Vectors)
# Load the probe annotation shipped with the EPIC (ilm10b4, hg19) package.
epic.hg19 <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)

# Convert the annotation table to genomic ranges. The "pos" column is used as
# both start and end, so every probe becomes a single-position range; all
# remaining annotation columns are kept as metadata.
# The function is called directly instead of through `%>%`: no package
# attached by this script provides the pipe operator.
epic.hg19.gr <- makeGRangesFromDataFrame(
  epic.hg19,
  start.field = "pos",
  end.field = "pos",
  keep.extra.columns = TRUE
)
# Submit the probes to GREAT in consecutive batches and collect the
# region-gene associations of every batch as a data frame.
batch.size <- 50000
n.probes <- length(epic.hg19.gr)

regionsToGenes.list <- plyr::alply(
  seq(1, n.probes, batch.size),
  .margins = 1,
  .fun = function(first) {
    # The last batch may be shorter than batch.size.
    last <- min(first + batch.size - 1, n.probes)
    job <- submitGreatJob(epic.hg19.gr[first:last], species = "hg19")
    # NOTE(review): the 70 s pause is presumably there to respect the GREAT
    # server's request rate limit - confirm before changing it.
    Sys.sleep(70)
    data.frame(plotRegionGeneAssociationGraphs(job))
  },
  .progress = "time"
)

# Stack the per-batch tables into one data frame.
regionsToGenes <- plyr::rbind.fill(regionsToGenes.list)
# Label every region-gene pair as "<gene> (<distance to TSS>)". Positive
# distances get an explicit "+" sign; zero and negative distances are printed
# as they are.
regionsToGenes$GREAT_annotation <- with(
  regionsToGenes,
  ifelse(
    distTSS > 0,
    paste0(gene, " (+", distTSS, ")"),
    paste0(gene, " (", distTSS, ")")
  )
)

# Keep only the region coordinates and the new label.
regionsToGenes <- regionsToGenes[
  c("seqnames", "start", "end", "GREAT_annotation")
]
# Collapse all gene labels of one region into a single ";"-separated string,
# so the result has one row per (seqnames, start, end).
# dplyr is referenced through `dplyr::` because this script never attaches it
# (nor magrittr for `%>%`); this matches the `plyr::` calls used elsewhere in
# the file.
great <- dplyr::group_by(regionsToGenes, seqnames, start, end)

# NOTE(review): paste0() turns a missing label into the literal string "NA" -
# confirm whether regions without an associated gene should stay NA instead.
great <- dplyr::mutate(
  great,
  GREAT_annotation = paste0(GREAT_annotation, collapse = ";")
)

# Every row of a group now carries the same collapsed string. Drop the
# grouping so the saved table is not a grouped tibble, then keep the first row
# of each set of identical rows (original row order is preserved).
great <- dplyr::distinct(dplyr::ungroup(great))

# Writes the object `great` to a file in the current working directory.
save(great, file = "great_EPIC_array_annotation.rda")