-
Notifications
You must be signed in to change notification settings - Fork 0
/
region_enrichment_analysis_GREAT.R
77 lines (63 loc) · 2.93 KB
/
region_enrichment_analysis_GREAT.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# load libraries
library(LOLA)
library(GenomicRanges)
library(rGREAT)
# configs
regions_file <- snakemake@input[["regions"]]
background_file <- snakemake@input[["background"]]
gene_file <- snakemake@output[["genes"]]
result_paths <- snakemake@output[["results"]]
genome <- snakemake@config[["genome"]] #"hg38"
databases <- snakemake@config[["great_dbs"]]
region_set <- snakemake@params[["region_set"]]
dir_result <- dirname(gene_file)
# make result directory, if not exist
dir.create(file.path(dir_result), showWarnings = FALSE, recursive = TRUE)
# load query and background/universe region sets (eg consensus region set)
regionSet_query <- unname(readBed(regions_file))
# create empty results if query region set is too large for GREAT and quit i.e. >500,000 https://great-help.atlassian.net/wiki/spaces/GREAT/pages/655402/File+Size
if (length(regionSet_query)>500000){
print("query regions set is too large (>500,000)")
for (path in result_paths){
file.create(path)
}
file.create(file.path(gene_file))
quit()
}
# if query and background the same, do not use a background (used to map background regions to genes for downstream gene-based analyses)
if(regions_file!=background_file){
regionSet_background <- unname(readBed(background_file))
# check if background region set is not too large https://great-help.atlassian.net/wiki/spaces/GREAT/pages/655402/File+Size
if (length(regionSet_background)>1000000){
print("background regions set is too large (>1,000,000) and set to NULL i.e. whole genome")
regionSet_background <- NULL
}
}else{
regionSet_background <- NULL
}
###### GREAT
job = submitGreatJob(regionSet_query, species = genome, bg=regionSet_background)
# save job description
capture.output(job, file=file.path(dir_result, 'job_description.txt'), append=TRUE)
# plot, get and save gene-region association
pdf(file=file.path(dir_result, paste0('region_gene_associations.pdf')), width=20, height=15)
res = plotRegionGeneAssociationGraphs(job, request_interval=600)
write.csv(res, file.path(dir_result, 'region_gene_associations.csv'), row.names=TRUE)
dev.off()
# save unique associated genes by using mcols(), which returns a DataFrame object containing the metadata columns.
genes <- unique(mcols(res)$gene)
write(genes, file.path(gene_file))
# write.table(genes, file.path(gene_file), sep='\t', row.names=FALSE, col.names=FALSE, quote = FALSE)
# get & save enrichment results
if(regions_file!=background_file){
for (db in databases){
tb <- getEnrichmentTables(job, ontology = db, category=NULL, download_by = 'tsv')
tb$Desc <- paste(tb$Desc, tb$ID)
# write.table(tb[[db]], file.path(dir_result, db, paste0(region_set,'_',db,'.tsv')), sep='\t', row.names=FALSE)
write.csv(tb[[db]], file.path(dir_result, gsub(" ", "_", db), paste0(region_set,'_',gsub(" ", "_", db),'.csv')), row.names=FALSE)
}
}else{
for (path in result_paths){
file.create(path)
}
}