/
prepareGeneSetFiles.R
82 lines (82 loc) · 2.06 KB
/
prepareGeneSetFiles.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#' Prepare gene set files
#'
#' @details
#' Intended primarily to match GMT files from MSigDb.
#'
#' Files are selected by file name. Only files whose name ends in
#' `.symbols.<ext>` (for `keyType = "geneName"`) or `.entrez.<ext>` (for
#' `keyType = "ncbiGeneId"`) are returned. Matching is case insensitive.
#'
#' @note Updated 2021-10-20.
#' @export
#'
#' @inheritParams params
#'
#' @param dir `character(1)`.
#' Directory name containing MSigDb release.
#'
#' @param ext `character(1)`.
#' Gene set file extension, without the leading dot.
#' Case insensitive.
#' Matched literally: regular expression metacharacters are escaped.
#'
#' @param recursive `logical(1)`.
#' Whether to search for gene set files recursively in `dir` argument.
#'
#' @return Named `character`.
#' Sorted gene set file paths, passed through `realpath()`.
#' Names are the snake case formatted file base names, without extension.
#' An error is thrown when no matching files are detected.
#'
#' @seealso
#' - https://www.gsea-msigdb.org/gsea/msigdb/
#' - https://www.ncbi.nlm.nih.gov/gene/
#'
#' @examples
#' dir <- system.file(
#'     "extdata",
#'     "msigdb",
#'     "7.0",
#'     "msigdb_v7.0_GMTs",
#'     package = "AcidGSEA",
#'     mustWork = TRUE
#' )
#' files <- prepareGeneSetFiles(dir, keyType = "geneName")
#' print(files)
prepareGeneSetFiles <-
    function(dir,
             keyType = c("geneName", "ncbiGeneId"),
             ext = "gmt",
             recursive = FALSE) {
        assert(
            isADir(dir),
            isString(ext),
            isFlag(recursive)
        )
        keyType <- match.arg(keyType)
        ## Map the user-facing key type to the suffix used in the file names.
        keyType2 <- switch(
            EXPR = keyType,
            "ncbiGeneId" = "entrez",
            "geneName" = "symbols"
        )
        dir <- realpath(dir)
        ## `list.files()` expects a regular expression, not a glob. Escape any
        ## metacharacters in the extension so that it is matched literally.
        extPattern <- gsub(
            pattern = "([.\\\\|()\\[\\]{}^$*+?])",
            replacement = "\\\\\\1",
            x = ext,
            perl = TRUE
        )
        ## Require a literal dot before the key type suffix, and anchor the
        ## extension at the end of the file name.
        pattern <- paste0("\\.", keyType2, "\\.", extPattern, "$")
        files <- sort(list.files(
            path = dir,
            pattern = pattern,
            full.names = TRUE,
            recursive = recursive,
            ignore.case = TRUE
        ))
        assert(
            hasLength(files),
            msg = sprintf(
                "Failed to detect any gene sets in {.dir %s}.",
                dir
            )
        )
        files <- realpath(files)
        alertInfo(sprintf(
            "Detected %d %s %s of {.var %s} {.val %s} in {.path %s}.",
            length(files),
            toupper(ext),
            ngettext(
                n = length(files),
                msg1 = "file",
                msg2 = "files"
            ),
            "keyType", keyType,
            dir
        ))
        names(files) <- snakeCase(basenameSansExt(files))
        files
    }