/
to_document.R
155 lines (141 loc) · 5.25 KB
/
to_document.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#' Convert R Notebook to `html_document`
#'
#' Copies a file using [fs::file_copy()] and changes the output type in the yaml front matter from
#' `html_notebook` to `html_document`, removing all other output types.
#'
#' @param file_path Path to the source file
#' @param new_path Path to copy the converted file using [fs::file_copy()]
#' @param overwrite Overwrite file if it exists, passed to [fs::file_copy()]
#'
#' @return Path to new file
#'
#' @seealso [build_analysis_site()]
#' @examples
#' \dontrun{
#' to_document("notebook.Rmd", "document.Rmd")
#' to_document("notebooks_dir/notebook.Rmd", "documents_dir")
#' }
#' @export
to_document <- function(file_path, new_path, overwrite = FALSE) {
checkmate::assert_string(file_path, min.chars = 1)
checkmate::assert_string(new_path, min.chars = 1)
checkmate::assert_flag(overwrite)
if (!(fs::path_ext(file_path) %in% c("Rmd", "rmd"))) {
stop("'", file_path, "' is not an R Markdown (*.Rmd) file")
}
notebook <- readLines(file_path)
header <- grep("^---$", notebook)
yaml <- rmarkdown::yaml_front_matter(file_path)
if (length(header) < 2 || length(yaml) < 1) {
stop("'", file_path, "' is not a valid R Notebook")
}
if (is.character(yaml$output)) {
if (yaml$output != "html_notebook") {
stop("'", file_path, "' does not contain `output: html_notebook`")
}
yaml$output <- "html_document"
} else if (is.list(yaml$output)) {
if (is.null(yaml$output$html_notebook)) {
stop("'", file_path, "' does not contain `output: html_notebook`")
}
yaml$output <- list(html_document = yaml$output$html_notebook)
} else {
stop("unexpected output object type '", typeof(yaml$output), "'")
}
body_start <- header[2] + 1
body_end <- length(notebook)
nb_body <- notebook[body_start:body_end]
notebook <- c(
"---",
gsub("\\n$", "", yaml::as.yaml(yaml)),
"---",
nb_body
)
new_file <- fs::file_copy(file_path, new_path, overwrite = overwrite)
writeLines(notebook, new_file)
return(new_file)
}
#' Get analysis notebook metadata
#'
#' Extract the YAML front matter and 'description' line from an
#' [analysis notebook](https://jabenninghoff.github.io/rdev/articles/analysis-package-layout.html),
#' and construct a URL to the notebook's location on GitHub pages.
#'
#' The 'description' line is the the first non-blank line in the body of an R notebook that serves
#' as a brief description of the work.
#'
#' If `_quarto.yml` is present, `rmd_metadata()` will extract the YAML front matter and description
#' from Quarto format (`.qmd`) notebooks.
#'
#' @param file_path Path to analysis notebook
#'
#' @return Named list containing analysis notebook title, URL, date, and description
#' @export
rmd_metadata <- function(file_path) { # nolint: cyclocomp_linter.
checkmate::assert_string(file_path, min.chars = 1)
quarto <- fs::file_exists("_quarto.yml")
file_ext <- fs::path_ext(file_path)
if (quarto) {
if (!(file_ext %in% c("Rmd", "rmd", "qmd"))) {
stop("'", file_path, "' is not an R Markdown (*.Rmd) or Quarto (*.qmd) file")
}
invalid_file_msg <- "is not a valid R Notebook or Quarto file"
} else {
if (!(file_ext %in% c("Rmd", "rmd"))) {
stop("'", file_path, "' is not an R Markdown (*.Rmd) file")
}
invalid_file_msg <- "is not a valid R Notebook"
}
notebook <- readLines(file_path)
header <- grep("^---$", notebook)
yaml <- rmarkdown::yaml_front_matter(file_path)
if (length(header) < 2 || length(yaml) < 1) {
stop("'", file_path, "' ", invalid_file_msg)
}
if (file_ext == "qmd") {
# qmd files require format: html
if (is.null(yaml$format)) {
stop("'", file_path, "' does not contain `format: html`")
} else if (is.character(yaml$format)) {
if (yaml$format != "html") {
stop("'", file_path, "' does not contain `format: html`")
}
} else if (is.list(yaml$format)) {
if (is.null(yaml$format$html)) {
stop("'", file_path, "' does not contain `format: html`")
}
} else {
stop("unexpected output object type '", typeof(yaml$format), "'")
}
} else {
# Rmd files require output: html_notebook
if (is.null(yaml$output)) {
stop("'", file_path, "' does not contain `output: html_notebook`")
} else if (is.character(yaml$output)) {
if (yaml$output != "html_notebook") {
stop("'", file_path, "' does not contain `output: html_notebook`")
}
} else if (is.list(yaml$output)) {
if (is.null(yaml$output$html_notebook)) {
stop("'", file_path, "' does not contain `output: html_notebook`")
}
} else {
stop("unexpected output object type '", typeof(yaml$output), "'")
}
}
body_start <- header[2] + 1
body_end <- length(notebook)
desc_line <- grep("[:graph:]", notebook[body_start:body_end])[1] + header[2]
urls <- desc::desc_get_urls()
if (length(urls) < 1) {
stop("no URL found in DESCRIPTION")
}
# set separator to "/" only if first URL doesn't end with "/"
sep <- ifelse(endsWith(urls[1], "/"), "", "/")
# add analysis to path if using Quarto
sep <- ifelse(quarto, paste0(sep, "analysis/"), sep)
gh_url <- paste0(
urls[1], sep, fs::path_ext_remove(fs::path_file(file_path)), ".html"
)
list(title = yaml$title, url = gh_url, date = yaml$date, description = notebook[desc_line])
}