-
Notifications
You must be signed in to change notification settings - Fork 43
/
process_response.R
109 lines (105 loc) · 3.95 KB
/
process_response.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#' Process a Canvas API response
#'
#' Convenience wrapper that turns a Canvas url into a single data frame. The
#' url is queried with a GET request and the response is handed to
#' \code{paginate}, which returns one response per page. The text content of
#' every page is parsed from JSON (with nested data frames flattened) and the
#' per-page results are combined into one data frame.
#'
#' @param url url to query
#' @param args query arguments to be passed to \code{httr}, e.g. auth token
#'
#' @return processed dataframe
#'
process_response <- function(url, args) {
  first_page <- canvas_query(url, args, "GET")
  all_pages <- paginate(first_page)
  page_text <- purrr::map(all_pages, httr::content, "text")
  parsed <- purrr::map(page_text, jsonlite::fromJSON, flatten = TRUE)
  # First try flattening each page individually; if that fails for the parsed
  # structure, fall back to row-binding the parsed pages directly.
  tryCatch(
    purrr::map_df(parsed, purrr::flatten_df),
    error = function(e) dplyr::bind_rows(parsed)
  )
}
#' @title Get responses from Canvas API pages
#'
#' @description The Canvas headers include a link object (usually), in form:
#' \code{Link:
#' <https://canvas.url/api/v1/[...]?page=1&per_page=10>; rel="current",
#' <https://canvas.url/api/v1/[...]?page=2&per_page=10>; rel="next",
#' <https://canvas.url/api/v1/[...]?page=1&per_page=10>; rel="first",
#' <https://canvas.url/api/v1/[...]?page=15&per_page=10>; rel="last"}
#'
#' In this case, we need to download every page from 1 to 15 to capture all
#' data. When a \code{rel="last"} link is present, the urls of pages 2..last
#' are derived from it and each one is queried. When only \code{rel="next"} is
#' present, the chain of "next" links is first walked with HEAD requests to
#' discover the page urls, and each discovered page is then queried. The
#' response passed in as \code{x} is always reused as the first page rather
#' than being requested again.
#'
#' @param x a httr response object for the first page; must have status 200
#' @param showProgress if TRUE (default), show a textual progress bar (only
#'   used when the number of pages is known from the "last" link)
#'
#' @return a list of unparsed responses, one per page, starting with \code{x}
#'
#' @examples
#' \dontrun{resp <- canvas_query(url, args, "GET")
#' paginate(resp)}
paginate <- function(x, showProgress = TRUE) {
  stopifnot(httr::status_code(x) == 200) # OK status
  first_response <- list(x)
  link_header <- httr::headers(x)$link
  if (is.null(link_header)) {
    return(first_response)
  }

  if (has_rel(link_header, "last")) {
    last_page <- get_page(x, "last")
    # Anchor on [?&] so that "per_page=10" is never mistaken for the page
    # number (same guard as in increment_pages).
    n_pages <- readr::parse_number(
      stringr::str_extract(last_page, "[?&]page=[0-9]+")
    )
    if (n_pages == 1) {
      return(first_response)
    }
    page_urls <- increment_pages(last_page, 2:n_pages)
    query_args <- list(access_token = check_token())
    if (showProgress) {
      # Page 1 is already downloaded, so the bar starts at 1 of n_pages.
      bar <- utils::txtProgressBar(max = n_pages, initial = 1, style = 3)
      on.exit(close(bar), add = TRUE)
    }
    fetch_page <- function(page_url) {
      resp <- canvas_query(page_url, args = query_args)
      if (showProgress) {
        utils::setTxtProgressBar(bar, utils::getTxtProgressBar(bar) + 1)
      }
      resp
    }
    return(c(first_response, purrr::map(page_urls, fetch_page)))
  }

  if (has_rel(link_header, "next")) {
    # edge case for if there is no 'last' header, see:
    # https://canvas.instructure.com/doc/api/file.pagination.html
    # https://github.com/daranzolin/rcanvas/issues/4
    query_args <- list(access_token = check_token())
    next_urls <- character(0)
    current <- x
    # Follow the "next" links with HEAD requests until a page has none.
    while (has_rel(httr::headers(current)$link, "next")) {
      page_url <- get_page(current, "next")
      next_urls <- c(next_urls, page_url)
      current <- canvas_query(page_url, args = query_args, type = "HEAD")
    }
    responses <- purrr::map(next_urls, canvas_query, args = query_args)
    return(c(first_response, responses))
  }

  # Link header present but with neither "last" nor "next": single page.
  first_response
}
# Build page urls from one example page url by swapping its "page" query
# parameter for each value in `n_pages`. The replacement is vectorised, so a
# vector of page values yields one url per value.
increment_pages <- function(base_url, n_pages) {
  # The leading [?&] is captured and re-emitted so that only a real "page="
  # parameter is rewritten, never the tail of "per_page=".
  # odd regex but necessary, see http://regexr.com/3evr4
  page_param <- "([\\?&])(page=[0-9a-zA-Z]{1,})"
  new_param <- sprintf("\\1page=%s", n_pages)
  stringr::str_replace(base_url, page_param, new_param)
}
# Does the Link header text `x` contain a link with relation `rel`
# (i.e. the text rel="<rel>")? `rel` is used as part of a regular expression.
# Returns a single TRUE/FALSE; errors if `rel` is NULL.
has_rel <- function(x, rel) {
  stopifnot(!is.null(rel))
  rel_pattern <- paste0('rel="', rel, '"')
  found <- grepl(rel_pattern, x)
  any(found)
}
# Extract the url of one relation ("next", "last", "current", ...) from the
# Link header of a response.
#
# resp: object whose `headers$link` element holds the Link header text
# page: relation name to look up; matched as rel="<page>"
#
# Returns the url as a character vector, or character(0) when the response
# has no Link header or no link with that relation.
get_page <- function(resp, page) {
  link_header <- resp$headers$link
  if (is.null(link_header)) {
    return(character(0))
  }
  url_pattern <- "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
  # Split only on commas that start a new "<url>" entry, so commas inside a
  # url do not cut it in half.
  links <- stringr::str_split(link_header, ",(?=\\s*<)")[[1]]
  # Match the rel attribute itself (as has_rel does) rather than the bare
  # word, which could also occur inside the url (e.g. "sort=last_login").
  wanted <- stringr::str_subset(links, paste0("rel=\"", page, "\""))
  url <- stringr::str_extract(wanted, url_pattern)
  # url_pattern also swallows the trailing ">;" delimiters; strip them.
  stringr::str_replace_all(url, "[<>;]", "")
}