Skip to content

Commit

Permalink
clean html text by default
Browse files Browse the repository at this point in the history
  • Loading branch information
corriebar committed Feb 10, 2020
1 parent 64500e8 commit 229204a
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 8 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Expand Up @@ -25,4 +25,6 @@ Imports:
stringr,
magrittr,
rlang,
tidyr
tidyr,
rvest,
xml2
16 changes: 13 additions & 3 deletions R/api_functions.R
Expand Up @@ -136,14 +136,24 @@ get_claims <- function(pages=1, remove_duplicates=TRUE) {
review_id = .data$claim_review)
}

#' Convert a single HTML string to plain text.
#'
#' @param s A length-one character vector holding an HTML fragment (or plain
#'   text without any markup).
#' @return A length-one character vector with the markup removed. `NA` input
#'   yields `NA_character_`; empty or whitespace-only input is returned
#'   unchanged.
#' @noRd
strip_html <- function(s) {
  # Missing text has no plain-text version; keep it missing instead of erroring.
  if (is.na(s)) {
    return(NA_character_)
  }
  # Nothing to parse: the HTML parser rejects empty documents.
  if (!nzchar(trimws(s))) {
    return(s)
  }
  # xml2::read_html() treats a character string without "<" or ">" as a file
  # path / URL. Passing raw bytes forces it to parse the input as literal HTML,
  # so tag-free text no longer fails with a "does not exist" error.
  doc <- xml2::read_html(charToRaw(enc2utf8(s)))
  rvest::html_text(doc)
}

#' @describeIn get_claims Get claim reviews.
#' @param clean_html If TRUE, then add another column `text` which is a plain text version of `html_text`
#' @export
get_claim_reviews <- function(pages=1) {
get_claim_reviews <- function(pages=1, clean_html=TRUE) {
path <- "claim_reviews"
reviews <- paginate_resps(path, pages)
reviews %>%
reviews <- reviews %>%
dplyr::select(claims_id = .data$item_reviewed, .data$type:.data$text) %>%
dplyr::rename(review_name = .data$name)
dplyr::rename(review_name = .data$name, html_text = .data$text)
if (clean_html) {
reviews <- reviews %>%
dplyr::mutate(text = purrr::map_chr(.data$html_text, .f=strip_html))
}
reviews
}


Expand Down
4 changes: 2 additions & 2 deletions R/disinfo_functions.R
Expand Up @@ -116,8 +116,8 @@ add_claims <- function(disinfo, pages = 1) {

#' @describeIn add_claims Download claim reviews data and add to disinfo object.
#' @export
add_reviews <- function(disinfo, pages = 1) {
reviews <- get_claim_reviews(pages)
add_reviews <- function(disinfo, pages = 1, clean_html=TRUE) {
reviews <- get_claim_reviews(pages, clean_html=clean_html)
new_disinfo(claims=disinfo$claims,
reviews = reviews,
authors = disinfo$authors,
Expand Down
2 changes: 1 addition & 1 deletion man/add_claims.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/get_claims.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 229204a

Please sign in to comment.