Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Object names #60

Merged
merged 16 commits into from
Sep 21, 2020
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ Imports:
RoxygenNote: 7.1.1
Suggests:
knitr,
rmarkdown
rmarkdown,
testthat
VignetteBuilder: knitr
32 changes: 20 additions & 12 deletions R/add_batch.r
Original file line number Diff line number Diff line change
Expand Up @@ -114,43 +114,51 @@ add_object_keyword <- function(keyword, time = "2010-01-01 2020-07-31") {
.add_keyword_batch <- function(type, keyword, time) {
if (length(keyword) > 5) stop("Error: Lenght of list elements must not exceed 5.\nYou provided a list elment with length > 5.")
if (type == "control") {
if (nrow(keywords_control) == 0) {
if (nrow(.keywords_control) == 0) {
new_batch <- 1
} else {
new_batch <- max(keywords_control$batch) + 1
new_batch <- max(.keywords_control$batch) + 1
}
data <- tibble(batch = new_batch, keyword, type = "control")
dbWriteTable(conn = globaltrends_db, name = "batch_keywords", value = data, append = TRUE)
data <- tibble(batch = new_batch, time = time, type = "control")
dbWriteTable(conn = globaltrends_db, name = "batch_time", value = data, append = TRUE)
keywords_control <- filter(batch_keywords, type == "control")
keywords_control <- filter(.tbl_keywords, type == "control")
keywords_control <- select(keywords_control, -type)
keywords_control <- collect(keywords_control)
assign("keywords_control", keywords_control, envir = .GlobalEnv)
time_control <- filter(batch_time, type == "control")
lst_export <- list(keywords_control, keywords_control)
names(lst_export) <- list("keywords_control", ".keywords_control")
invisible(list2env(lst_export, envir = .GlobalEnv))
time_control <- filter(.tbl_time, type == "control")
time_control <- select(time_control, -type)
time_control <- collect(time_control)
assign("time_control", time_control, envir = .GlobalEnv)
lst_export <- list(time_control, time_control)
names(lst_export) <- list("time_control", ".time_control")
invisible(list2env(lst_export, envir = .GlobalEnv))
message(glue("Successfully created new control batch {new_batch} ({keyword_collapse}, {time}).", keyword_collapse = paste(keyword, collapse = ", ")))
return(new_batch)
} else if (type == "object") {
if (nrow(keywords_object) == 0) {
if (nrow(.keywords_object) == 0) {
new_batch <- 1
} else {
new_batch <- max(keywords_object$batch) + 1
new_batch <- max(.keywords_object$batch) + 1
}
data <- tibble(batch = new_batch, keyword, type = "object")
dbWriteTable(conn = globaltrends_db, name = "batch_keywords", value = data, append = TRUE)
data <- tibble(batch = new_batch, time = time, type = "object")
dbWriteTable(conn = globaltrends_db, name = "batch_time", value = data, append = TRUE)
keywords_object <- filter(batch_keywords, type == "object")
keywords_object <- filter(.tbl_keywords, type == "object")
keywords_object <- select(keywords_object, -type)
keywords_object <- collect(keywords_object)
assign("keywords_object", keywords_object, envir = .GlobalEnv)
time_object <- filter(batch_time, type == "object")
lst_export <- list(keywords_object, keywords_object)
names(lst_export) <- list("keywords_object", ".keywords_object")
invisible(list2env(lst_export, envir = .GlobalEnv))
time_object <- filter(.tbl_time, type == "object")
time_object <- select(time_object, -type)
time_object <- collect(time_object)
assign("time_object", time_object, envir = .GlobalEnv)
lst_export <- list(time_object, time_object)
names(lst_export) <- list("time_object", ".time_object")
invisible(list2env(lst_export, envir = .GlobalEnv))
message(glue("Successfully created new object batch {new_batch} ({keyword_collapse}, {time}).", keyword_collapse = paste(keyword, collapse = ", ")))
return(new_batch)
} else {
Expand Down
35 changes: 26 additions & 9 deletions R/compute_doi.r
Original file line number Diff line number Diff line change
Expand Up @@ -57,29 +57,46 @@ compute_doi.numeric <- function(control, object, locations = "countries") {
control <- control[[1]]
walk(c(control, object), .test_batch)
if (.test_empty(table = "data_doi", batch_c = control, batch_o = object, locations = locations)) {
data <- collect(filter(data_score, batch_c == control & batch_o == object))
data <- filter(data, location %in% pull(collect(filter(data_locations, type == locations)), location))
data <- collect(filter(.tbl_score, batch_c == control & batch_o == object))
data <- filter(
data,
location %in% pull(
collect(
filter(
.tbl_locations,
type == locations
)
),
location
)
)

# run dict replace
if (any(data$keyword %in% keyword_synonyms$keyword)) {
keyword1 <- unique(data$keyword[data$keyword %in% keyword_synonyms$keyword])
if (any(data$keyword %in% .keyword_synonyms$keyword)) {
keyword1 <- unique(data$keyword[data$keyword %in% .keyword_synonyms$keyword])
out <- map_dfr(keyword1, ~ {
keyword2 <- keyword_synonyms$synonym[keyword_synonyms$keyword == .x]
keyword2 <- .keyword_synonyms$synonym[.keyword_synonyms$keyword == .x]
if (!any(keyword2 %in% data$keyword)) {
out <- keywords_object$batch[keywords_object$keyword == keyword2]
out <- .keywords_object$batch[.keywords_object$keyword == keyword2]
out <- filter(data_score, batch_c == control & batch_o == out)
out <- collect(out)
out <- out[out$keyword == keyword2, ]
return(out)
}
})
data <- bind_rows(data, out)
data$keyword <- str_replace_all(data$keyword, set_names(keyword_synonyms$keyword[keyword_synonyms$keyword %in% keyword1], keyword_synonyms$synonym[keyword_synonyms$keyword %in% keyword1]))
data$keyword <- str_replace_all(
data$keyword,
set_names(
.keyword_synonyms$keyword[.keyword_synonyms$keyword %in% keyword1],
.keyword_synonyms$synonym[.keyword_synonyms$keyword %in% keyword1]
)
)
data <- group_by(data, location, date, keyword, batch_c, batch_o)
data <- summarise_if(data, is.double, sum)
data <- ungroup(data)
}
data <- data[!(data$keyword %in% keyword_synonyms$synonym), ]
data <- data[!(data$keyword %in% .keyword_synonyms$synonym), ]

# compute doi measures
out <- pivot_longer(data, cols = contains("score"), names_to = "type", values_to = "score")
Expand All @@ -95,7 +112,7 @@ compute_doi.numeric <- function(control, object, locations = "countries") {
out <- mutate(out, batch_c = control, batch_o = object, locations = locations)
dbWriteTable(conn = globaltrends_db, name = "data_doi", value = out, append = TRUE)
}
message(glue("Successfully computed DOI | control: {control} | object: {object} [{object}/{total}]", total = max(keywords_object$batch)))
message(glue("Successfully computed DOI | control: {control} | object: {object} [{object}/{total}]", total = max(.keywords_object$batch)))
}

#' @rdname compute_doi
Expand Down
51 changes: 40 additions & 11 deletions R/compute_score.r
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ compute_score.numeric <- function(control, object, locations = countries) {
walk(c(control, object), .test_batch)
walk(locations, ~ {
if (.test_empty(table = "data_score", batch_c = control, batch_o = object, location = .x)) {
qry_mapping <- filter(data_mapping, batch_c == control & batch_o == object & location == .x)
qry_mapping <- filter(.tbl_mapping, batch_c == control & batch_o == object & location == .x)
qry_mapping <- collect(qry_mapping)
if (nrow(qry_mapping) != 0) {
qry_control <- filter(data_control, batch == control & location == .x)
qry_control <- filter(.tbl_control, batch == control & location == .x)
qry_control <- collect(qry_control)
qry_object <- filter(data_object, batch == object & location == .x)
qry_object <- filter(.tbl_object, batch == object & location == .x)
qry_object <- collect(qry_object)

qry_control <- mutate(qry_control, date = as_date(date))
Expand All @@ -78,7 +78,13 @@ compute_score.numeric <- function(control, object, locations = countries) {
qry_object <- .reset_date(qry_object)
qry_mapping <- .reset_date(qry_mapping)

if (min(nrow(count(qry_control, date)), nrow(count(qry_object, date)), nrow(count(qry_mapping, date))) >= 24) {
if (
min(
nrow(count(qry_control, date)),
nrow(count(qry_object, date)),
nrow(count(qry_mapping, date))
) >= 24
) {
# adjust to time series and impute negative values
qry_control <- nest(qry_control, data = c(date, hits))
qry_control <- mutate(qry_control, data = map(data, .adjust_ts))
Expand Down Expand Up @@ -126,12 +132,30 @@ compute_score.numeric <- function(control, object, locations = countries) {
)
)
}
qry_control <- pivot_longer(qry_control, cols = contains("hits"), names_to = "key", values_to = "value")
qry_object <- pivot_longer(qry_object, cols = contains("hits"), names_to = "key", values_to = "value")
qry_mapping <- pivot_longer(qry_mapping, cols = contains("hits"), names_to = "key", values_to = "value")
qry_control <- pivot_longer(
qry_control,
cols = contains("hits"),
names_to = "key",
values_to = "value"
)
qry_object <- pivot_longer(
qry_object,
cols = contains("hits"),
names_to = "key",
values_to = "value"
)
qry_mapping <- pivot_longer(qry_mapping,
cols = contains("hits"),
names_to = "key",
values_to = "value"
)

# set to benchmark
tmp_con <- inner_join(qry_mapping, qry_control, by = c("location", "keyword", "date", "key"), suffix = c("_m", "_c"))
tmp_con <- inner_join(qry_mapping,
qry_control,
by = c("location", "keyword", "date", "key"),
suffix = c("_m", "_c")
)
tmp_con <- mutate(tmp_con,
value_m = case_when(value_m == 0 ~ 1, TRUE ~ value_m),
value_c = case_when(value_c == 0 ~ 1, TRUE ~ value_c)
Expand All @@ -142,7 +166,12 @@ compute_score.numeric <- function(control, object, locations = countries) {
tmp_con <- mutate(tmp_con, value = value * benchmark)
tmp_con <- select(tmp_con, location, date, key, keyword, value)

tmp_obj <- inner_join(qry_mapping, qry_object, by = c("location", "keyword", "date", "key"), suffix = c("_m", "_o"))
tmp_obj <- inner_join(
qry_mapping,
qry_object,
by = c("location", "keyword", "date", "key"),
suffix = c("_m", "_o")
)
tmp_obj <- mutate(tmp_obj,
value_m = case_when(value_m == 0 ~ 1, TRUE ~ value_m),
value_o = case_when(value_o == 0 ~ 1, TRUE ~ value_o)
Expand All @@ -163,8 +192,8 @@ compute_score.numeric <- function(control, object, locations = countries) {
key = str_replace(key, "hits_", "score_")
)
object_agg <- select(object_agg, location, date, keyword, key, score)
data_score <- pivot_wider(object_agg, names_from = key, values_from = score, values_fill = 0)
out <- mutate(data_score, batch_c = control, batch_o = object)
out_score <- pivot_wider(object_agg, names_from = key, values_from = score, values_fill = 0)
out <- mutate(out_score, batch_c = control, batch_o = object)
dbWriteTable(conn = globaltrends_db, name = "data_score", value = out, append = TRUE)
}
}
Expand Down
4 changes: 2 additions & 2 deletions R/download_control.r
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ download_control <- function(control, locations = countries) UseMethod("download

download_control.numeric <- function(control, locations = countries) {
.test_batch(control)
terms <- keywords_control$keyword[keywords_control$batch == control]
time <- time_control$time[time_control$batch == control]
terms <- .keywords_control$keyword[.keywords_control$batch == control]
time <- .time_control$time[.time_control$batch == control]
walk(locations, ~ {
if (.test_empty(table = "data_control", batch_c = control, location = .x)) {
out <- .get_trend(location = .x, term = terms, time = time)
Expand Down
8 changes: 4 additions & 4 deletions R/download_global.r
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ download_global <- function(object) UseMethod("download_global", object)

download_global.numeric <- function(object) {
.test_batch(object)
terms <- keywords_object$keyword[keywords_object$batch == object]
terms <- terms[!(terms %in% keyword_synonyms$synonym)]
time <- time_object$time[time_object$batch == object]
terms <- .keywords_object$keyword[.keywords_object$batch == object]
terms <- terms[!(terms %in% .keyword_synonyms$synonym)]
time <- .time_object$time[.time_object$batch == object]
if (.test_empty(table = "data_global", batch_o = object)) {
out <- map_dfr(terms, ~ {
out <- .get_trend(location = "", term = .x, time = time)
Expand Down Expand Up @@ -74,7 +74,7 @@ download_global.numeric <- function(object) {
out <- expand_grid(out, tibble(type = c("score_obs", "score_sad", "score_trd")))
}
out <- mutate(out, batch = object)
message(glue("Successfully downloaded worldwide data | term: {current}/{total_terms} [{object}/{total_batches}]", current = which(terms == .x), total_terms = length(terms), total_batches = max(keywords_object$batch)))
message(glue("Successfully downloaded worldwide data | term: {current}/{total_terms} [{object}/{total_batches}]", current = which(terms == .x), total_terms = length(terms), total_batches = max(.keywords_object$batch)))
return(out)
})
out <- mutate(out, batch = object)
Expand Down
18 changes: 14 additions & 4 deletions R/download_mapping.r
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,27 @@ download_mapping.numeric <- function(control, object, locations = countries) {
walk(c(control, object), .test_batch)
walk(locations, ~ {
if (.test_empty(table = "data_mapping", batch_c = control, batch_o = object, location = .x)) {
qry_control <- filter(data_control, batch == control & location == .x)
qry_control <- filter(.tbl_control, batch == control & location == .x)
qry_control <- collect(qry_control)
qry_object <- filter(data_object, batch == object & location == .x)
qry_object <- filter(.tbl_object, batch == object & location == .x)
qry_object <- collect(qry_object)
if (nrow(qry_control) > 0 & nrow(qry_object) > 0) {
term_con <- summarise(group_by(qry_control, keyword), hits = mean(hits))
term_con <- term_con$keyword[order(term_con$hits)]
term_obj <- summarise(group_by(qry_object, keyword), hits = mean(hits))
term_obj <- term_obj$keyword[term_obj$hits == max(term_obj$hits)]
date_min <- as_date(max(min(qry_control$date), coalesce(min(qry_object$date), min(qry_control$date))))
date_max <- as_date(min(max(qry_control$date), coalesce(max(qry_object$date), max(qry_control$date))))
date_min <- as_date(
max(
min(qry_control$date),
coalesce(min(qry_object$date), min(qry_control$date))
)
)
date_max <- as_date(
min(
max(qry_control$date),
coalesce(max(qry_object$date), max(qry_control$date))
)
)
if (date_min < date_max) {
i <- 1
while (i <= length(term_con)) {
Expand Down
4 changes: 2 additions & 2 deletions R/download_object.r
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ download_object <- function(object, locations = countries) UseMethod("download_o

download_object.numeric <- function(object, locations = countries) {
.test_batch(object)
terms <- keywords_object$keyword[keywords_object$batch == object]
time <- time_object$time[time_object$batch == object]
terms <- .keywords_object$keyword[.keywords_object$batch == object]
time <- .time_object$time[.time_object$batch == object]
walk(locations, ~ {
if (.test_empty(table = "data_object", batch_o = object, location = .x)) {
out <- .get_trend(location = .x, term = terms, time = time)
Expand Down
41 changes: 35 additions & 6 deletions R/export_data.r
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@
#' @importFrom dplyr rename

export_control <- function(control = NULL) {
out <- .export_data_single(table = data_control, in_control = control)
out <- .export_data_single(
table = .tbl_control,
in_control = control
)
out <- rename(out, control = batch)
return(out)
}
Expand All @@ -70,7 +73,11 @@ export_control <- function(control = NULL) {
#' @export

export_object <- function(keyword = NULL, object = NULL) {
out <- .export_data_single(table = data_object, in_keyword = keyword, in_object = object)
out <- .export_data_single(
table = .tbl_object,
in_keyword = keyword,
in_object = object
)
out <- rename(out, object = batch)
return(out)
}
Expand All @@ -79,7 +86,12 @@ export_object <- function(keyword = NULL, object = NULL) {
#' @export

export_global <- function(keyword = NULL, object = NULL, type = NULL) {
out <- .export_data_single(table = data_global, in_keyword = keyword, in_object = object, in_type = type)
out <- .export_data_single(
table = .tbl_global,
in_keyword = keyword,
in_object = object,
in_type = type
)
out <- rename(out, object = batch)
return(out)
}
Expand All @@ -88,7 +100,12 @@ export_global <- function(keyword = NULL, object = NULL, type = NULL) {
#' @export

export_mapping <- function(keyword = NULL, object = NULL, control = NULL) {
out <- .export_data_double(table = data_mapping, in_keyword = keyword, in_object = object, in_control = control)
out <- .export_data_double(
table = .tbl_mapping,
in_keyword = keyword,
in_object = object,
in_control = control
)
out <- rename(out, control = batch_c, object = batch_o)
return(out)
}
Expand All @@ -97,7 +114,12 @@ export_mapping <- function(keyword = NULL, object = NULL, control = NULL) {
#' @export

export_score <- function(keyword = NULL, object = NULL, control = NULL) {
out <- .export_data_double(table = data_score, in_keyword = keyword, in_object = object, in_control = control)
out <- .export_data_double(
table = .tbl_score,
in_keyword = keyword,
in_object = object,
in_control = control
)
out <- rename(out, control = batch_c, object = batch_o)
return(out)
}
Expand All @@ -106,7 +128,14 @@ export_score <- function(keyword = NULL, object = NULL, control = NULL) {
#' @export

export_doi <- function(keyword = NULL, object = NULL, control = NULL, locations = NULL, type = NULL) {
out <- .export_data_double(table = data_doi, in_keyword = keyword, in_object = object, in_control = control, in_locations = locations, in_type = type)
out <- .export_data_double(
table = .tbl_doi,
in_keyword = keyword,
in_object = object,
in_control = control,
in_locations = locations,
in_type = type
)
out <- rename(out, control = batch_c, object = batch_o)
return(out)
}
Expand Down
Loading