Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions facebook/qsf-tools/combine_translations_eu.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env Rscript

## Combine a set of EU and non-EU translation files (UMD only), adding in a
## column indicating whether a given field was included in just the EU version,
## just the non-EU version, or in both.
##
## Usage:
##
## Rscript combine_translations_eu.R path/to/eu/translations/dir path/to/noneu/translations/dir path/to/combined/translations/dir

suppressPackageStartupMessages({
library(tidyverse)
source("qsf-utils.R")
})


# Stack one wave's EU and non-EU translation tables and tag every row with
# the survey version(s) it belongs to.
#
# The tag is stored in a new `eu_noneu` column:
#   * "EU"    - PhraseID starts with "intro1_eu" or "intro2_eu"
#   * "nonEU" - PhraseID starts with "intro1_noneu" or "intro2_noneu"
#   * "Both"  - every other row
#
# Both arguments are data frames with a character `PhraseID` column. The
# stacked, tagged table is returned.
combine_translation_pair <- function(eu_translation,
                                     noneu_translation) {
  stacked <- bind_rows(eu_translation, noneu_translation)

  mutate(
    stacked,
    eu_noneu = case_when(
      startsWith(PhraseID, "intro1_eu") |
        startsWith(PhraseID, "intro2_eu") ~ "EU",
      startsWith(PhraseID, "intro1_noneu") |
        startsWith(PhraseID, "intro2_noneu") ~ "nonEU",
      TRUE ~ "Both"
    )
  )
}

#' Combine per-wave EU and non-EU translation CSVs into one file per wave
#'
#' Reads every CSV in the EU and non-EU directories, keyed by the wave number
#' parsed from each filename. From the EU files only rows whose `PhraseID`
#' starts with "intro" are kept; from the non-EU files the rows whose
#' `PhraseID` starts with "A2_3_Answer", "A2_2_Answer" or "NA_" are dropped.
#' Each wave's pair is merged with `combine_translation_pair()` and written to
#' `path_to_combined` as `umd_ctis_combined_vXX_translations.csv`.
#'
#' @param path_to_eu_translations directory of EU translation CSVs; the path
#'   must contain "_eu_"
#' @param path_to_noneu_translations directory of non-EU translation CSVs; the
#'   path must contain "_noneu_"
#' @param path_to_combined output directory; created if it does not exist
#'
#' @return NULL, invisibly; called for its side effect of writing files.
#'   Raises an error if either input path lacks its marker, or if the set of
#'   waves differs between the two directories.
combine_translations <- function(path_to_eu_translations,
                                 path_to_noneu_translations,
                                 path_to_combined) {
  eu_name_pattern <- "_eu_"
  if (!grepl(eu_name_pattern, path_to_eu_translations)) {
    # Leading space keeps the path and the explanation from running together.
    stop(path_to_eu_translations, " does not specify that it is for the EU")
  }
  noneu_name_pattern <- "_noneu_"
  if (!grepl(noneu_name_pattern, path_to_noneu_translations)) {
    stop(path_to_noneu_translations, " does not specify that it is for the non-EU")
  }

  # `pattern` is a regular expression, not a glob: match a literal ".csv"
  # suffix only.
  eu_files <- list.files(path_to_eu_translations, pattern = "\\.csv$", full.names = TRUE)
  eu_translations <- list()
  for (filename in eu_files) {
    # Only the intro text is taken from the EU files.
    eu_translations[[as.character(get_wave_from_csv(filename))]] <- read_csv(filename, show_col_types = FALSE) %>%
      filter(startsWith(PhraseID, "intro"))
  }

  noneu_files <- list.files(path_to_noneu_translations, pattern = "\\.csv$", full.names = TRUE)
  noneu_translations <- list()
  for (filename in noneu_files) {
    noneu_translations[[as.character(get_wave_from_csv(filename))]] <- read_csv(filename, show_col_types = FALSE) %>%
      # Drop response options for the country + region question, they take up way too much space.
      filter(
        !startsWith(PhraseID, "A2_3_Answer"),
        !startsWith(PhraseID, "A2_2_Answer"),
        !startsWith(PhraseID, "NA_")
      )
  }

  # Every wave must have both an EU and a non-EU file to be paired.
  if (!identical(sort(names(eu_translations)), sort(names(noneu_translations)))) {
    stop("not all waves are available for both EU and non-EU")
  }

  # recursive = TRUE so a nested output path is created in full.
  dir.create(path_to_combined, showWarnings = FALSE, recursive = TRUE)
  for (wave in names(eu_translations)) {
    combined <- combine_translation_pair(
      eu_translations[[wave]],
      noneu_translations[[wave]]
    )

    write_excel_csv(
      combined,
      file.path(
        path_to_combined,
        # List names are strings; convert back so the wave is zero-padded.
        sprintf("umd_ctis_combined_v%02g_translations.csv", as.numeric(wave))
      ),
      quote = "needed")
  }
}


# Script entry point: expects exactly three command-line arguments (EU
# translations dir, non-EU translations dir, output dir) and runs the
# combination on them.
args <- commandArgs(trailingOnly = TRUE)

if (!identical(length(args), 3L)) {
  stop("Usage: Rscript combine_translations_eu.R path/to/eu/translations/dir path/to/noneu/translations/dir path/to/combined/translations/dir")
}

path_to_eu_translations <- args[[1]]
path_to_noneu_translations <- args[[2]]
path_to_combined <- args[[3]]

# Wrapped in invisible() so nothing is printed when run via Rscript.
invisible(
  combine_translations(
    path_to_eu_translations,
    path_to_noneu_translations,
    path_to_combined
  )
)


4 changes: 4 additions & 0 deletions facebook/qsf-tools/generate-codebook.R
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ process_qsf <- function(path_to_qsf,

# format all qsf content lists into a single tibble
qdf <- tibble(variable = item_names,
qid = qids,
question = questions,
question_type = qtype,
response_options = choices,
Expand Down Expand Up @@ -360,6 +361,7 @@ process_qsf <- function(path_to_qsf,
mutate(new = list(
tibble(matrix_base_name = variable,
variable = unlist(matrix_subquestion_field_names),
qid = qid,
question = question,
matrix_subquestion = unlist(matrix_subquestions),
question_type = question_type,
Expand All @@ -381,6 +383,7 @@ process_qsf <- function(path_to_qsf,
mutate(new = list(
tibble(matrix_base_name = variable,
variable = unlist(matrix_subquestion_field_names),
qid = qid,
question = question,
matrix_subquestion = unlist(matrix_subquestions),
question_type = question_type,
Expand Down Expand Up @@ -426,6 +429,7 @@ process_qsf <- function(path_to_qsf,
) %>%
select(wave,
variable,
qid,
matrix_base_name,
replaces,
description,
Expand Down
26 changes: 26 additions & 0 deletions facebook/qsf-tools/qsf-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,32 @@ get_wave <- function(path_to_qsf) {
return(wave)
}

#' Get wave number from CSV filename
#'
#' Wave number as provided in the CSV name should be an integer or a float with
#' one decimal place. The name must contain 'Wave' or 'wave', optionally
#' followed by an underscore, then the number, and later the string 'csv'.
#'
#' @param path_to_file path to (or name of) a CSV file
#'
#' @return numeric wave number (usually a whole number). Raises an error if
#'   the filename does not follow the expected format.
get_wave_from_csv <- function(path_to_file) {
  # `[0-9]+` requires at least one digit after "wave"; with `[0-9]*` a name
  # such as "wave_final.csv" passed validation and silently produced NA.
  name_pattern <- "(.*[Ww]ave_?)([0-9]+([.][0-9])?)(.*csv.*)"
  if (!grepl(name_pattern, path_to_file)) {
    stop(
      "The CSV filename must include the string 'csv', and the wave number in ",
      "the format 'Wave_XX', 'WaveXX', 'wave_XX', or 'waveXX' where 'XX' is an ",
      "integer or float. The wave specification can occur anywhere in the ",
      "filename but must precede the string 'csv'."
    )
  }

  # Capture group 2 is the numeric part of the wave specification.
  wave <- as.numeric(
    sub(name_pattern, "\\2", path_to_file)
  )

  return(wave)
}

#' Create mapping of QIDs to module name
#'
#' @param qsf contents of QSF file in JSON format
Expand Down
73 changes: 73 additions & 0 deletions facebook/qsf-tools/replace_translation_qids.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env Rscript

## In translation CSVs, replace the QID in the name column with the human-readable
## item name (e.g. A1). Export modified translation CSVs in the same format as the
## original.
##
## Usage:
##
## Rscript replace_translation_qids.R path/to/translation/directory/or/single/translation/CSV path/to/codebook

suppressPackageStartupMessages({
library(tidyverse)
library(purrr)
library(stringr)
source("qsf-utils.R")
})


#' Replace QIDs in one translation CSV or in every CSV in a directory
#'
#' @param path_to_translations path to a single translation CSV, or to a
#'   directory whose `.csv` files should all be processed
#' @param path_to_codebook path to the codebook, passed on to `replace_qids()`
#'
#' @return called for its side effects (see `replace_qids()`). Raises an
#'   error if `path_to_translations` is neither a file nor a directory.
replace_qid_wrapper <- function(path_to_translations, path_to_codebook) {
  if (dir.exists(path_to_translations)) {
    # Process all CSVs in directory. `pattern` is a regular expression, not a
    # glob, so match a literal ".csv" suffix; files are rewritten in place and
    # non-CSV files must not be picked up.
    csvs <- list.files(path_to_translations, pattern = "\\.csv$", full.names = TRUE)
    for (csv in csvs) {
      replace_qids(csv, path_to_codebook)
    }
  } else if (file.exists(path_to_translations)) {
    replace_qids(path_to_translations, path_to_codebook)
  } else {
    stop(path_to_translations, " is not a valid file or directory")
  }
}

# Rewrite one translation CSV in place, swapping the leading "QID<n>" of each
# PhraseID for the item name recorded in the codebook.
#
# path_to_translation_file: translation CSV; its name supplies the wave number.
# path_to_codebook: codebook CSV with (at least) the qid, version, variable
#   and matrix_base_name columns used below.
#
# The survey-ID row (PhraseID starting with "SV_") is dropped from the output.
replace_qids <- function(path_to_translation_file, path_to_codebook) {
  wave <- get_wave_from_csv(path_to_translation_file)

  # Codebook rows that carry a QID and belong to this file's wave. All columns
  # are read as text except `version`, which is compared numerically.
  codebook_col_types <- cols(
    .default = col_character(),
    version = col_double()
  )
  codebook <- filter(
    read_csv(path_to_codebook, col_types = codebook_col_types),
    !is.na(qid),
    version == wave
  )

  # Translation rows, minus the survey ID line.
  translation <- filter(
    read_csv(path_to_translation_file, show_col_types = FALSE),
    !startsWith(PhraseID, "SV_")
  )

  # Named lookup vector: names are QIDs, values are item names. When a
  # matrix_base_name is present it is used in place of the variable name.
  var_qid_pairs <- distinct(
    mutate(codebook, variable = coalesce(matrix_base_name, variable)),
    qid, variable
  )
  qid_item_map <- setNames(var_qid_pairs$variable, var_qid_pairs$qid)

  # The match includes the trailing underscore; strip it for the lookup and
  # put it back after the substituted name.
  swap_qid_for_item <- function(match) {
    paste0(qid_item_map[str_sub(match, 1, -2)], "_")
  }

  # Only rows whose PhraseID starts with "QID" are rewritten.
  ii_qid <- startsWith(translation$PhraseID, "QID")
  translation[ii_qid, ] <- mutate(
    translation[ii_qid, ],
    PhraseID = str_replace(PhraseID, "(^QID[0-9]*)_", swap_qid_for_item)
  )

  # Overwrite the original file with the processed table.
  write_excel_csv(translation, path_to_translation_file, quote = "needed")
}

# Script entry point: expects exactly two command-line arguments (a
# translation CSV or a directory of them, and the codebook path).
args <- commandArgs(trailingOnly = TRUE)

if (length(args) != 2) {
  stop("Usage: Rscript replace_translation_qids.R path/to/translation/directory/or/single/translation/CSV path/to/codebook")
}

path_to_translations <- args[[1]]
path_to_codebook <- args[[2]]

# Wrapped in invisible() so nothing is printed when run via Rscript.
invisible(
  replace_qid_wrapper(path_to_translations, path_to_codebook)
)