diff --git a/facebook/delphiFacebook/DESCRIPTION b/facebook/delphiFacebook/DESCRIPTION
index beda72167..875d9dcde 100644
--- a/facebook/delphiFacebook/DESCRIPTION
+++ b/facebook/delphiFacebook/DESCRIPTION
@@ -15,6 +15,7 @@ Imports:
     rlang,
     readr,
     dplyr,
+    plyr,
    tidyr,
     stringi,
     jsonlite,
diff --git a/facebook/delphiFacebook/NAMESPACE b/facebook/delphiFacebook/NAMESPACE
index cfef1c937..b3ce270d7 100644
--- a/facebook/delphiFacebook/NAMESPACE
+++ b/facebook/delphiFacebook/NAMESPACE
@@ -98,6 +98,7 @@ importFrom(lubridate,ymd)
 importFrom(lubridate,ymd_hms)
 importFrom(parallel,detectCores)
 importFrom(parallel,mclapply)
+importFrom(plyr,round_any)
 importFrom(purrr,reduce)
 importFrom(readr,col_character)
 importFrom(readr,col_integer)
diff --git a/facebook/delphiFacebook/R/contingency_aggregate.R b/facebook/delphiFacebook/R/contingency_aggregate.R
index 054b980b0..beb25493a 100644
--- a/facebook/delphiFacebook/R/contingency_aggregate.R
+++ b/facebook/delphiFacebook/R/contingency_aggregate.R
@@ -66,6 +66,12 @@ produce_aggregates <- function(df, aggregations, cw_list, params) {
     geo_level <- agg_groups$geo_level[group_ind]
     geo_crosswalk <- cw_list[[geo_level]]
 
+    if (geo_level == "county") {
+      # Raise sample size threshold to 100.
+      old_thresh <- params$num_filter
+      params$num_filter <- 100L
+    }
+
     # Subset aggregations to keep only those grouping by the current agg_group
     # and with the current geo_level. `setequal` ignores differences in
     # ordering and only looks at unique elements.
@@ -108,6 +114,11 @@ produce_aggregates <- function(df, aggregations, cw_list, params) {
         }
       }
     }
+
+    if (geo_level == "county") {
+      # Restore old sample size threshold
+      params$num_filter <- old_thresh
+    }
   }
 }
 
@@ -307,10 +318,14 @@ summarize_aggs <- function(df, crosswalk_data, aggregations, geo_level, params)
       rowSums(is.na(dfs_out[[aggregation]][, c("val", "sample_size")])) == 0,
     ]
 
+    # Censor rows with low sample size
     dfs_out[[aggregation]] <- apply_privacy_censoring(dfs_out[[aggregation]], params)
 
-    ## Apply the post-function
+    # Apply the post-function
     dfs_out[[aggregation]] <- post_fn(dfs_out[[aggregation]])
+
+    # Round sample sizes
+    dfs_out[[aggregation]] <- round_n(dfs_out[[aggregation]], params)
   }
 
   return(dfs_out)
diff --git a/facebook/delphiFacebook/R/contingency_privacy.R b/facebook/delphiFacebook/R/contingency_privacy.R
index 81b764da9..3a122a751 100644
--- a/facebook/delphiFacebook/R/contingency_privacy.R
+++ b/facebook/delphiFacebook/R/contingency_privacy.R
@@ -1,7 +1,7 @@
 #' Censor aggregates to ensure privacy.
 #'
-#' Currently done in simple, static way: Rows with sample size less than 100 are
-#' removed; no noise is added.
+#' Currently done in simple, static way: Rows with sample size less than num_filter
+#' are removed; no noise is added.
 #'
 #' @param df a data frame of summarized response data
 #' @param params a named list with entries "s_weight", "s_mix_coef",
@@ -16,3 +16,17 @@ apply_privacy_censoring <- function(df, params) {
     .data$sample_size >= params$num_filter,
     .data$effective_sample_size >= params$num_filter))
 }
+
+#' Round sample sizes to nearest 5.
+#'
+#' @param df a data frame of summarized response data
+#' @param params a named list with entries "s_weight", "s_mix_coef",
+#' "num_filter"
+#'
+#' @importFrom plyr round_any
+round_n <- function(df, params) {
+  return(mutate(df,
+    sample_size = round_any(.data$sample_size, 5),
+    effective_sample_size = round_any(.data$effective_sample_size, 5)
+  ))
+}
diff --git a/facebook/delphiFacebook/R/contingency_utils.R b/facebook/delphiFacebook/R/contingency_utils.R
index b0f3368ec..5a5cfd5fe 100644
--- a/facebook/delphiFacebook/R/contingency_utils.R
+++ b/facebook/delphiFacebook/R/contingency_utils.R
@@ -36,7 +36,7 @@ read_contingency_params <- function(path = "params.json", template_path = "param
     }
   }
 
-  contingency_params$num_filter <- if_else(contingency_params$debug, 2L, 100L)
+  contingency_params$num_filter <- if_else(contingency_params$debug, 2L, 40L)
   contingency_params$s_weight <- if_else(contingency_params$debug, 1.00, 0.01)
   contingency_params$s_mix_coef <- if_else(contingency_params$debug, 0.05, 0.05)
   contingency_params$use_input_asis <- if_else(
diff --git a/facebook/delphiFacebook/man/apply_privacy_censoring.Rd b/facebook/delphiFacebook/man/apply_privacy_censoring.Rd
index 0f83c7d4b..405e0ea13 100644
--- a/facebook/delphiFacebook/man/apply_privacy_censoring.Rd
+++ b/facebook/delphiFacebook/man/apply_privacy_censoring.Rd
@@ -13,6 +13,6 @@ apply_privacy_censoring(df, params)
 "num_filter"}
 }
 \description{
-Currently done in simple, static way: Rows with sample size less than 100 are
-removed; no noise is added.
+Currently done in simple, static way: Rows with sample size less than num_filter
+are removed; no noise is added.
 }
diff --git a/facebook/delphiFacebook/man/round_n.Rd b/facebook/delphiFacebook/man/round_n.Rd
new file mode 100644
index 000000000..b310116f2
--- /dev/null
+++ b/facebook/delphiFacebook/man/round_n.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/contingency_privacy.R
+\name{round_n}
+\alias{round_n}
+\title{Round sample sizes to nearest 5.}
+\usage{
+round_n(df, params)
+}
+\arguments{
+\item{df}{a data frame of summarized response data}
+
+\item{params}{a named list with entries "s_weight", "s_mix_coef",
+"num_filter"}
+}
+\description{
+Round sample sizes to nearest 5.
+}
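
For reference, here is a minimal standalone R sketch of what the two privacy steps do under the revised defaults. The toy aggregate table, its geo IDs, and the params list are illustrative only and are not part of the package: rows whose sample size falls below num_filter (40 in the new non-debug default) are dropped, and the surviving sample sizes are reported only to the nearest multiple of 5 via plyr::round_any(), mirroring apply_privacy_censoring() and the new round_n().

library(dplyr)

# Illustrative params; 40L matches the new non-debug default of num_filter.
params <- list(num_filter = 40L)

# Toy aggregate table (geo IDs and values are made up).
toy <- tibble(
  geo_id                = c("10001", "10003", "10005"),
  val                   = c(0.12, 0.30, 0.25),
  sample_size           = c(37, 103, 61),
  effective_sample_size = c(35.2, 98.7, 58.4)
)

# Censoring: drop rows below the sample size threshold,
# as apply_privacy_censoring() does.
censored <- filter(toy,
                   sample_size >= params$num_filter,
                   effective_sample_size >= params$num_filter)

# Rounding: report sample sizes only to the nearest 5,
# as round_n() does via plyr::round_any().
rounded <- mutate(censored,
                  sample_size           = plyr::round_any(sample_size, 5),
                  effective_sample_size = plyr::round_any(effective_sample_size, 5))

rounded
# The 37-response row is censored; 103 -> 105, 61 -> 60, 98.7 -> 100, 58.4 -> 60.

In the patch itself these steps run inside summarize_aggs(), with censoring applied before the post-function and rounding applied after it, while county-level runs temporarily raise num_filter to 100 in produce_aggregates().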