Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
89 commits
Select commit Hold shift + click to select a range
fd14e57
use effective samples size for binary SEs
nmdefries May 26, 2021
82100c1
Merge branch 'main' into fb-package-weight-binary-ses
nmdefries Oct 19, 2021
6a08698
generalize eff_n calculation so can drop assert
nmdefries Oct 19, 2021
69c5909
combine loops
nmdefries Oct 19, 2021
5d957b4
update unit tests
nmdefries Oct 19, 2021
1ae97c4
update integration tests
nmdefries Oct 19, 2021
987f0ae
adjust prop with Jeffreys prior on n_eff
nmdefries Oct 26, 2021
90c12e3
update binary_se formula
nmdefries Nov 12, 2021
f13f9c7
test that values match online docs
nmdefries Oct 26, 2021
99c681b
repair integration tests and make new gold test files
nmdefries Nov 12, 2021
b7fa57a
Merge branch 'main' into fb-package-weight-binary-ses
nmdefries Dec 6, 2021
873de82
mix weights for all indicators, deprecate skip_mixing
nmdefries Nov 18, 2021
3b65ec4
update gold receiving
nmdefries Dec 6, 2021
845da21
add mixing to binary ind tests
nmdefries Dec 6, 2021
9a9382a
Merge pull request #1372 from cmu-delphi/survey-comprehensive-mixing
nmdefries Dec 16, 2021
cbd6d14
Merge branch 'main' into fb-package-weight-binary-ses
nmdefries Feb 4, 2022
a09c72e
V2a "received all doses"
nmdefries Jun 10, 2022
3da6946
Q36 "yes financial threat"
nmdefries Jun 10, 2022
f41aff0
C14a "wear mask + some of the time"
nmdefries Jun 10, 2022
71a985c
H1 people distancing + "some of the time"
nmdefries Jun 10, 2022
320420c
C16/H2 people masked + "some of the time"
nmdefries Jun 10, 2022
081b8c1
H3 friends vaccinated + "some"
nmdefries Jun 10, 2022
a150e3a
C17* historical flu vaccines
nmdefries Jun 10, 2022
facc765
C2 flu vaccine last 12m
nmdefries Jun 10, 2022
f3bd939
mental health + sometimes
nmdefries Jun 10, 2022
29a8baa
C11/C12 historical in contact with COVID-+ person
nmdefries Jun 13, 2022
fe26aa4
all work_outside_home varieties
nmdefries Jun 13, 2022
918fd8e
work with high-risk populations
nmdefries Jun 13, 2022
9a65ac6
reasons why people are not planning to complete the vaccine series
nmdefries Jun 13, 2022
0420df6
reasons why not tested for covid
nmdefries Jun 13, 2022
f09d2dc
B* all followups to unusual symptoms question
nmdefries Jun 14, 2022
0df4d15
D10 work for pay outside home among those working
nmdefries Jun 14, 2022
23758e6
define variables we want to find the mean for
nmdefries Jun 14, 2022
bca27e8
define mean + percentiles calc function
nmdefries Jun 14, 2022
cf2ef61
test mean calculation
nmdefries Jun 15, 2022
7148e84
move new mental health inds to contingency-only func
nmdefries Jun 15, 2022
a6ede6b
move new flu shot inds to contingency-only func
nmdefries Jun 15, 2022
fbb681b
move vaccine incomplete reasons to contingency-only
nmdefries Jun 15, 2022
4b6687e
test some alt masking inds
nmdefries Jun 15, 2022
ad23a3d
overall vaccine hesitancy
nmdefries Jun 16, 2022
1b529a5
unusual symptom followup change meaning
nmdefries Jun 17, 2022
5ef3105
calc more demographics
nmdefries Jun 16, 2022
e29eac7
mean hh member by age from A5_123
nmdefries Jun 21, 2022
6aaec84
mean number of hh members from D3-5
nmdefries Jun 17, 2022
39b3400
unknown gender; child age cuts
nmdefries Jun 16, 2022
aecf0fd
add overall county indicators; remove monthly-only
nmdefries Jun 21, 2022
85564cc
make theme tables
nmdefries Jun 16, 2022
0a981e5
county metadata
nmdefries Jun 17, 2022
dd384ff
swap stringr for stringi
nmdefries Jun 22, 2022
c653918
tests
nmdefries Jun 22, 2022
4785af5
lower overall sample size threshold
nmdefries Jun 23, 2022
0ccfaa8
raise n thres only for county
nmdefries Jun 23, 2022
a51df29
round all sample sizes
nmdefries Jun 23, 2022
1d396fb
tests missing theme tables
nmdefries Jun 23, 2022
fd3f4d2
Merge branch 'ndefries/theme-tables' into ndefries/lower-contingency-…
nmdefries Jun 23, 2022
a98d4bd
filter out gender self-described responses
nmdefries Jun 24, 2022
7885d0b
return empty result if n=1 to avoid svydesign error
nmdefries Jun 24, 2022
c72d452
respect parallel_max_cores
nmdefries Jun 24, 2022
6760ce3
retain field names when adding geo info
nmdefries Jun 24, 2022
ebcb8a4
Update facebook/delphiFacebook/R/responses.R
nmdefries Jun 27, 2022
78c8d4d
combine select and mutate
nmdefries Jun 27, 2022
09b1648
script to amend monthly rollup microdata
nmdefries Jun 28, 2022
b9f5f93
make hyphen optional in pattern
nmdefries Jun 28, 2022
1ba94ef
check that state is missing where fips is missing
nmdefries Jun 29, 2022
e7bd51b
drop responses from territories
nmdefries Jun 29, 2022
c7186eb
blank zips with low population
nmdefries Jun 29, 2022
51760a3
Merge pull request #1656 from cmu-delphi/bot/sync-prod-main
krivard Jul 7, 2022
7e5c8a4
Merge pull request #1650 from cmu-delphi/ndefries/microdata-state-col
krivard Jul 11, 2022
a558503
High blood pressure code
nmdefries Jul 11, 2022
135efce
name symp_other_unusual
nmdefries Jul 11, 2022
e39c044
make hh_direct_contact out of all respondents
nmdefries Jul 11, 2022
5044a06
ignore idk from E3 school measures
nmdefries Jul 11, 2022
07eceff
ignore B7 in wave 10
nmdefries Jul 13, 2022
501120a
local geo map missing some zips, switch to covidcast
nmdefries Jul 14, 2022
f89e7ff
Merge pull request #1647 from cmu-delphi/ndefries/county-tables-metadata
nmdefries Jul 15, 2022
e73ff14
Merge pull request #1646 from cmu-delphi/ndefries/lower-contingency-t…
nmdefries Jul 15, 2022
bf96a5f
Merge pull request #1649 from cmu-delphi/ndefries/gender-freeresponse…
nmdefries Jul 15, 2022
d957a3d
Merge pull request #1645 from cmu-delphi/ndefries/theme-tables
nmdefries Jul 15, 2022
69333de
Merge pull request #1635 from cmu-delphi/ndefries/archival-new-inds
krivard Jul 15, 2022
9f53a6f
drop invalid zips instead of keeping valid zips
nmdefries Jul 15, 2022
d96deff
filter out territories even if fips is missing
nmdefries Jul 15, 2022
10e3fa6
add required packages to dockerfile
nmdefries Jul 15, 2022
e6b81b3
Merge pull request #1658 from cmu-delphi/ndefries/ctis-docker-packages
krivard Jul 15, 2022
8065c6a
Merge pull request #1065 from cmu-delphi/fb-package-weight-binary-ses
krivard Jul 15, 2022
99e63c4
Merge pull request #1657 from cmu-delphi/ndefries/amend-microdata-zip…
krivard Jul 15, 2022
c4d88ea
Update Sircal to Silence Safegraph alerts
neul3 Jul 18, 2022
7ea598a
Update Dev Parameters to remove Safegraph Alerts
neul3 Jul 18, 2022
129c7fd
Merge pull request #1659 from cmu-delphi/neul3/templates
krivard Jul 20, 2022
9eac4b1
chore: bump covidcast-indicators to 0.3.19
Jul 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.18
current_version = 0.3.19
commit = True
message = chore: bump covidcast-indicators to {new_version}
tag = False
5 changes: 0 additions & 5 deletions ansible/templates/sir_complainsalot-params-prod.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,6 @@
"maintainers": ["U01AP8GSWG3","U01069KCRS7"],
"retired-signals": ["confirmed_7dav_cumulative_num", "confirmed_7dav_cumulative_prop", "deaths_7dav_cumulative_num", "deaths_7dav_cumulative_prop"]
},
"safegraph": {
"max_age": 11,
"maintainers": ["U01AP8GSWG3","U01069KCRS7"],
"retired-signals": ["completely_home_prop", "full_time_work_prop", "part_time_work_prop", "median_home_dwell_time", "completely_home_prop_7dav", "full_time_work_prop_7dav", "part_time_work_prop_7dav", "median_home_dwell_time_7dav"]
},
"quidel": {
"max_age":6,
"maintainers": ["U01AP8GSWG3","U01069KCRS7"],
Expand Down
2 changes: 2 additions & 0 deletions facebook/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ RUN install2.r --error \
roxygen2 \
Rcpp \
BH \
plyr \
survey \
readr

RUN apt-get update && apt-get install -qq -y \
Expand Down
128 changes: 128 additions & 0 deletions facebook/amend_monthly_microdata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env Rscript

## Amend monthly microdata rollup files. Rename the `wave` field to `version`,
## add a `state` column derived from the county FIPS code, drop responses from
## US territories, and blank ZIP codes with a population of 100 or fewer.
##
## Usage:
##
## Rscript amend_monthly_microdata.R path/to/individual/files/ path/to/output/dir/ [/path/to/static/dir/]
##
## Writes each processed file to the specified output directory under its
## original file name.

suppressPackageStartupMessages({
library(dplyr)
library(readr)
library(rlang)
library(stringi)
library(covidcast)
library(delphiFacebook)
})

# Amend every monthly microdata file in a directory.
#
# For each file in `input_dir` whose name matches `pattern`, this:
#   * renames the `wave` column to `version`,
#   * adds a `state` column derived from the first two digits of `fips`,
#   * drops responses from US territories (by state or, failing that, by ZIP),
#   * blanks `A3` for ZIPs with a population of 100 or fewer,
#   * writes the result to `output_dir` under the original file name.
#
# Args:
#   input_dir: directory containing the microdata files to amend.
#   output_dir: directory the amended files are written to.
#   static_dir: directory containing the ZIP metadata file `02_20_uszips.csv`.
#   pattern: regular expression selecting which files in `input_dir` to process.
#
# Returns: NULL, invisibly (the value of the `for` loop); called for its side
#   effect of writing files.
amend_microdata <- function(input_dir, output_dir, static_dir, pattern = ".*[.]csv[.]gz$") {
  # Postal abbreviations of the territories whose responses are dropped.
  territory_abbrs <- c("AS", "GU", "PR", "VI", "MP")

  # Load ZIP-level metadata. It is used to find low-population ZIPs (to blank)
  # and ZIPs located in territories (to drop).
  zips <- read_csv(
    file.path(static_dir, "02_20_uszips.csv"),
    col_types = cols(.default = "c", population = "i")
  ) %>%
    mutate(
      fips = stri_pad(.data$fips, 5, pad = "0"),
      zip = stri_pad(.data$zip, 5, pad = "0")
    )
  invalid_zips <- zips %>%
    filter(population <= 100) %>%
    pull(zip)
  territory_zips <- zips %>%
    filter(state_id %in% territory_abbrs) %>%
    pull(zip)

  # Process each matching file from the microdata directory in turn.
  for (fname in list.files(input_dir, pattern = pattern)) {
    # Read every column as character, and disable the grouping mark so readr
    # neither treats commas as thousands separators nor infers column types
    # incorrectly.
    message("reading data in")
    data <- read_csv(
      file.path(input_dir, fname),
      locale = locale(grouping_mark = ""),
      col_types = cols(.default = col_character())
    ) %>%
      # Rename `wave` field. The column is named with a string because using
      # the `.data` pronoun inside a selection context is deprecated.
      rename(version = "wave") %>%
      create_zip5()

    # Add state column based on county FIPS code.
    data <- mutate(
      data,
      state = state_fips_to_name(substr(.data$fips, 1, 2)) %>% name_to_abbr()
    )

    assert(
      all(is.na(data$fips) == is.na(data$state)),
      "fips and state fields are not missing in the same places"
    )

    # Drop any territories.
    data <- filter(
      data,
      !(.data$state %in% territory_abbrs),
      # If fips not available and state didn't get filled in.
      !(.data$zip5 %in% territory_zips)
    )

    # Only ZIPs with a large enough population (>100) are kept in the
    # microdata output; those with too small a population are blanked to NA.
    data <- blank_zips(data, invalid_zips, fname)

    # Save file under original name but in output directory.
    message("writing data for ", fname)
    write_csv(data, file.path(output_dir, fname))
  }
}

# Derive a cleaned `zip5` column from the raw ZIP response in `A3`.
#
# Args:
#   data: data frame of responses with the raw ZIP response in column `A3`.
#
# Returns: `data` with a `zip5` column added (or overwritten).
create_zip5 <- function(data) {
  # Strip spaces, then cut off everything from the first hyphen onwards.
  cleaned <- data$A3 %>%
    stri_replace_all("", regex = " *") %>%
    stri_replace("", regex = "-.*")

  # Some respondents enter 9-digit ZIPs, which could make them easy to identify
  # in the individual output files. Truncating to 5 digits could turn nonsense
  # input into a valid-looking ZIP5, so anything longer than 5 characters is
  # replaced with NA instead.
  too_long <- nchar(cleaned) > 5
  data$zip5 <- ifelse(too_long, NA_character_, cleaned)

  data
}

# Blank the raw ZIP response (`A3`) for rows whose `zip5` is in `invalid_zips`,
# then drop the helper `zip5` column.
#
# Args:
#   data: data frame of responses with `A3`, `zip5`, `fips` and `state` columns.
#   invalid_zips: character vector of ZIP codes to blank.
#   fname: name of the file being processed; printed when any row is blanked.
#
# Returns: `data` with matching `A3` values set to NA and `zip5` removed.
blank_zips <- function(data, invalid_zips, fname) {
  needs_blanking <- data$zip5 %in% invalid_zips

  # Population-based blanking of zip codes was implemented in late May 2020, so
  # later files shouldn't contain any new observations to blank. Report them
  # if they do turn up.
  if (any(needs_blanking)) {
    warning("trying to remove obs with invalid zip via population")
    print(fname)
    flagged <- select(data[needs_blanking, ], zip5, fips, state)
    print(head(flagged))
  }

  data$A3[needs_blanking] <- NA

  data %>% select(-zip5)
}

# Command-line entry point: parse the arguments, validate the directories, and
# amend every monthly rollup file found in the input directory.
args <- commandArgs(trailingOnly = TRUE)

if (!(length(args) %in% c(2, 3))) {
  stop("Usage: Rscript amend_monthly_microdata.R path/to/individual/files/ path/to/output/dir/ [/path/to/static/dir/]")
}

input_dir <- args[1]
output_dir <- args[2]

# The static directory is optional; fall back to "static" when it is omitted.
if (length(args) == 3) {
  static_dir <- args[3]
} else {
  static_dir <- "static"
}

# `list.files()` silently returns nothing for a missing directory, which would
# make the script a no-op; fail loudly instead.
if (!dir.exists(input_dir)) {
  stop("input directory does not exist: ", input_dir)
}

# Create the output directory up front so a missing directory does not surface
# only after the first file has already been read and processed.
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# Specifies monthly microdata rollup naming scheme like "YYYY-MM.csv.gz" and the
# race-ethnicity version "YYYY-MM-race-ethnicity.csv.gz"
pattern <- "^202[0-9]-[0-9]{2}(-race-ethnicity)?[.]csv[.]gz$"

amend_microdata(input_dir, output_dir, static_dir, pattern = pattern)

7 changes: 5 additions & 2 deletions facebook/delphiFacebook/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@ Imports:
rlang,
readr,
dplyr,
plyr,
tidyr,
stringi,
jsonlite,
lubridate,
data.table,
tibble,
purrr,
Rcpp
Rcpp,
survey
Suggests:
knitr (>= 1.15),
rmarkdown (>= 1.4),
testthat (>= 1.0.1),
covr (>= 2.2.2)
LinkingTo: Rcpp
RoxygenNote: 7.1.1
RoxygenNote: 7.2.0
Encoding: UTF-8
8 changes: 8 additions & 0 deletions facebook/delphiFacebook/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,coalesce)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
Expand Down Expand Up @@ -98,6 +99,7 @@ importFrom(lubridate,ymd)
importFrom(lubridate,ymd_hms)
importFrom(parallel,detectCores)
importFrom(parallel,mclapply)
importFrom(plyr,round_any)
importFrom(purrr,reduce)
importFrom(readr,col_character)
importFrom(readr,col_integer)
Expand All @@ -115,13 +117,19 @@ importFrom(stats,setNames)
importFrom(stats,weighted.mean)
importFrom(stringi,stri_extract)
importFrom(stringi,stri_extract_first)
importFrom(stringi,stri_pad)
importFrom(stringi,stri_replace)
importFrom(stringi,stri_replace_all)
importFrom(stringi,stri_split)
importFrom(stringi,stri_sub)
importFrom(stringi,stri_trans_tolower)
importFrom(stringi,stri_trim)
importFrom(survey,oldsvyquantile)
importFrom(survey,svydesign)
importFrom(survey,svymean)
importFrom(survey,svyvar)
importFrom(tibble,add_column)
importFrom(tibble,tribble)
importFrom(tidyr,drop_na)
importFrom(utils,tail)
useDynLib(delphiFacebook, .registration = TRUE)
11 changes: 4 additions & 7 deletions facebook/delphiFacebook/R/aggregate.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,9 @@ summarize_indicators_day <- function(day_df, indicators, target_day, geo_level,

sample_size <- sum(ind_df$weight_in_location)

## TODO: See issue #764
new_row <- compute_fn(
response = ind_df[[metric]],
weight = if (indicators$skip_mixing[row]) { mixing$normalized_preweights } else { mixing$weights },
weight = mixing$weights,
sample_size = sample_size)

dfs_out[[indicator]][["val"]][ii] <- new_row$val
Expand All @@ -217,15 +216,13 @@ summarize_indicators_day <- function(day_df, indicators, target_day, geo_level,
}
}

# Convert list of lists to list of tibbles.
for (indicator in indicators$name) {
dfs_out[[indicator]] <- bind_rows(dfs_out[[indicator]])
}

for (row in seq_len(nrow(indicators))) {
indicator <- indicators$name[row]
post_fn <- indicators$post_fn[[row]]

# Convert list of lists to list of tibbles.
dfs_out[[indicator]] <- bind_rows(dfs_out[[indicator]])

dfs_out[[indicator]] <- dfs_out[[indicator]][
rowSums(is.na(dfs_out[[indicator]][, c("val", "sample_size", "geo_id", "day")])) == 0,
]
Expand Down
17 changes: 8 additions & 9 deletions facebook/delphiFacebook/R/binary.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ get_binary_indicators <- function() {

# work outside home
# pre-wave 4
"wip_smoothed_work_outside_home_5d", "weight_unif", "c_work_outside_5d", 6, compute_binary_response, jeffreys_binary,
"wip_smoothed_wwork_outside_home_5d", "weight", "c_work_outside_5d", 6, compute_binary_response, jeffreys_binary,
"smoothed_work_outside_home_5d", "weight_unif", "c_work_outside_5d", 6, compute_binary_response, jeffreys_binary,
"smoothed_wwork_outside_home_5d", "weight", "c_work_outside_5d", 6, compute_binary_response, jeffreys_binary,
# wave 4+, pre-wave 10
"smoothed_work_outside_home_1d", "weight_unif", "a_work_outside_home_1d", 6, compute_binary_response, jeffreys_binary,
"smoothed_wwork_outside_home_1d", "weight", "a_work_outside_home_1d", 6, compute_binary_response, jeffreys_binary,
Expand Down Expand Up @@ -420,9 +420,6 @@ get_binary_indicators <- function() {
"smoothed_wwant_info_none", "weight", "i_want_info_none", 6, compute_binary_response, jeffreys_binary
)


ind$skip_mixing <- TRUE

return(ind)
}

Expand All @@ -449,10 +446,12 @@ compute_binary_response <- function(response, weight, sample_size)
response_prop <- weighted.mean(response, weight)

val <- 100 * response_prop

effective_sample_size <- length(weight) * mean(weight)^2 / mean(weight^2)

return(list(val = val,
se = NA_real_,
effective_sample_size = sample_size)) # TODO effective sample size
effective_sample_size = effective_sample_size))
}

#' Apply a Jeffreys correction to estimates and their standard errors.
Expand All @@ -477,8 +476,8 @@ jeffreys_multinomial_factory <- function(k) {
# Returns: Updated data frame.
jeffreys_multinomial <- function(df) {
return(mutate(df,
val = jeffreys_percentage(.data$val, .data$sample_size, k),
se = binary_se(.data$val, .data$sample_size)))
val = jeffreys_percentage(.data$val, .data$effective_sample_size, k),
se = binary_se(.data$val, .data$effective_sample_size)))
}

return(jeffreys_multinomial)
Expand Down Expand Up @@ -506,6 +505,6 @@ jeffreys_percentage <- function(percentage, sample_size, k) {
#' @return Vector of standard errors; NA when a sample size is 0.
binary_se <- function(val, sample_size) {
return(ifelse(sample_size > 0,
sqrt( (val * (100 - val) / sample_size) ),
sqrt( val * (100 - val) / (sample_size + 1) ),
NA))
}
Loading