1 change: 1 addition & 0 deletions R/aux_data_utils.R
@@ -686,6 +686,7 @@ delete_files_from_s3 <- function(keys, bucket, batch_size = 500, .progress = TRU
 get_nhsn_data_archive <- function(disease_name) {
   aws.s3::s3read_using(nanoparquet::read_parquet, object = "nhsn_data_archive.parquet", bucket = "forecasting-team-data") %>%
     filter(disease == disease_name) %>%
+    filter(!grepl("region.*", geo_value)) %>%
     select(-version_timestamp, -disease) %>%
     as_epi_archive(compactify = TRUE)
 }
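The new `filter(!grepl("region.*", geo_value))` call drops region-level aggregate rows from the NHSN archive before it is converted to an `epi_archive`, so only per-geo rows survive. A minimal sketch of the pattern in isolation, with made-up geo_values (the actual labels in the archive are not shown in this diff); note that the trailing `.*` is redundant for `grepl()`, which already matches anywhere in the string, so `grepl("region", geo_value)` would behave the same:

```r
library(dplyr)

# Made-up snapshot mixing state-level rows with region-level aggregates
snapshot <- tibble::tibble(
  geo_value = c("ak", "al", "region 1", "region 10"),
  value     = c(10, 20, 300, 400)
)

# Same pattern as the new filter: drop any geo_value matching "region.*"
snapshot %>%
  filter(!grepl("region.*", geo_value))
#> Only the "ak" and "al" rows remain
```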
10 changes: 9 additions & 1 deletion R/targets/score_targets.R
@@ -22,11 +22,19 @@ score_forecasts <- function(nhsn_latest_data, joined_forecasts_and_ensembles) {
     drop_na() %>%
     rename(location = state_code) %>%
     select(-geo_value)
+  # limit the forecasts to the same set of forecasting times
+  max_forecast_date <-
+    joined_forecasts_and_ensembles %>%
+    group_by(forecaster) %>%
+    summarize(max_forecast = max(forecast_date)) %>%
+    pull(max_forecast) %>%
+    min()
   forecasts_formatted <-
     joined_forecasts_and_ensembles %>%
+    filter(forecast_date <= max_forecast_date) %>%
     format_scoring_utils(disease = "covid")
   scores <- forecasts_formatted %>%
-    filter(location != "US") %>%
+    filter(location %nin% c("US", "60", "66", "78")) %>%
     hubEvals::score_model_out(
       truth_data,
       metrics = c("wis", "ae_median", "interval_coverage_50", "interval_coverage_90"),
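The scoring pipeline now truncates every forecaster to the latest forecast_date they all share (the minimum over the per-forecaster maxima) and drops the national aggregate plus the territories with FIPS codes 60, 66 and 78 (American Samoa, Guam, and the U.S. Virgin Islands). A minimal sketch of the date logic with toy data; `%nin%` is assumed to be a "not in" helper available in this repo, so the sketch uses the base-R `!... %in% ...` equivalent:

```r
library(dplyr)

# Toy stand-in for joined_forecasts_and_ensembles (columns assumed from the diff)
forecasts <- tibble::tibble(
  forecaster    = c("a", "a", "b"),
  forecast_date = as.Date(c("2025-01-01", "2025-01-08", "2025-01-01")),
  location      = c("06", "06", "US")
)

# Latest forecast_date that every forecaster has reached
max_forecast_date <- forecasts %>%
  group_by(forecaster) %>%
  summarize(max_forecast = max(forecast_date)) %>%
  pull(max_forecast) %>%
  min()
max_forecast_date
#> [1] "2025-01-01"

# Keep only the shared horizon and drop the national/territory locations
# (base-R equivalent of the %nin% filter in the diff)
forecasts %>%
  filter(forecast_date <= max_forecast_date) %>%
  filter(!location %in% c("US", "60", "66", "78"))
#> One row left: forecaster "a", 2025-01-01, location "06"
```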
15 changes: 9 additions & 6 deletions R/utils.R
@@ -163,13 +163,10 @@ data_substitutions <- function(dataset, substitutions_path, forecast_generation_
   ) %>%
     filter(forecast_date == forecast_generation_date) %>%
     select(-forecast_date) %>%
-    rename(new_value = value) %>%
-    select(-time_value)
+    rename(new_value = value)
   # Replace the most recent values in the appropriate keys with the substitutions
   new_values <- dataset %>%
-    group_by(geo_value) %>%
-    slice_max(time_value) %>%
-    inner_join(substitutions, by = "geo_value") %>%
+    inner_join(substitutions, by = join_by(geo_value, time_value)) %>%
     mutate(value = ifelse(!is.na(new_value), new_value, value)) %>%
     select(-new_value)
   # Remove keys from dataset that have been substituted
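Previously the substitution table was joined by geo_value alone and applied only to each geo's most recent time_value (via slice_max); joining with join_by(geo_value, time_value) lets a substitution target whichever date it names. A minimal sketch of the new join with made-up values (requires dplyr >= 1.1.0 for join_by()):

```r
library(dplyr)

dataset <- tibble::tibble(
  geo_value  = c("ca", "ca", "ny"),
  time_value = as.Date(c("2025-01-04", "2025-01-11", "2025-01-11")),
  value      = c(100, 200, 300)
)

# A substitution now names both the geo and the date it corrects
substitutions <- tibble::tibble(
  geo_value  = "ca",
  time_value = as.Date("2025-01-04"),
  new_value  = 150
)

# Same shape as the updated pipeline: join on both keys, then overwrite value
new_values <- dataset %>%
  inner_join(substitutions, by = join_by(geo_value, time_value)) %>%
  mutate(value = ifelse(!is.na(new_value), new_value, value)) %>%
  select(-new_value)
new_values
#> A single row: "ca", 2025-01-04, value 150; the surrounding function then
#> removes these keys from dataset and binds new_values back in
```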
@@ -383,8 +380,14 @@ update_site <- function(sync_to_s3 = TRUE) {
       slice_max(generation_date)
     # iterating over the diseases
     for (row_num in seq_along(used_files$filename)) {
+      file_name <- path_file(used_files$filename[[row_num]])
       scoring_index <- which(grepl("### Scoring this season", report_md_content)) + 1
-      score_link <- sprintf("- [%s Scoring, Rendered %s](%s)", str_to_title(used_files$disease[[row_num]]), used_files$generation_date[[row_num]], used_files$filename[[row_num]])
+      score_link <- sprintf(
+        "- [%s Scoring, Rendered %s](%s)",
+        str_to_title(used_files$disease[[row_num]]),
+        used_files$generation_date[[row_num]],
+        file_name
+      )
       report_md_content <- append(report_md_content, score_link, after = scoring_index)
     }
   }
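The score links written into the report now use only the file name rather than the full stored path, so the rendered markdown links resolve relative to the page. A minimal sketch, assuming path_file() is fs::path_file() and using a made-up filename and date:

```r
library(fs)

# Made-up stored path; the real values come from used_files
full_path <- "reports/covid_2025-01-15_score_report.html"
file_name <- path_file(full_path)
file_name
#> "covid_2025-01-15_score_report.html"

# Link built the same way as in the diff, now pointing at the bare file name
sprintf(
  "- [%s Scoring, Rendered %s](%s)",
  stringr::str_to_title("covid"),
  "2025-01-15",
  file_name
)
#> "- [Covid Scoring, Rendered 2025-01-15](covid_2025-01-15_score_report.html)"
```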