hubverse-org · lshandross · Mar 8, 2024 · Feb 15, 2024 · Feb 28, 2024 · Feb 28, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -14,4 +14,5 @@
 ^\.Rdata$
 ^\.httr-oauth$
 ^\.secrets$
+^\.lintr$
 ^data-raw$
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -29,7 +29,7 @@ jobs:
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -39,19 +39,6 @@ jobs:
           http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
 
-      - name: Cache R packages
-        uses: actions/cache@v1
-        with:
-          path: ${{ env.R_LIBS_USER }}
-          key: r-${{ hashFiles('DESCRIPTION') }}
-
-      - name: Install dependencies
-        run: |
-          install.packages(c("remotes","rmarkdown","dplyr","purrr","tidyr","tidyselect"))
-          remotes::install_github("reichlab/distfromq")
-          remotes::install_deps(dependencies = NA)
-        shell: Rscript {0}
-
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
           extra-packages: any::rcmdcheck

diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
@@ -0,0 +1,32 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: lint
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::lintr, local::.
+          needs: lint
+
+      - name: Lint
+        run: lintr::lint_package()
+        shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -0,0 +1,50 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: test-coverage
+
+jobs:
+  test-coverage:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::covr
+          needs: coverage
+
+      - name: Test coverage
+        run: |
+          covr::codecov(
+            quiet = FALSE,
+            clean = FALSE,
+            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
+          )
+        shell: Rscript {0}
+
+      - name: Show testthat output
+        if: always()
+        run: |
+          ## --------------------------------------------------------------------
+          find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+        shell: bash
+
+      - name: Upload test results
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-test-failures
+          path: ${{ runner.temp }}/package
diff --git a/.lintr b/.lintr
@@ -0,0 +1,4 @@
+linters: linters_with_defaults(
+    line_length_linter = line_length_linter(120L),
+    commented_code_linter = NULL
+  )
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: hubEnsembles
 Title: Ensemble methods for combining hub model outputs
-Version: 0.1.0
+Version: 0.1.1
 Authors@R: c(
     person(given = "Evan L",
            family = "Ray",
@@ -24,6 +24,7 @@ Description: Functions for combining model outputs (e.g. predictions or
 License: MIT + file LICENSE
 VignetteBuilder: knitr
 Suggests: 
+    hubData,
     knitr,
     plotly,
     rmarkdown,
@@ -32,7 +33,7 @@ Suggests:
 Config/testthat/edition: 3
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 URL: https://github.com/Infectious-Disease-Modeling-Hubs/hubEnsembles,
     https://infectious-disease-modeling-hubs.github.io/hubEnsembles/
 BugReports: https://github.com/Infectious-Disease-Modeling-Hubs/hubEnsembles/issues
@@ -41,15 +42,15 @@ Imports:
     distfromq (>= 1.0.2),
     dplyr,
     Hmisc,
-    hubUtils,
+    hubUtils (>= 0.0.1),
     magrittr,
     matrixStats,
     purrr,
-    rlang,
     tidyr,
     tidyselect
 Remotes:
     Infectious-Disease-Modeling-Hubs/hubUtils,
+    Infectious-Disease-Modeling-Hubs/hubData,
     reichlab/distfromq
 Config/Needs/website: Infectious-Disease-Modeling-Hubs/hubStyle
 Depends: 

diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,3 @@
+# hubEnsembles 0.1.1
+
+* Initial Release.
diff --git a/data-raw/example_model_output.R b/data-raw/example_model_output.R
@@ -2,10 +2,11 @@
-## note: requires example-complex-forecast-hub and hubEnsembles are
+## note: requires example-complex-forecast-hub and hubEnsembles to be
 ##       cloned into the same folder
 
-library(hubUtils)
+library(hubData)
 hub_path <- "../example-complex-forecast-hub"
-example_model_output <- hubUtils::connect_hub(hub_path) |>
+example_model_output <- hubData::connect_hub(hub_path) |>
     dplyr::collect() |>
     dplyr::select(model_id, location, reference_date, horizon, target_end_date, target, output_type, output_type_id, value)
 
 usethis::use_data(example_model_output, overwrite = TRUE)
+
diff --git a/inst/example-data/example-simple-forecast-hub/README.Rmd b/inst/example-data/example-simple-forecast-hub/README.Rmd
@@ -29,8 +29,8 @@
 library(hubUtils)
 library(dplyr)
 
-model_outputs <- hubUtils::connect_hub(hub_path = ".") %>%
+model_outputs <- hubData::connect_hub(hub_path = ".") %>%
     dplyr::collect()
 head(model_outputs)
 
 target_data <- read.csv("target-data/covid-hospitalizations.csv")

diff --git a/inst/example-data/example-simple-forecast-hub/README.md b/inst/example-data/example-simple-forecast-hub/README.md
@@ -28,7 +28,7 @@ To work with the data in R, you can use code like the following:
 library(hubUtils)
 library(dplyr)
 
-model_outputs <- hubUtils::connect_hub(hub_path = ".") %>%
+model_outputs <- hubData::connect_hub(hub_path = ".") %>%
     dplyr::collect()
 head(model_outputs)
 #> # A tibble: 6 × 8

diff --git a/vignettes/hubEnsembles.Rmd b/vignettes/hubEnsembles.Rmd
@@ -35,14 +35,17 @@ The `example-simple-forecast-hub` has been created by the Consortium of Infectio
 
 ```{r}
 hub_path <- system.file("example-data/example-simple-forecast-hub",
-                        package = "hubEnsembles")
+  package = "hubEnsembles"
+)
 
-model_outputs <- hubUtils::connect_hub(hub_path) %>%
+model_outputs <- hubData::connect_hub(hub_path) %>%
   dplyr::collect()
 head(model_outputs)
 
-target_data_path <- file.path(hub_path, "target-data",
-                              "covid-hospitalizations.csv")
+target_data_path <- file.path(
+  hub_path, "target-data",
+  "covid-hospitalizations.csv"
+)
 target_data <- read.csv(target_data_path)
 head(target_data)
 ```
@@ -63,23 +66,25 @@ head(mean_ens)
 We can change the function used to aggregate across model outputs. For example, we may want to calculate a median of component model submitted values for each quantile. We will also use the `model_id` argument to distinguish this ensemble.
 
 ```{r}
-median_ens <- hubEnsembles::simple_ensemble(model_outputs, 
-                                            agg_fun = median, 
-                                            model_id = "hub-ensemble-median")
+median_ens <- hubEnsembles::simple_ensemble(model_outputs,
+  agg_fun = median,
+  model_id = "hub-ensemble-median"
+)
 head(median_ens)
 ```
 
 Custom functions can also be passed into the `agg_fun` argument. For example, a geometric mean may be a more appropriate way to combine component model outputs. Any custom function to be used requires an argument `x` for the vector of numeric values to summarize, and if relevant, an argument `w` of numeric weights.
 
 ```{r}
-geometric_mean <- function(x){
-    n <- length(x)
-    return(prod(x)^(1/n))
+geometric_mean <- function(x) {
+  n <- length(x)
+  return(prod(x)^(1 / n))
 }
 
-geometric_mean_ens <-  hubEnsembles::simple_ensemble(model_outputs, 
-                                            agg_fun = geometric_mean, 
-                                            model_id = "hub-ensemble-geometric")
+geometric_mean_ens <- hubEnsembles::simple_ensemble(model_outputs,
+  agg_fun = geometric_mean,
+  model_id = "hub-ensemble-geometric"
+)
 head(geometric_mean_ens)
 ```
 
@@ -88,12 +93,15 @@ head(geometric_mean_ens)
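-In addition, we can weight the contributions of each model by providing a table of weights, which are provided in a `data.frame` with a `model_id` column and a `weight` column.
+In addition, we can weight the contributions of each model by providing a table of weights: a `data.frame` with a `model_id` column and a `weight` column.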
 
 ```{r}
-model_weights <- data.frame(model_id = c("UMass-ar", "UMass-gbq", "simple_hub-baseline"), 
-                            weight = c(0.4, 0.4, 0.2))
+model_weights <- data.frame(
+  model_id = c("UMass-ar", "UMass-gbq", "simple_hub-baseline"),
+  weight = c(0.4, 0.4, 0.2)
+)
 
-weighted_mean_ens <- hubEnsembles::simple_ensemble(model_outputs, 
-                                                   weights = model_weights, 
-                                                   model_id = "hub-ensemble-weighted-mean")
+weighted_mean_ens <- hubEnsembles::simple_ensemble(model_outputs,
+  weights = model_weights,
+  model_id = "hub-ensemble-weighted-mean"
+)
 head(weighted_mean_ens)
 ```
 
@@ -105,7 +113,7 @@ For `mean`, `cdf` and `pmf` output types, the linear pool is equivalent to using
 
 ```{r}
 linear_pool_ens <- hubEnsembles::linear_pool(model_outputs %>%
-                                               filter(output_type != "median"))
+  filter(output_type != "median"))
 head(linear_pool_ens)
 ```
 
@@ -114,36 +122,45 @@ head(linear_pool_ens)
 ```{r}
 basic_plot_function <- function(plot_df, truth_df, plain_line = 0.5, ribbon = c(0.975, 0.025),
                                 forecast_date) {
-
   plain_df <- dplyr::filter(plot_df, output_type_id == plain_line)
 
   ribbon_df <- dplyr::filter(plot_df, output_type_id %in% ribbon) %>%
     dplyr::mutate(output_type_id = ifelse(output_type_id == min(ribbon),
-                                          "min", "max")) %>% 
+      "min", "max"
+    )) %>%
     tidyr::pivot_wider(names_from = output_type_id, values_from = value)
 
-  plot_model <- plot_ly(height = 600, colors = scales::hue_pal()(50)) 
+  plot_model <- plot_ly(height = 600, colors = scales::hue_pal()(50))
 
   if (!is.null(truth_df)) {
-    plot_model <- plot_model %>% 
-      add_trace(data = truth_df, x = ~time_idx, y = ~value, type = "scatter",
-                mode = "lines+markers", line = list(color = "#6e6e6e"),
-                hoverinfo = "text", name = "ground truth",
-                hovertext = paste("Date: ", truth_df$time_value, "<br>", 
-                                  "Ground truth: ", 
-                                  format(truth_df$value, big.mark = ","), 
-                             sep = ""), 
-                marker = list(color = "#6e6e6e", size = 7))
+    plot_model <- plot_model %>%
+      add_trace(
+        data = truth_df, x = ~time_idx, y = ~value, type = "scatter",
+        mode = "lines+markers", line = list(color = "#6e6e6e"),
+        hoverinfo = "text", name = "ground truth",
+        hovertext = paste("Date: ", truth_df$time_value, "<br>",
+          "Ground truth: ",
+          format(truth_df$value, big.mark = ","),
+          sep = ""
+        ),
+        marker = list(color = "#6e6e6e", size = 7)
+      )
   }
-  plot_model <- plot_model %>% 
-    add_lines(data = plain_df, x = ~target_date, y = ~value, 
-              color = ~model_id) %>% 
-    add_ribbons(data = ribbon_df, x = ~target_date, ymin = ~min, 
-                ymax = ~max, color = ~model_id, opacity = 0.25, 
-                line = list(width = 0), showlegend = FALSE) %>%
-    plotly::layout(shapes = list(type = "line", y0 = 0, y1 = 1, yref = "paper",
-                                x0 = forecast_date, x1 = forecast_date,
-                                line = list(color = "gray")))
+  plot_model <- plot_model %>%
+    add_lines(
+      data = plain_df, x = ~target_date, y = ~value,
+      color = ~model_id
+    ) %>%
+    add_ribbons(
+      data = ribbon_df, x = ~target_date, ymin = ~min,
+      ymax = ~max, color = ~model_id, opacity = 0.25,
+      line = list(width = 0), showlegend = FALSE
+    ) %>%
+    plotly::layout(shapes = list(
+      type = "line", y0 = 0, y1 = 1, yref = "paper",
+      x0 = forecast_date, x1 = forecast_date,
+      line = list(color = "gray")
+    ))
 }
 ```
 
@@ -153,11 +170,14 @@ plot_df <- dplyr::bind_rows(model_outputs, mean_ens) %>%
   dplyr::mutate(target_date = origin_date + horizon)
 
 plot <- basic_plot_function(
-    plot_df,
-    truth_df = target_data %>%
-        dplyr::filter(location == "US",
-                      time_idx >= "2022-10-01",
-                      time_idx <= "2023-03-01"),
-    forecast_date = "2022-12-12")
+  plot_df,
+  truth_df = target_data %>%
+    dplyr::filter(
+      location == "US",
+      time_idx >= "2022-10-01",
+      time_idx <= "2023-03-01"
+    ),
+  forecast_date = "2022-12-12"
+)
 plot
 ```