From 46744315a04352cda5507c802aeab7a66db80533 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Fri, 12 Jan 2024 17:28:41 +0000
Subject: [PATCH 01/50] add WIP .sim_network_bp function

---
 R/sim_network_bp.R        | 122 ++++++++++++++++++++++++++++++++++++++
 man/dot-sim_network_bp.Rd |  35 +++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 R/sim_network_bp.R
 create mode 100644 man/dot-sim_network_bp.Rd
diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
new file mode 100644
index 00000000..6f4dd7ae
--- /dev/null
+++ b/R/sim_network_bp.R
@@ -0,0 +1,122 @@
+#' Simulate a random network branching process model with a probability of
+#' infection for each contact
+#'
+#' @description
+#' Simulate a branching process on a infinite network where the contact
+#' distribution provides a function to sample the number of contacts of each
+#' individual in the simulation. Each contact is then infected with the
+#' probability of infection. The time between each infection is determined
+#' by the contact interval function.
+#'
+#' @details
+#' Preferential attachment ...
+#'
+#'
+#' @param mean_contacts A `numeric` for the mean number of contacts across the
+#' population.
+#' @param contact_interval An `<epidist>` object or anonymous function for
+#' the contact interval. The contact interval is similar to the generation time
+#' or serial interval, but describes the time between becoming infected by a
+#' contact and contacting another susceptible individual.
+#' @param prob_infect A `numeric` for the probability of a secondary contact
+#' being infected by an infected primary contact.
+#'
+#' @return A `<data.frame>` with the contact and transmission chain data.
+#' @keywords internal
+.sim_network_bp <- function(mean_contacts, contact_interval, prob_infect) {
+  # input checking
+  stopifnot(
+    "Input delay distributions need to be either functions or <epidist>" =
+      inherits(contact_interval, c("function", "epidist"))
+
+  )
+  checkmate::assert_number(prob_infect, lower = 0, upper = 1)
+  checkmate::assert_number(mean_contacts, lower = 1, upper = 50)
+
+  # convert distribution to func if needed
+  contact_interval <- as.function(contact_interval, func_type = "generate")
+
+  # initialise data object
+  id <- 1:100
+  ancestor <- vector(mode = "integer", 100)
+  generation <- vector(mode = "integer", 100)
+  infected <- vector(mode = "integer", 100)
+  time <- vector(mode = "double", 100)
+
+  # store initial individual
+  ancestor[1] <- NA_integer_
+  generation[1] <- 1L
+  # 1 is infected, 0 is non-infected contact
+  infected[1] <- 1L
+  time[1] <- 0
+
+  # initialise counters
+  next_gen_size <- 1L
+  chain_generation <- 1L
+  chain_size <- 1L
+  chain_length <- 1L
+  ancestor_idx <- 1L
+
+  # run loop until no more individuals are sampled
+  while (next_gen_size > 0) {
+    # sample contact distribution with preferential attachment
+    q <- dpois(0:100 + 1, lambda = mean_contacts) * (0:100 + 1)
+    q <- q / sum(q)
+    contacts <- sample(0:100, size = 1, prob = q)
+
+    # add contacts if sampled
+    if (sum(contacts) > 0L) {
+      chain_size <- chain_size + sum(contacts)
+      chain_length <- chain_length + min(1L, contacts)
+      chain_generation <- chain_generation + 1L
+
+      for (i in seq_along(contacts)) {
+
+        if (contacts[i] > 0) {
+
+          generation_vec_idx <-
+            which.min(generation):(which.min(generation) + contacts[i] - 1)
+          generation[generation_vec_idx] <- chain_generation
+
+          ancestor_vec_idx <-
+            which.min(ancestor):(which.min(ancestor) + contacts[i] - 1)
+          ancestor[ancestor_vec_idx] <- ancestor_idx
+
+          # sample infected contacts
+          infect <- runif(contacts[i], min = 0, max = 1) < prob_infect
+          infected[generation_vec_idx] <- as.numeric(infect)
+
+          # add delay time
+          time[generation_vec_idx] <-
+            contact_interval(length(generation_vec_idx)) +
+            time[which(ancestor == ancestor_idx)]
+
+          ancestor_idx <- ancestor_idx + min(which(infect), 1)
+        } else {
+          ancestor_idx <- ancestor_idx + 1L
+        }
+      }
+      next_gen_size <- sum(infected[which(generation == max(generation))])
+    } else {
+      next_gen_size <- 0L
+    }
+  }
+
+  ancestor <- ancestor[ancestor != 0]
+  generation <- generation[generation != 0]
+
+  # remove unused IDs
+  id <- id[seq_along(generation)]
+  infected <- infected[seq_along(generation)]
+  infected <- ifelse(test = infected, yes = "infected", no = "contact")
+  time <- time[seq_along(generation)]
+
+  # return chain as <data.frame>
+  data.frame(
+    id = id,
+    ancestor = ancestor,
+    generation = generation,
+    infected = infected,
+    time = time
+  )
+}
diff --git a/man/dot-sim_network_bp.Rd b/man/dot-sim_network_bp.Rd
new file mode 100644
index 00000000..6c1504c5
--- /dev/null
+++ b/man/dot-sim_network_bp.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sim_network_bp.R
+\name{.sim_network_bp}
+\alias{.sim_network_bp}
+\title{Simulate a random network branching process model with a probability of
+infection for each contact}
+\usage{
+.sim_network_bp(mean_contacts, contact_interval, prob_infect)
+}
+\arguments{
+\item{mean_contacts}{A \code{numeric} for the mean number of contacts across the
+population.}
+
+\item{contact_interval}{An \verb{<epidist>} object or anonymous function for
+the contact interval. The contact interval is similar to the generation time
+or serial interval, but describes the time between becoming infected by a
+contact and contacting another susceptible individual.}
+
+\item{prob_infect}{A \code{numeric} for the probability of a secondary contact
+being infected by an infected primary contact.}
+}
+\value{
+A \verb{<data.frame>} with the contact and transmission chain data.
+}
+\description{
+Simulate a branching process on a infinite network where the contact
+distribution provides a function to sample the number of contacts of each
+individual in the simulation. Each contact is then infected with the
+probability of infection. The time between each infection is determined
+by the contact interval function.
+}
+\details{
+Preferential attachment ...
+}
+\keyword{internal}

From c01e77eb0e22b99c56c0c21039996deb5310ff09 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 15 Jan 2024 18:07:53 +0000
Subject: [PATCH 02/50] fix sampling contacts and bookkeeping ancestors

---
 R/sim_network_bp.R | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 6f4dd7ae..e9117149 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -56,13 +56,14 @@
   chain_size <- 1L
   chain_length <- 1L
   ancestor_idx <- 1L
+  prev_ancestors <- 1L
 
   # run loop until no more individuals are sampled
   while (next_gen_size > 0) {
     # sample contact distribution with preferential attachment
     q <- dpois(0:100 + 1, lambda = mean_contacts) * (0:100 + 1)
     q <- q / sum(q)
-    contacts <- sample(0:100, size = 1, prob = q)
+    contacts <- sample(0:100, size = next_gen_size, prob = q)
 
     # add contacts if sampled
     if (sum(contacts) > 0L) {
@@ -80,22 +81,20 @@
 
           ancestor_vec_idx <-
             which.min(ancestor):(which.min(ancestor) + contacts[i] - 1)
-          ancestor[ancestor_vec_idx] <- ancestor_idx
+          ancestor[ancestor_vec_idx] <- ancestor_idx[i]
 
           # sample infected contacts
-          infect <- runif(contacts[i], min = 0, max = 1) < prob_infect
+          infect <- rbinom(n = contacts[i], size = 1, prob = prob_infect)
           infected[generation_vec_idx] <- as.numeric(infect)
 
           # add delay time
           time[generation_vec_idx] <-
             contact_interval(length(generation_vec_idx)) +
-            time[which(ancestor == ancestor_idx)]
-
-          ancestor_idx <- ancestor_idx + min(which(infect), 1)
-        } else {
-          ancestor_idx <- ancestor_idx + 1L
+            time[which(ancestor == ancestor_idx[i])]
         }
       }
+      ancestor_idx <- setdiff(which(infected == 1), prev_ancestors)
+      prev_ancestors <- c(prev_ancestors, ancestor_idx)
       next_gen_size <- sum(infected[which(generation == max(generation))])
     } else {
       next_gen_size <- 0L

From 69e99c944f2cdf329bf3693ea18f02dd002ee126 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 10:31:42 +0000
Subject: [PATCH 03/50] fixed sampling contact distribution with replacement

---
 R/sim_network_bp.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index e9117149..52083290 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -63,7 +63,7 @@
     # sample contact distribution with preferential attachment
     q <- dpois(0:100 + 1, lambda = mean_contacts) * (0:100 + 1)
     q <- q / sum(q)
-    contacts <- sample(0:100, size = next_gen_size, prob = q)
+    contacts <- sample(0:100, size = next_gen_size, replace = TRUE, prob = q)
 
     # add contacts if sampled
     if (sum(contacts) > 0L) {

From 8a1d3b55ee7f69e8874123a0e5ae34d3bf4ac918 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 10:32:18 +0000
Subject: [PATCH 04/50] increase size of allocated vectors

---
 R/sim_network_bp.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 52083290..013f3b1b 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -37,11 +37,11 @@
   contact_interval <- as.function(contact_interval, func_type = "generate")
 
   # initialise data object
-  id <- 1:100
-  ancestor <- vector(mode = "integer", 100)
-  generation <- vector(mode = "integer", 100)
-  infected <- vector(mode = "integer", 100)
-  time <- vector(mode = "double", 100)
+  id <- 1:1e5
+  ancestor <- vector(mode = "integer", 1e5)
+  generation <- vector(mode = "integer", 1e5)
+  infected <- vector(mode = "integer", 1e5)
+  time <- vector(mode = "double", 1e5)
 
   # store initial individual
   ancestor[1] <- NA_integer_

From 49073638bf3920463835b9f501ca3303f91f28af Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 10:32:44 +0000
Subject: [PATCH 05/50] fixed issue counting chain length

---
 R/sim_network_bp.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 013f3b1b..21f13c0f 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -68,7 +68,7 @@
     # add contacts if sampled
     if (sum(contacts) > 0L) {
       chain_size <- chain_size + sum(contacts)
-      chain_length <- chain_length + min(1L, contacts)
+      chain_length <- chain_length + any(contacts >= 1L)
       chain_generation <- chain_generation + 1L
 
       for (i in seq_along(contacts)) {

From 6122f9b8b139dd24a71e3574d5d15b3483dc6263 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 11:33:46 +0000
Subject: [PATCH 06/50] simplified vector indexing in .sim_network_bp

---
 R/sim_network_bp.R | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 21f13c0f..14471c06 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -75,22 +75,20 @@
 
         if (contacts[i] > 0) {
 
-          generation_vec_idx <-
+          # vec index can be which.min of either generation or ancestor vec
+          vec_idx <-
             which.min(generation):(which.min(generation) + contacts[i] - 1)
-          generation[generation_vec_idx] <- chain_generation
 
-          ancestor_vec_idx <-
-            which.min(ancestor):(which.min(ancestor) + contacts[i] - 1)
-          ancestor[ancestor_vec_idx] <- ancestor_idx[i]
+          generation[vec_idx] <- chain_generation
+          ancestor[vec_idx] <- ancestor_idx[i]
 
           # sample infected contacts
           infect <- rbinom(n = contacts[i], size = 1, prob = prob_infect)
-          infected[generation_vec_idx] <- as.numeric(infect)
+          infected[vec_idx] <- as.numeric(infect)
 
-          # add delay time
-          time[generation_vec_idx] <-
-            contact_interval(length(generation_vec_idx)) +
-            time[which(ancestor == ancestor_idx[i])]
+          # add delay time, removing first element of ancestor time as it is NA
+          time[vec_idx] <- contact_interval(length(vec_idx)) +
+            time[ancestor == ancestor_idx[i]][-1]
         }
       }
       ancestor_idx <- setdiff(which(infected == 1), prev_ancestors)

From 36da3889576072f3b4ec842a02971bf037e6c210 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 11:34:42 +0000
Subject: [PATCH 07/50] simplified next_gen_size calculation in .sim_network_bp

---
 R/sim_network_bp.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 14471c06..a5cc9cca 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -93,7 +93,7 @@
       }
       ancestor_idx <- setdiff(which(infected == 1), prev_ancestors)
       prev_ancestors <- c(prev_ancestors, ancestor_idx)
-      next_gen_size <- sum(infected[which(generation == max(generation))])
+      next_gen_size <- length(ancestor_idx)
     } else {
       next_gen_size <- 0L
     }

From aabab0eef06a08006a5bd811f21c0e7876653035 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 12:03:36 +0000
Subject: [PATCH 08/50] remove preallocation of id vec and add vector growing
 to .sim_network_bp

---
 R/sim_network_bp.R | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index a5cc9cca..f7b3eeb4 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -37,7 +37,6 @@
   contact_interval <- as.function(contact_interval, func_type = "generate")
 
   # initialise data object
-  id <- 1:1e5
   ancestor <- vector(mode = "integer", 1e5)
   generation <- vector(mode = "integer", 1e5)
   infected <- vector(mode = "integer", 1e5)
@@ -79,6 +78,14 @@
           vec_idx <-
             which.min(generation):(which.min(generation) + contacts[i] - 1)
 
+          # grow vectors if idx exceeds length
+          if (max(vec_idx) > length(generation)) {
+            ancestor <- c(ancestor, vector(mode = "integer", 1e5))
+            generation <- c(generation, vector(mode = "integer", 1e5))
+            infected <- c(infected, vector(mode = "integer", 1e5))
+            time <- c(time, vector(mode = "double", 1e5))
+          }
+
           generation[vec_idx] <- chain_generation
           ancestor[vec_idx] <- ancestor_idx[i]
 
@@ -103,7 +110,7 @@
   generation <- generation[generation != 0]
 
   # remove unused IDs
-  id <- id[seq_along(generation)]
+  id <- seq_along(generation)
   infected <- infected[seq_along(generation)]
   infected <- ifelse(test = infected, yes = "infected", no = "contact")
   time <- time[seq_along(generation)]

From 5dc439cbf9ef6a4bbbee35a77476f304ede2e069 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 17:32:28 +0000
Subject: [PATCH 09/50] remove input checking from .sim_network_bp as it is an
 internal function

---
 R/sim_network_bp.R | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index f7b3eeb4..3a4803a2 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -24,17 +24,6 @@
 #' @return A `<data.frame>` with the contact and transmission chain data.
 #' @keywords internal
 .sim_network_bp <- function(mean_contacts, contact_interval, prob_infect) {
-  # input checking
-  stopifnot(
-    "Input delay distributions need to be either functions or <epidist>" =
-      inherits(contact_interval, c("function", "epidist"))
-
-  )
-  checkmate::assert_number(prob_infect, lower = 0, upper = 1)
-  checkmate::assert_number(mean_contacts, lower = 1, upper = 50)
-
-  # convert distribution to func if needed
-  contact_interval <- as.function(contact_interval, func_type = "generate")
 
   # initialise data object
   ancestor <- vector(mode = "integer", 1e5)

From a401eef55935953408b1c2d0ee6f15de0bc7bf8a Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 17:57:15 +0000
Subject: [PATCH 10/50] updated .check_sim_input with new model arguments

---
 R/checkers.R | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/R/checkers.R b/R/checkers.R
index 35fb10fc..bde24ea4 100644
--- a/R/checkers.R
+++ b/R/checkers.R
@@ -114,13 +114,13 @@
 #' called for its side-effects, which will error if the input is invalid.
 #' @keywords internal
 .check_sim_input <- function(sim_type = c("linelist", "contacts", "outbreak"), # nolint cyclocomp_linter
-                             R,
-                             serial_interval,
+                             mean_contacts,
+                             contact_interval,
+                             prob_infect,
                              outbreak_start_date,
                              min_outbreak_size,
                              onset_to_hosp = NULL,
                              onset_to_death = NULL,
-                             contact_distribution = NULL,
                              add_names = NULL,
                              add_ct = NULL,
                              case_type_probs = NULL,
@@ -131,8 +131,9 @@
                              population_age = NULL) {
   sim_type <- match.arg(sim_type)
 
-  checkmate::assert_number(R, lower = 0)
-  .check_func_req_args(serial_interval)
+  checkmate::assert_number(prob_infect, lower = 0, upper = 1)
+  checkmate::assert_number(mean_contacts, lower = 1)
+  .check_func_req_args(contact_interval)
   checkmate::assert_date(outbreak_start_date)
   checkmate::assert_integerish(min_outbreak_size, lower = 1)
 

From 4f6fd611c268592c7a75b6b92721ede7289d1ed2 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 17:57:47 +0000
Subject: [PATCH 11/50] updated .sim_contacts_tbl for new simulation model

---
 R/sim_contacts_tbl.R | 117 ++++++-------------------------------------
 1 file changed, 15 insertions(+), 102 deletions(-)

diff --git a/R/sim_contacts_tbl.R b/R/sim_contacts_tbl.R
index 5de90b5a..5ab11402 100644
--- a/R/sim_contacts_tbl.R
+++ b/R/sim_contacts_tbl.R
@@ -6,128 +6,41 @@
 #' @return A contacts `<data.frame>`
 #' @keywords internal
 .sim_contacts_tbl <- function(.data,
-                              outbreak_start_date,
-                              contact_distribution,
-                              population_age,
-                              contact_tracing_status_probs,
-                              config) {
+                              contact_tracing_status_probs) {
   if (!"infector_name" %in% colnames(.data)) {
     .data <- .add_names(.data = .data)
   }
 
-  contact_investigation <- subset(
+  contacts_tbl <- subset(
     .data,
     select = c(
-      "infector", "infector_name", "case_name", "age", "gender",
+      "infector_name", "case_name", "age", "gender",
       "date_first_contact", "date_last_contact"
     )
   )
-  colnames(contact_investigation) <- c(
-    "infector", "from", "to", "cnt_age", "cnt_gender",
-    "date_first_contact", "date_last_contact"
+  colnames(contacts_tbl) <- c(
+    "from", "to", "age", "gender", "date_first_contact",
+    "date_last_contact"
   )
-  contact_investigation <- contact_investigation[-1, ]
-  contact_investigation$was_case <- "Y"
-
-  other_contacts <- subset(.data, select = c("infector", "infector_time"))
-  other_contacts <- other_contacts[-1, ]
-  other_contacts$from <- contact_investigation$from
-  other_contacts <- subset(
-    other_contacts,
-    subset = !duplicated(other_contacts$infector)
-  )
-
-  # sample contact distribution for non-infected contacts
-  contact_freq <- contact_distribution(nrow(other_contacts))
-
-  # Multiply row by times specified in frequency column V
-  other_contacts <-
-    other_contacts[rep(row.names(other_contacts), contact_freq), ]
-
-  # add random age and gender
-  if (is.data.frame(population_age)) {
-    age_groups <- apply(population_age, MARGIN = 1, function(x) x[1]:x[2])
-    sample_weight <- rep(population_age$proportion, times = lengths(age_groups))
-    # normalise for vector length
-    sample_weight <- sample_weight /
-      rep(lengths(age_groups), times = lengths(age_groups))
-    other_contacts$cnt_age <- sample(
-      unlist(age_groups),
-      size = nrow(other_contacts),
-      replace = TRUE,
-      prob = sample_weight
-    )
-  } else {
-    other_contacts$cnt_age <- sample(
-      population_age[1]:population_age[2],
-      size = nrow(other_contacts),
-      replace = TRUE
-    )
-  }
-  other_contacts$cnt_gender <- sample(
-    c("m", "f"),
-    size = nrow(other_contacts),
-    replace = TRUE
-  )
-
-  # Add corresponding names acc to the gender V
-  other_contacts$to <- randomNames::randomNames(
-    which.names = "both",
-    name.sep = " ",
-    name.order = "first.last",
-    gender = other_contacts$cnt_gender,
-    sample.with.replacement = FALSE
+  contacts_tbl$was_case <- ifelse(
+    test = .data$infected == "infected",
+    yes = "Y",
+    no = "N"
   )
 
-  # add contact dates
-  other_contacts <- .add_date_contact(
-    .data = other_contacts,
-    contact_type = "last",
-    distribution = config$last_contact_distribution,
-    config$last_contact_distribution_params,
-    outbreak_start_date = outbreak_start_date
-  )
-  other_contacts <- .add_date_contact(
-    .data = other_contacts,
-    contact_type = "first",
-    distribution = config$first_contact_distribution,
-    config$first_contact_distribution_params
-  )
-
-  other_contacts <- subset(
-    other_contacts,
-    select = c(
-      "infector", "from", "to", "cnt_age", "cnt_gender",
-      "date_first_contact", "date_last_contact"
-    )
-  )
-  other_contacts$was_case <- "N"
-
-  # merge both datasets and keep all the values from both
-  contact_investigation <- rbind(contact_investigation, other_contacts)
-  contact_investigation <-
-    contact_investigation[order(contact_investigation$infector), ]
-  row.names(contact_investigation) <- NULL
-
   # add contact tracing status
-  pick_N <- which(contact_investigation$was_case == "N")
+  pick_N <- which(contacts_tbl$was_case == "N")
   status_type <- sample(
     names(contact_tracing_status_probs),
     size = length(pick_N),
     replace = TRUE,
     prob = contact_tracing_status_probs
   )
-  contact_investigation$status <- NA
-  contact_investigation$status[pick_N] <- status_type
-  contact_investigation$status <- ifelse(
-    test = is.na(contact_investigation$status),
-    yes = "case",
-    no = contact_investigation$status
-  )
+  contacts_tbl$status <- "case"
+  contacts_tbl$status[pick_N] <- status_type
 
-  # remove infector col
-  contact_investigation$infector <- NULL
+  contacts_tbl <- contacts_tbl[-1, ]
 
   # return contacts
-  contact_investigation
+  contacts_tbl
 }

From ae79da1cee658dd63de01b0fecbe4b81372b0ac5 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 17:59:00 +0000
Subject: [PATCH 12/50] updated exported simulation function to use new model
 arguments

---
 R/sim_contacts.R | 29 +++++++++++++----------------
 R/sim_linelist.R | 20 ++++++++++++--------
 R/sim_outbreak.R | 32 +++++++++++++++-----------------
 3 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/R/sim_contacts.R b/R/sim_contacts.R
index ecacee21..be2eba16 100644
--- a/R/sim_contacts.R
+++ b/R/sim_contacts.R
@@ -32,11 +32,11 @@
 #'
 #' contacts <- sim_contacts(
 #'   R = 1.1,
-#'   serial_interval = serial_interval,
-#'   contact_distribution = contact_distribution
+#'   serial_interval = serial_interval
 #' )
-sim_contacts <- function(R,
-                         serial_interval,
+sim_contacts <- function(mean_contacts,
+                         contact_interval,
+                         prob_infect,
                          contact_distribution,
                          outbreak_start_date = as.Date("2023-01-01"),
                          min_outbreak_size = 10,
@@ -50,10 +50,10 @@ sim_contacts <- function(R,
   # check and convert distribution to func if needed before .check_sim_input()
   stopifnot(
     "Input delay distributions need to be either functions or <epidist>" =
-      inherits(serial_interval, c("function", "epidist")) &&
+      inherits(contact_interval, c("function", "epidist")) &&
       inherits(contact_distribution, c("function", "epidist"))
   )
-  serial_interval <- as.function(serial_interval, func_type = "generate")
+  contact_interval <- as.function(serial_interval, func_type = "generate")
   contact_distribution <- as.function(
     contact_distribution,
     func_type = "generate"
@@ -61,18 +61,19 @@ sim_contacts <- function(R,
 
   .check_sim_input(
     sim_type = "contacts",
-    R = R,
-    serial_interval = serial_interval,
+    mean_contacts = mean_contacts,
+    contact_interval = contact_interval,
+    prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
-    contact_distribution = contact_distribution,
     contact_tracing_status_probs = contact_tracing_status_probs,
     population_age = population_age
   )
 
   chain <- .sim_bp_linelist(
-    R = R,
-    serial_interval = serial_interval,
+    mean_contacts = mean_contacts,
+    contact_interval = contact_interval,
+    prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
@@ -83,11 +84,7 @@ sim_contacts <- function(R,
 
   contacts <- .sim_contacts_tbl(
     .data = chain,
-    outbreak_start_date = outbreak_start_date,
-    contact_distribution = contact_distribution,
-    population_age = population_age,
-    contact_tracing_status_probs = contact_tracing_status_probs,
-    config = config
+    contact_tracing_status_probs = contact_tracing_status_probs
   )
 
   # return line list
diff --git a/R/sim_linelist.R b/R/sim_linelist.R
index a774e415..c71bb27c 100644
--- a/R/sim_linelist.R
+++ b/R/sim_linelist.R
@@ -116,8 +116,9 @@
 #'   hosp_risk = age_dep_hosp_risk
 #' )
 #' head(linelist)
-sim_linelist <- function(R,
-                         serial_interval,
+sim_linelist <- function(mean_contacts,
+                         contact_interval,
+                         prob_infect,
                          onset_to_hosp,
                          onset_to_death,
                          hosp_risk = 0.2,
@@ -137,18 +138,19 @@ sim_linelist <- function(R,
   # check and convert distribution to func if needed before .check_sim_input()
   stopifnot(
     "Input delay distributions need to be either functions or <epidist>" =
-      inherits(serial_interval, c("function", "epidist")) &&
+      inherits(contact_interval, c("function", "epidist")) &&
       inherits(onset_to_hosp, c("function", "epidist")) &&
       inherits(onset_to_death, c("function", "epidist"))
   )
-  serial_interval <- as.function(serial_interval, func_type = "generate")
+  contact_interval <- as.function(contact_interval, func_type = "generate")
   onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate")
   onset_to_death <- as.function(onset_to_death, func_type = "generate")
 
   .check_sim_input(
     sim_type = "linelist",
-    R = R,
-    serial_interval = serial_interval,
+    mean_contacts = mean_contacts,
+    contact_interval = contact_interval,
+    prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     onset_to_hosp = onset_to_hosp,
@@ -191,8 +193,9 @@ sim_linelist <- function(R,
   }
 
   chain <- .sim_bp_linelist(
-    R = R,
-    serial_interval = serial_interval,
+    mean_contacts = mean_contacts,
+    contact_interval = contact_interval,
+    prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
@@ -213,6 +216,7 @@ sim_linelist <- function(R,
     config = config
   )
 
+  linelist$chain <- linelist$chain[linelist$chain$infected == "infected", ]
   chain <- linelist$chain[, linelist$cols]
 
   # return line list
diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R
index 19d9cafd..65031bd9 100644
--- a/R/sim_outbreak.R
+++ b/R/sim_outbreak.R
@@ -18,7 +18,7 @@
 #'
 #' @examples
 #' # load data required to simulate outbreak data
-#' serial_interval <- epiparameter::epidist(
+#' contact_interval <- epiparameter::epidist(
 #'   disease = "COVID-19",
 #'   epi_dist = "serial interval",
 #'   prob_distribution = "gamma",
@@ -48,13 +48,13 @@
 #'
 #' outbreak <- sim_outbreak(
 #'   R = 1.1,
-#'   serial_interval = serial_interval,
+#'   contact_interval = contact_interval,
 #'   onset_to_hosp = onset_to_hosp,
-#'   onset_to_death = onset_to_death,
-#'   contact_distribution = contact_distribution
+#'   onset_to_death = onset_to_death
 #' )
-sim_outbreak <- function(R,
-                         serial_interval,
+sim_outbreak <- function(mean_contacts,
+                         contact_interval,
+                         prob_infect,
                          onset_to_hosp,
                          onset_to_death,
                          contact_distribution,
@@ -80,12 +80,12 @@ sim_outbreak <- function(R,
   # check and convert distribution to func if needed before .check_sim_input()
   stopifnot(
     "Input delay distributions need to be either functions or <epidist>" =
-      inherits(serial_interval, c("function", "epidist")) &&
+      inherits(contact_interval, c("function", "epidist")) &&
       inherits(onset_to_hosp, c("function", "epidist")) &&
       inherits(onset_to_death, c("function", "epidist")) &&
       inherits(contact_distribution, c("function", "epidist"))
   )
-  serial_interval <- as.function(serial_interval, func_type = "generate")
+  contact_interval <- as.function(contact_interval, func_type = "generate")
   onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate")
   onset_to_death <- as.function(onset_to_death, func_type = "generate")
   contact_distribution <- as.function(
@@ -95,8 +95,9 @@ sim_outbreak <- function(R,
 
   .check_sim_input(
     sim_type = "outbreak",
-    R = R,
-    serial_interval = serial_interval,
+    mean_contacts = mean_contacts,
+    contact_interval = contact_interval,
+    prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     onset_to_hosp = onset_to_hosp,
@@ -141,8 +142,9 @@ sim_outbreak <- function(R,
   }
 
   chain <- .sim_bp_linelist(
-    R = R,
-    serial_interval = serial_interval,
+    mean_contacts = mean_contacts,
+    contact_interval = contact_interval,
+    prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
@@ -165,11 +167,7 @@ sim_outbreak <- function(R,
 
   contacts <- .sim_contacts_tbl(
     .data = linelist$chain,
-    outbreak_start_date = outbreak_start_date,
-    contact_distribution = contact_distribution,
-    population_age = population_age,
-    contact_tracing_status_probs = contact_tracing_status_probs,
-    config = config
+    contact_tracing_status_probs = contact_tracing_status_probs
   )
 
   chain <- linelist$chain[, linelist$cols]

From 049a6ecc249cbb282d0669457ee06b33123b5190 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 17:59:30 +0000
Subject: [PATCH 13/50] updated .sim_bp_linelist to use .sim_network_bp

---
 R/sim_utils.R | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/R/sim_utils.R b/R/sim_utils.R
index ece171ce..b6302094 100644
--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -14,26 +14,23 @@
 NULL
 
 #' @name .sim_utils
-.sim_bp_linelist <- function(R,
-                             serial_interval,
+.sim_bp_linelist <- function(mean_contacts,
+                             contact_interval,
+                             prob_infect,
                              outbreak_start_date,
                              min_outbreak_size,
                              population_age,
                              config) {
-  chain_size <- 0
+  outbreak_size <- 0
   max_iter <- 0L
   # condition on a minimum chain size
-  while (chain_size < min_outbreak_size) {
-    chain <- bpmodels::chain_sim(
-      n = 1,
-      offspring = "pois",
-      stat = "size",
-      serial = serial_interval,
-      lambda = R,
-      tree = TRUE,
-      infinite = 1000
+  while (outbreak_size < min_outbreak_size) {
+    chain <- .sim_network_bp(
+      mean_contacts = mean_contacts,
+      contact_interval = contact_interval,
+      prob_infect = prob_infect
     )
-    chain_size <- max(chain$id)
+    outbreak_size <- sum(chain$infected == "infected")
     max_iter <- max_iter + 1L
     if (max_iter >= 1e4) {
       stop(
@@ -138,9 +135,9 @@ NULL
   }
 
   # add confirmed, probable, suspected case types
-  chain$case_type <- sample(
+  chain$case_type[chain$infected == "infected"] <- sample(
     x = names(case_type_probs),
-    size = nrow(chain),
+    size = sum(chain$infected == "infected"),
     replace = TRUE,
     prob = case_type_probs
   )

From fa35b3979315abcde7a8c5dfc8da2be55a2691d8 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 16 Jan 2024 17:59:53 +0000
Subject: [PATCH 14/50] updated add_cols function to sample infected
 individuals

---
 R/add_cols.R | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/R/add_cols.R b/R/add_cols.R
index dbcb6f8c..c076b88f 100644
--- a/R/add_cols.R
+++ b/R/add_cols.R
@@ -81,13 +81,16 @@ NULL
 .add_hospitalisation <- function(.data,
                                  onset_to_hosp,
                                  hosp_risk) {
-  .data$hospitalisation <- .data$time + onset_to_hosp(nrow(.data))
+  infected_idx <- .data$infected == "infected"
+  num_infected <- sum(infected_idx)
+  .data$hospitalisation <- NA_real_
+  .data$hospitalisation[infected_idx] <- .data$time[infected_idx] + onset_to_hosp(num_infected)
 
   if (is.numeric(hosp_risk)) {
     pop_sample <- sample(
-      seq_len(nrow(.data)),
+      which(infected_idx),
       replace = FALSE,
-      size = (1 - hosp_risk) * nrow(.data)
+      size = (1 - hosp_risk) * num_infected
     )
     .data$hospitalisation[pop_sample] <- NA
   } else {
@@ -113,14 +116,17 @@ NULL
                         onset_to_death,
                         hosp_death_risk,
                         non_hosp_death_risk) {
-  .data$deaths <- .data$time + onset_to_death(nrow(.data))
+  infected_idx <- .data$infected == "infected"
+  num_infected <- sum(infected_idx)
+  .data$deaths <- NA_real_
+  .data$deaths[infected_idx] <- .data$time[infected_idx] + onset_to_death(num_infected)
 
   apply_death_risk <- function(.data, risk, hosp = TRUE) {
     if (is.numeric(risk)) {
       pop_sample <- sample(
-        seq_len(nrow(.data)),
+        which(infected_idx),
         replace = FALSE,
-        size = (1 - risk) * nrow(.data)
+        size = (1 - risk) * num_infected
       )
       .data$deaths[pop_sample] <- NA
     } else {

From 0b629efc0b0121814ef4e38c47f9c19970b87533 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 09:43:40 +0000
Subject: [PATCH 15/50] removed contact_distribution from functions that no
 longer use it

---
 R/checkers.R     |  1 -
 R/sim_contacts.R | 11 +----------
 R/sim_outbreak.R |  9 +--------
 3 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/R/checkers.R b/R/checkers.R
index bde24ea4..a74cdea9 100644
--- a/R/checkers.R
+++ b/R/checkers.R
@@ -170,7 +170,6 @@
   }
 
   if (sim_type == "contacts" || sim_type == "outbreak") {
-    .check_func_req_args(contact_distribution)
     checkmate::assert_numeric(contact_tracing_status_probs, len = 3)
     checkmate::assert_names(
       names(contact_tracing_status_probs),
diff --git a/R/sim_contacts.R b/R/sim_contacts.R
index be2eba16..3dfa3a0e 100644
--- a/R/sim_contacts.R
+++ b/R/sim_contacts.R
@@ -1,9 +1,6 @@
 #' Simulate contacts for an infectious disease outbreak
 #'
 #' @inheritParams sim_linelist
-#' @param contact_distribution An `<epidist>` object or anonymous function for
-#' the contact distribution. This is similar to the offspring distribution,
-#' but describes the heterogeneity of contacts that are not infected.
 #' @param contact_tracing_status_probs A named `numeric` vector with the
 #' probability of each contact tracing status. The names of the vector must
 #' be `"under_followup"`, `"lost_to_followup"`, `"unknown"`. Values of each
@@ -37,7 +34,6 @@
 sim_contacts <- function(mean_contacts,
                          contact_interval,
                          prob_infect,
-                         contact_distribution,
                          outbreak_start_date = as.Date("2023-01-01"),
                          min_outbreak_size = 10,
                          population_age = c(1, 90),
@@ -50,14 +46,9 @@ sim_contacts <- function(mean_contacts,
   # check and convert distribution to func if needed before .check_sim_input()
   stopifnot(
     "Input delay distributions need to be either functions or <epidist>" =
-      inherits(contact_interval, c("function", "epidist")) &&
-      inherits(contact_distribution, c("function", "epidist"))
+      inherits(contact_interval, c("function", "epidist"))
   )
   contact_interval <- as.function(serial_interval, func_type = "generate")
-  contact_distribution <- as.function(
-    contact_distribution,
-    func_type = "generate"
-  )
 
   .check_sim_input(
     sim_type = "contacts",
diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R
index 65031bd9..6547d6ce 100644
--- a/R/sim_outbreak.R
+++ b/R/sim_outbreak.R
@@ -57,7 +57,6 @@ sim_outbreak <- function(mean_contacts,
                          prob_infect,
                          onset_to_hosp,
                          onset_to_death,
-                         contact_distribution,
                          hosp_risk = 0.2,
                          hosp_death_risk = 0.5,
                          non_hosp_death_risk = 0.05,
@@ -82,16 +81,11 @@ sim_outbreak <- function(mean_contacts,
     "Input delay distributions need to be either functions or <epidist>" =
       inherits(contact_interval, c("function", "epidist")) &&
       inherits(onset_to_hosp, c("function", "epidist")) &&
-      inherits(onset_to_death, c("function", "epidist")) &&
-      inherits(contact_distribution, c("function", "epidist"))
+      inherits(onset_to_death, c("function", "epidist"))
   )
   contact_interval <- as.function(contact_interval, func_type = "generate")
   onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate")
   onset_to_death <- as.function(onset_to_death, func_type = "generate")
-  contact_distribution <- as.function(
-    contact_distribution,
-    func_type = "generate"
-  )
 
   .check_sim_input(
     sim_type = "outbreak",
@@ -102,7 +96,6 @@ sim_outbreak <- function(mean_contacts,
     min_outbreak_size = min_outbreak_size,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
-    contact_distribution = contact_distribution,
     add_names = add_names,
     add_ct = add_ct,
     case_type_probs = case_type_probs,

From 30e7bfbf93525590882fde4d706a41e067e8e4b3 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 09:44:19 +0000
Subject: [PATCH 16/50] removed documentation for old arguments and added
 documentation for new arguments (mean_contacts, contact_interval,
 prob_infect)

---
 R/sim_linelist.R | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/R/sim_linelist.R b/R/sim_linelist.R
index c71bb27c..4ccea45f 100644
--- a/R/sim_linelist.R
+++ b/R/sim_linelist.R
@@ -21,9 +21,15 @@
 #' that age group. Proportions must sum to one.
 #'
 #'
-#' @param R A single `numeric` for the reproduction number.
-#' @param serial_interval An `<epidist>` object or anonymous function for
-#' the serial interval.
+#' @param mean_contacts A single `numeric` for the mean number of contacts per
+#' infection.
+#' @param contact_interval An `<epidist>` object or anonymous function for
+#' the contact interval. This is analogous to the serial interval or generation
+#' time, but gives the time interval between someone being infected/infectious
+#' (in the simulation the latency period is assumed to be zero) and having
+#' contact with another individual.
+#' @param prob_infect A single `numeric` for the probability of a contact
+#' being infected.
 #' @param onset_to_hosp An `<epidist>` object or anonymous function for
 #' the onset to hospitalisation delay distribution.
 #' @param onset_to_death An `<epidist>` object or anonymous function for

From 42b54960d69b449dfa400e3d0f95735734905ab7 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 09:44:45 +0000
Subject: [PATCH 17/50] added details to .sim_network_bp documentation

---
 R/sim_network_bp.R | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 3a4803a2..72d85289 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -9,8 +9,13 @@
 #' by the contact interval function.
 #'
 #' @details
-#' Preferential attachment ...
-#'
+#' The contact distribution sampled takes the network effect
+#' \eqn{q(n) ~ (n + 1)p(n + 1)} where \eqn{p(n)} is the probability density function
+#' of a distribution, e.g., Poisson or Negative binomial. That is to say, the
+#' probability of having choosing a contact at random by following up a
+#' contact chooses individuals with a probability proportional to their number
+#' of contacts. The plus one is because one of the contacts was "used" to
+#' infect the person.
 #'
 #' @param mean_contacts A `numeric` for the mean number of contacts across the
 #' population.
@@ -48,7 +53,7 @@
 
   # run loop until no more individuals are sampled
   while (next_gen_size > 0) {
-    # sample contact distribution with preferential attachment
+    # sample contact distribution
     q <- dpois(0:100 + 1, lambda = mean_contacts) * (0:100 + 1)
     q <- q / sum(q)
     contacts <- sample(0:100, size = next_gen_size, replace = TRUE, prob = q)

From 1f0d2c88b46e31d8e79e6ac0fd6e245de68a815a Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 09:45:19 +0000
Subject: [PATCH 18/50] updated man files for updated functions

---
 man/dot-check_sim_input.Rd  | 23 +++++++++++++----------
 man/dot-sim_contacts_tbl.Rd | 26 +-------------------------
 man/dot-sim_network_bp.Rd   |  8 +++++++-
 man/dot-sim_utils.Rd        | 18 +++++++++++++-----
 man/sim_contacts.Rd         | 24 +++++++++++++-----------
 man/sim_linelist.Rd         | 18 +++++++++++++-----
 man/sim_outbreak.Rd         | 30 ++++++++++++++++--------------
 7 files changed, 76 insertions(+), 71 deletions(-)

diff --git a/man/dot-check_sim_input.Rd b/man/dot-check_sim_input.Rd
index cdc3889b..c10f5feb 100644
--- a/man/dot-check_sim_input.Rd
+++ b/man/dot-check_sim_input.Rd
@@ -6,13 +6,13 @@
 \usage{
 .check_sim_input(
   sim_type = c("linelist", "contacts", "outbreak"),
-  R,
-  serial_interval,
+  mean_contacts,
+  contact_interval,
+  prob_infect,
   outbreak_start_date,
   min_outbreak_size,
   onset_to_hosp = NULL,
   onset_to_death = NULL,
-  contact_distribution = NULL,
   add_names = NULL,
   add_ct = NULL,
   case_type_probs = NULL,
@@ -27,10 +27,17 @@
 \item{sim_type}{A \code{character} string specifying which simulation function
 this function is being called within.}
 
-\item{R}{A single \code{numeric} for the reproduction number.}
+\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
+infection.}
 
-\item{serial_interval}{An \verb{<epidist>} object or anonymous function for
-the serial interval.}
+\item{contact_interval}{An \verb{<epidist>} object or anonymous function for
+the contact interval. This is analogous to the serial interval or generation
+time, but gives the time interval between someone being infected/infectious
+(in the simulation the latency period is assumed to be zero) and having
+contact with another individual.}
+
+\item{prob_infect}{A single \code{numeric} for the probability of a contact
+being infected.}
 
 \item{outbreak_start_date}{A \code{date} for the start of the outbreak.}
 
@@ -44,10 +51,6 @@ the onset to hospitalisation delay distribution.}
 \item{onset_to_death}{An \verb{<epidist>} object or anonymous function for
 the onset to death delay distribution.}
 
-\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
-the contact distribution. This is similar to the offspring distribution,
-but describes the heterogeneity of contacts that are not infected.}
-
 \item{add_names}{A \code{logical} boolean for whether to add names to each row
 of the line list. Default is \code{TRUE}.}
 
diff --git a/man/dot-sim_contacts_tbl.Rd b/man/dot-sim_contacts_tbl.Rd
index 1bdf9974..5968a010 100644
--- a/man/dot-sim_contacts_tbl.Rd
+++ b/man/dot-sim_contacts_tbl.Rd
@@ -4,40 +4,16 @@
 \alias{.sim_contacts_tbl}
 \title{Create contacts table}
 \usage{
-.sim_contacts_tbl(
-  .data,
-  outbreak_start_date,
-  contact_distribution,
-  population_age,
-  contact_tracing_status_probs,
-  config
-)
+.sim_contacts_tbl(.data, contact_tracing_status_probs)
 }
 \arguments{
 \item{.data}{A \verb{<data.frame>} containing the infectious history from a
 branching process simulation.}
 
-\item{outbreak_start_date}{A \code{date} for the start of the outbreak.}
-
-\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
-the contact distribution. This is similar to the offspring distribution,
-but describes the heterogeneity of contacts that are not infected.}
-
-\item{population_age}{Either a \code{numeric} vector with two elements or a
-\verb{<data.frame>} with age structure in the population. Use a \code{numeric} vector
-to specific the age range of the population, the first element is the lower
-bound for the age range, and and the second is the upper bound for the age
-range (both inclusive, i.e. [lower, upper]). The \verb{<data.frame>} with
-age groups and the proportion of the population in that group. See details
-and examples for more information.}
-
 \item{contact_tracing_status_probs}{A named \code{numeric} vector with the
 probability of each contact tracing status. The names of the vector must
 be \code{"under_followup"}, \code{"lost_to_followup"}, \code{"unknown"}. Values of each
 contact tracing status must sum to one.}
-
-\item{config}{A list of settings to adjust the randomly sampled delays and
-Ct values (if \code{add_ct = TRUE}). See \code{\link[=create_config]{create_config()}} for more information.}
 }
 \value{
 A contacts \verb{<data.frame>}
diff --git a/man/dot-sim_network_bp.Rd b/man/dot-sim_network_bp.Rd
index 6c1504c5..d8e7c652 100644
--- a/man/dot-sim_network_bp.Rd
+++ b/man/dot-sim_network_bp.Rd
@@ -30,6 +30,12 @@ probability of infection. The time between each infection is determined
 by the contact interval function.
 }
 \details{
-Preferential attachment ...
+The contact distribution sampled takes the network effect
+\eqn{q(n) ~ (n + 1)p(n + 1)} where \eqn{p(n)} is the probability density function
+of a distribution, e.g., Poisson or Negative binomial. That is to say, the
+probability of having choosing a contact at random by following up a
+contact chooses individuals with a probability proportional to their number
+of contacts. The plus one is because one of the contacts was "used" to
+infect the person.
 }
 \keyword{internal}
diff --git a/man/dot-sim_utils.Rd b/man/dot-sim_utils.Rd
index decb0283..da649e72 100644
--- a/man/dot-sim_utils.Rd
+++ b/man/dot-sim_utils.Rd
@@ -8,8 +8,9 @@
 within \pkg{simulist}}
 \usage{
 .sim_bp_linelist(
-  R,
-  serial_interval,
+  mean_contacts,
+  contact_interval,
+  prob_infect,
   outbreak_start_date,
   min_outbreak_size,
   population_age,
@@ -31,10 +32,17 @@ within \pkg{simulist}}
 )
 }
 \arguments{
-\item{R}{A single \code{numeric} for the reproduction number.}
+\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
+infection.}
 
-\item{serial_interval}{An \verb{<epidist>} object or anonymous function for
-the serial interval.}
+\item{contact_interval}{An \verb{<epidist>} object or anonymous function for
+the contact interval. This is analogous to the serial interval or generation
+time, but gives the time interval between someone being infected/infectious
+(in the simulation the latency period is assumed to be zero) and having
+contact with another individual.}
+
+\item{prob_infect}{A single \code{numeric} for the probability of a contact
+being infected.}
 
 \item{outbreak_start_date}{A \code{date} for the start of the outbreak.}
 
diff --git a/man/sim_contacts.Rd b/man/sim_contacts.Rd
index 1fcdeb0f..a143e0d1 100644
--- a/man/sim_contacts.Rd
+++ b/man/sim_contacts.Rd
@@ -5,9 +5,9 @@
 \title{Simulate contacts for an infectious disease outbreak}
 \usage{
 sim_contacts(
-  R,
-  serial_interval,
-  contact_distribution,
+  mean_contacts,
+  contact_interval,
+  prob_infect,
   outbreak_start_date = as.Date("2023-01-01"),
   min_outbreak_size = 10,
   population_age = c(1, 90),
@@ -17,14 +17,17 @@ sim_contacts(
 )
 }
 \arguments{
-\item{R}{A single \code{numeric} for the reproduction number.}
+\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
+infection.}
 
-\item{serial_interval}{An \verb{<epidist>} object or anonymous function for
-the serial interval.}
+\item{contact_interval}{An \verb{<epidist>} object or anonymous function for
+the contact interval. This is analogous to the serial interval or generation
+time, but gives the time interval between someone being infected/infectious
+(in the simulation the latency period is assumed to be zero) and having
+contact with another individual.}
 
-\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
-the contact distribution. This is similar to the offspring distribution,
-but describes the heterogeneity of contacts that are not infected.}
+\item{prob_infect}{A single \code{numeric} for the probability of a contact
+being infected.}
 
 \item{outbreak_start_date}{A \code{date} for the start of the outbreak.}
 
@@ -72,8 +75,7 @@ contact_distribution <- epiparameter::epidist(
 
 contacts <- sim_contacts(
   R = 1.1,
-  serial_interval = serial_interval,
-  contact_distribution = contact_distribution
+  serial_interval = serial_interval
 )
 }
 \author{
diff --git a/man/sim_linelist.Rd b/man/sim_linelist.Rd
index 6e95aec5..9f1353e9 100644
--- a/man/sim_linelist.Rd
+++ b/man/sim_linelist.Rd
@@ -5,8 +5,9 @@
 \title{Simulate a line list}
 \usage{
 sim_linelist(
-  R,
-  serial_interval,
+  mean_contacts,
+  contact_interval,
+  prob_infect,
   onset_to_hosp,
   onset_to_death,
   hosp_risk = 0.2,
@@ -22,10 +23,17 @@ sim_linelist(
 )
 }
 \arguments{
-\item{R}{A single \code{numeric} for the reproduction number.}
+\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
+infection.}
 
-\item{serial_interval}{An \verb{<epidist>} object or anonymous function for
-the serial interval.}
+\item{contact_interval}{An \verb{<epidist>} object or anonymous function for
+the contact interval. This is analogous to the serial interval or generation
+time, but gives the time interval between someone being infected/infectious
+(in the simulation the latency period is assumed to be zero) and having
+contact with another individual.}
+
+\item{prob_infect}{A single \code{numeric} for the probability of a contact
+being infected.}
 
 \item{onset_to_hosp}{An \verb{<epidist>} object or anonymous function for
 the onset to hospitalisation delay distribution.}
diff --git a/man/sim_outbreak.Rd b/man/sim_outbreak.Rd
index 7ad98942..4d86e869 100644
--- a/man/sim_outbreak.Rd
+++ b/man/sim_outbreak.Rd
@@ -5,11 +5,11 @@
 \title{Simulate a line list and a contacts table}
 \usage{
 sim_outbreak(
-  R,
-  serial_interval,
+  mean_contacts,
+  contact_interval,
+  prob_infect,
   onset_to_hosp,
   onset_to_death,
-  contact_distribution,
   hosp_risk = 0.2,
   hosp_death_risk = 0.5,
   non_hosp_death_risk = 0.05,
@@ -25,10 +25,17 @@ sim_outbreak(
 )
 }
 \arguments{
-\item{R}{A single \code{numeric} for the reproduction number.}
+\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
+infection.}
 
-\item{serial_interval}{An \verb{<epidist>} object or anonymous function for
-the serial interval.}
+\item{contact_interval}{An \verb{<epidist>} object or anonymous function for
+the contact interval. This is analogous to the serial interval or generation
+time, but gives the time interval between someone being infected/infectious
+(in the simulation the latency period is assumed to be zero) and having
+contact with another individual.}
+
+\item{prob_infect}{A single \code{numeric} for the probability of a contact
+being infected.}
 
 \item{onset_to_hosp}{An \verb{<epidist>} object or anonymous function for
 the onset to hospitalisation delay distribution.}
@@ -36,10 +43,6 @@ the onset to hospitalisation delay distribution.}
 \item{onset_to_death}{An \verb{<epidist>} object or anonymous function for
 the onset to death delay distribution.}
 
-\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
-the contact distribution. This is similar to the offspring distribution,
-but describes the heterogeneity of contacts that are not infected.}
-
 \item{hosp_risk}{Either a single \code{numeric} for the hospitalisation risk of
 everyone in the population, or a \verb{<data.frame>} with age specific
 hospitalisation risks Default is 20\% hospitalisation (\code{0.2}) for the entire
@@ -125,7 +128,7 @@ that age group. Proportions must sum to one.
 }
 \examples{
 # load data required to simulate outbreak data
-serial_interval <- epiparameter::epidist(
+contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
   epi_dist = "serial interval",
   prob_distribution = "gamma",
@@ -155,10 +158,9 @@ contact_distribution <- epiparameter::epidist(
 
 outbreak <- sim_outbreak(
   R = 1.1,
-  serial_interval = serial_interval,
+  contact_interval = contact_interval,
   onset_to_hosp = onset_to_hosp,
-  onset_to_death = onset_to_death,
-  contact_distribution = contact_distribution
+  onset_to_death = onset_to_death
 )
 }
 \author{

From 6de6d1344d6ba80313e51d244adde94d7b9408ad Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 09:51:59 +0000
Subject: [PATCH 19/50] fix typo converting contact_interval to function in
 sim_contacts

---
 R/sim_contacts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sim_contacts.R b/R/sim_contacts.R
index 3dfa3a0e..26b0fd90 100644
--- a/R/sim_contacts.R
+++ b/R/sim_contacts.R
@@ -48,7 +48,7 @@ sim_contacts <- function(mean_contacts,
     "Input delay distributions need to be either functions or <epidist>" =
       inherits(contact_interval, c("function", "epidist"))
   )
-  contact_interval <- as.function(serial_interval, func_type = "generate")
+  contact_interval <- as.function(contact_interval, func_type = "generate")
 
   .check_sim_input(
     sim_type = "contacts",

From 02abe5fe7bb65d6d8b8b37360b34b0d8394b46dd Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 09:57:20 +0000
Subject: [PATCH 20/50] fixed examples and inherit doc in .sim_network_bp

---
 R/sim_contacts.R           | 16 +++++-----------
 R/sim_linelist.R           | 25 +++++++++++++------------
 R/sim_network_bp.R         |  9 +--------
 R/sim_outbreak.R           | 10 ++--------
 man/dot-check_sim_input.Rd | 10 +++++-----
 man/dot-sim_network_bp.Rd  | 15 ++++++++-------
 man/dot-sim_utils.Rd       | 10 +++++-----
 man/sim_contacts.Rd        | 26 ++++++++++----------------
 man/sim_linelist.Rd        | 24 +++++++++++++-----------
 man/sim_outbreak.Rd        | 20 +++++++-------------
 10 files changed, 69 insertions(+), 96 deletions(-)

diff --git a/R/sim_contacts.R b/R/sim_contacts.R
index 26b0fd90..49510450 100644
--- a/R/sim_contacts.R
+++ b/R/sim_contacts.R
@@ -13,23 +13,17 @@
 #'
 #' @examples
 #' # load data required to simulate contacts
-#' serial_interval <- epiparameter::epidist(
+#' contact_interval <- epiparameter::epidist(
 #'   disease = "COVID-19",
-#'   epi_dist = "serial interval",
+#'   epi_dist = "contact interval",
 #'   prob_distribution = "gamma",
 #'   prob_distribution_params = c(shape = 1, scale = 1)
 #' )
 #'
-#' contact_distribution <- epiparameter::epidist(
-#'   disease = "COVID-19",
-#'   epi_dist = "contact_distribution",
-#'   prob_distribution = "pois",
-#'   prob_distribution_params = c(l = 5)
-#' )
-#'
 #' contacts <- sim_contacts(
-#'   R = 1.1,
-#'   serial_interval = serial_interval
+#'   mean_contacts = 2,
+#'   contact_interval = contact_interval,
+#'   prob_infect = 0.5
 #' )
 sim_contacts <- function(mean_contacts,
                          contact_interval,
diff --git a/R/sim_linelist.R b/R/sim_linelist.R
index 4ccea45f..29fc98a1 100644
--- a/R/sim_linelist.R
+++ b/R/sim_linelist.R
@@ -20,16 +20,15 @@
 #' * `proportion`: a column with the proportion of the population that are in
 #' that age group. Proportions must sum to one.
 #'
-#'
 #' @param mean_contacts A single `numeric` for the mean number of contacts per
 #' infection.
 #' @param contact_interval An `<epidist>` object or anonymous function for
 #' the contact interval. This is analogous to the serial interval or generation
-#' time, but gives the time interval between someone being infected/infectious
-#' (in the simulation the latency period is assumed to be zero) and having
-#' contact with another individual.
-#' @param prob_infect A single `numeric` for the probability of a contact
-#' being infected.
+#' time, but defines the time interval between an individual being
+#' infected/infectious (in the simulation the latency period is assumed to be
+#' zero) and having contact with another susceptible individual.
+#' @param prob_infect A single `numeric` for the probability of a secondary
+#' contact being infected by an infected primary contact.
 #' @param onset_to_hosp An `<epidist>` object or anonymous function for
 #' the onset to hospitalisation delay distribution.
 #' @param onset_to_death An `<epidist>` object or anonymous function for
@@ -76,9 +75,9 @@
 #'
 #' @examples
 #' # load data required to simulate line list
-#' serial_interval <- epiparameter::epidist(
+#' contact_interval <- epiparameter::epidist(
 #'   disease = "COVID-19",
-#'   epi_dist = "serial interval",
+#'   epi_dist = "contact interval",
 #'   prob_distribution = "gamma",
 #'   prob_distribution_params = c(shape = 1, scale = 1)
 #' )
@@ -98,8 +97,9 @@
 #' )
 #' # example with single hospitalisation risk for entire population
 #' linelist <- sim_linelist(
-#'   R = 1.1,
-#'   serial_interval = serial_interval,
+#'   mean_contacts = 2,
+#'   contact_interval = contact_interval,
+#'   prob_infect = 0.5,
 #'   onset_to_hosp = onset_to_hosp,
 #'   onset_to_death = onset_to_death,
 #'   hosp_risk = 0.5
@@ -115,8 +115,9 @@
 #'   risk = c(0.1, 0.05, 0.2)
 #' )
 #' linelist <- sim_linelist(
-#'   R = 1.1,
-#'   serial_interval = serial_interval,
+#'   mean_contacts = 2,
+#'   contact_interval = contact_interval,
+#'   prob_infect = 0.5,
 #'   onset_to_hosp = onset_to_hosp,
 #'   onset_to_death = onset_to_death,
 #'   hosp_risk = age_dep_hosp_risk
diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 72d85289..848acac4 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -17,14 +17,7 @@
 #' of contacts. The plus one is because one of the contacts was "used" to
 #' infect the person.
 #'
-#' @param mean_contacts A `numeric` for the mean number of contacts across the
-#' population.
-#' @param contact_interval An `<epidist>` object or anonymous function for
-#' the contact interval. The contact interval is similar to the generation time
-#' or serial interval, but describes the time between becoming infected by a
-#' contact and contacting another susceptible individual.
-#' @param prob_infect A `numeric` for the probability of a secondary contact
-#' being infected by an infected primary contact.
+#' @inheritParams sim_linelist
 #'
 #' @return A `<data.frame>` with the contact and transmission chain data.
 #' @keywords internal
diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R
index 6547d6ce..9f3f0e18 100644
--- a/R/sim_outbreak.R
+++ b/R/sim_outbreak.R
@@ -39,16 +39,10 @@
 #'   single_epidist = TRUE
 #' )
 #'
-#' contact_distribution <- epiparameter::epidist(
-#'   disease = "COVID-19",
-#'   epi_dist = "contact_distribution",
-#'   prob_distribution = "pois",
-#'   prob_distribution_params = c(l = 5)
-#' )
-#'
 #' outbreak <- sim_outbreak(
-#'   R = 1.1,
+#'   mean_contacts = 2,
 #'   contact_interval = contact_interval,
+#'   prob_infect = 0.5,
 #'   onset_to_hosp = onset_to_hosp,
 #'   onset_to_death = onset_to_death
 #' )
diff --git a/man/dot-check_sim_input.Rd b/man/dot-check_sim_input.Rd
index c10f5feb..533165a8 100644
--- a/man/dot-check_sim_input.Rd
+++ b/man/dot-check_sim_input.Rd
@@ -32,12 +32,12 @@ infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
-time, but gives the time interval between someone being infected/infectious
-(in the simulation the latency period is assumed to be zero) and having
-contact with another individual.}
+time, but defines the time interval between an individual being
+infected/infectious (in the simulation the latency period is assumed to be
+zero) and having contact with another susceptible individual.}
 
-\item{prob_infect}{A single \code{numeric} for the probability of a contact
-being infected.}
+\item{prob_infect}{A single \code{numeric} for the probability of a secondary
+contact being infected by an infected primary contact.}
 
 \item{outbreak_start_date}{A \code{date} for the start of the outbreak.}
 
diff --git a/man/dot-sim_network_bp.Rd b/man/dot-sim_network_bp.Rd
index d8e7c652..e526dbdb 100644
--- a/man/dot-sim_network_bp.Rd
+++ b/man/dot-sim_network_bp.Rd
@@ -8,16 +8,17 @@ infection for each contact}
 .sim_network_bp(mean_contacts, contact_interval, prob_infect)
 }
 \arguments{
-\item{mean_contacts}{A \code{numeric} for the mean number of contacts across the
-population.}
+\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
+infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
-the contact interval. The contact interval is similar to the generation time
-or serial interval, but describes the time between becoming infected by a
-contact and contacting another susceptible individual.}
+the contact interval. This is analogous to the serial interval or generation
+time, but defines the time interval between an individual being
+infected/infectious (in the simulation the latency period is assumed to be
+zero) and having contact with another susceptible individual.}
 
-\item{prob_infect}{A \code{numeric} for the probability of a secondary contact
-being infected by an infected primary contact.}
+\item{prob_infect}{A single \code{numeric} for the probability of a secondary
+contact being infected by an infected primary contact.}
 }
 \value{
 A \verb{<data.frame>} with the contact and transmission chain data.
diff --git a/man/dot-sim_utils.Rd b/man/dot-sim_utils.Rd
index da649e72..1e21c4d9 100644
--- a/man/dot-sim_utils.Rd
+++ b/man/dot-sim_utils.Rd
@@ -37,12 +37,12 @@ infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
-time, but gives the time interval between someone being infected/infectious
-(in the simulation the latency period is assumed to be zero) and having
-contact with another individual.}
+time, but defines the time interval between an individual being
+infected/infectious (in the simulation the latency period is assumed to be
+zero) and having contact with another susceptible individual.}
 
-\item{prob_infect}{A single \code{numeric} for the probability of a contact
-being infected.}
+\item{prob_infect}{A single \code{numeric} for the probability of a secondary
+contact being infected by an infected primary contact.}
 
 \item{outbreak_start_date}{A \code{date} for the start of the outbreak.}
 
diff --git a/man/sim_contacts.Rd b/man/sim_contacts.Rd
index a143e0d1..f85561bb 100644
--- a/man/sim_contacts.Rd
+++ b/man/sim_contacts.Rd
@@ -22,12 +22,12 @@ infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
-time, but gives the time interval between someone being infected/infectious
-(in the simulation the latency period is assumed to be zero) and having
-contact with another individual.}
+time, but defines the time interval between an individual being
+infected/infectious (in the simulation the latency period is assumed to be
+zero) and having contact with another susceptible individual.}
 
-\item{prob_infect}{A single \code{numeric} for the probability of a contact
-being infected.}
+\item{prob_infect}{A single \code{numeric} for the probability of a secondary
+contact being infected by an infected primary contact.}
 
 \item{outbreak_start_date}{A \code{date} for the start of the outbreak.}
 
@@ -59,23 +59,17 @@ Simulate contacts for an infectious disease outbreak
 }
 \examples{
 # load data required to simulate contacts
-serial_interval <- epiparameter::epidist(
+contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
-  epi_dist = "serial interval",
+  epi_dist = "contact interval",
   prob_distribution = "gamma",
   prob_distribution_params = c(shape = 1, scale = 1)
 )
 
-contact_distribution <- epiparameter::epidist(
-  disease = "COVID-19",
-  epi_dist = "contact_distribution",
-  prob_distribution = "pois",
-  prob_distribution_params = c(l = 5)
-)
-
 contacts <- sim_contacts(
-  R = 1.1,
-  serial_interval = serial_interval
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5
 )
 }
 \author{
diff --git a/man/sim_linelist.Rd b/man/sim_linelist.Rd
index 9f1353e9..96bfb7a4 100644
--- a/man/sim_linelist.Rd
+++ b/man/sim_linelist.Rd
@@ -28,12 +28,12 @@ infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
-time, but gives the time interval between someone being infected/infectious
-(in the simulation the latency period is assumed to be zero) and having
-contact with another individual.}
+time, but defines the time interval between an individual being
+infected/infectious (in the simulation the latency period is assumed to be
+zero) and having contact with another susceptible individual.}
 
-\item{prob_infect}{A single \code{numeric} for the probability of a contact
-being infected.}
+\item{prob_infect}{A single \code{numeric} for the probability of a secondary
+contact being infected by an infected primary contact.}
 
 \item{onset_to_hosp}{An \verb{<epidist>} object or anonymous function for
 the onset to hospitalisation delay distribution.}
@@ -116,9 +116,9 @@ that age group. Proportions must sum to one.
 }
 \examples{
 # load data required to simulate line list
-serial_interval <- epiparameter::epidist(
+contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
-  epi_dist = "serial interval",
+  epi_dist = "contact interval",
   prob_distribution = "gamma",
   prob_distribution_params = c(shape = 1, scale = 1)
 )
@@ -138,8 +138,9 @@ onset_to_death <- epiparameter::epidist_db(
 )
 # example with single hospitalisation risk for entire population
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = 0.5
@@ -155,8 +156,9 @@ age_dep_hosp_risk <- data.frame(
   risk = c(0.1, 0.05, 0.2)
 )
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = age_dep_hosp_risk
diff --git a/man/sim_outbreak.Rd b/man/sim_outbreak.Rd
index 4d86e869..1de6a77a 100644
--- a/man/sim_outbreak.Rd
+++ b/man/sim_outbreak.Rd
@@ -30,12 +30,12 @@ infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
-time, but gives the time interval between someone being infected/infectious
-(in the simulation the latency period is assumed to be zero) and having
-contact with another individual.}
+time, but defines the time interval between an individual being
+infected/infectious (in the simulation the latency period is assumed to be
+zero) and having contact with another susceptible individual.}
 
-\item{prob_infect}{A single \code{numeric} for the probability of a contact
-being infected.}
+\item{prob_infect}{A single \code{numeric} for the probability of a secondary
+contact being infected by an infected primary contact.}
 
 \item{onset_to_hosp}{An \verb{<epidist>} object or anonymous function for
 the onset to hospitalisation delay distribution.}
@@ -149,16 +149,10 @@ onset_to_death <- epiparameter::epidist_db(
   single_epidist = TRUE
 )
 
-contact_distribution <- epiparameter::epidist(
-  disease = "COVID-19",
-  epi_dist = "contact_distribution",
-  prob_distribution = "pois",
-  prob_distribution_params = c(l = 5)
-)
-
 outbreak <- sim_outbreak(
-  R = 1.1,
+  mean_contacts = 2,
   contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death
 )

From bdcbff7ab1ca6d5316fbccf5ca48f095c017ed2c Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 10:39:42 +0000
Subject: [PATCH 21/50] added tests for .sim_network_bp

---
 tests/testthat/test-sim_network_bp.R | 40 ++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 tests/testthat/test-sim_network_bp.R

diff --git a/tests/testthat/test-sim_network_bp.R b/tests/testthat/test-sim_network_bp.R
new file mode 100644
index 00000000..56b29d47
--- /dev/null
+++ b/tests/testthat/test-sim_network_bp.R
@@ -0,0 +1,40 @@
+suppressMessages({
+  contact_interval <- as.function(
+    epiparameter::epidist(
+      disease = "COVID-19",
+      epi_dist = "contact interval",
+      prob_distribution = "gamma",
+      prob_distribution_params = c(shape = 1, scale = 1)
+    ), func_type = "generate"
+  )
+})
+
+test_that(".sim_network_bp works as expected", {
+  set.seed(1)
+  res <- .sim_network_bp(
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5
+  )
+  expect_s3_class(res, class = "data.frame")
+  expect_identical(dim(res), c(10L, 5L))
+  expect_identical(
+    colnames(res),
+    c("id", "ancestor", "generation", "infected", "time")
+  )
+})
+
+test_that(".sim_network_bp works as expected with no contacts", {
+  set.seed(4)
+  res <- .sim_network_bp(
+    mean_contacts = 1,
+    contact_interval = contact_interval,
+    prob_infect = 0.5
+  )
+  expect_s3_class(res, class = "data.frame")
+  expect_identical(dim(res), c(1L, 5L))
+  expect_identical(
+    colnames(res),
+    c("id", "ancestor", "generation", "infected", "time")
+  )
+})

From 4b1ca9d54a0764c41e1ba359cfe828a912b72057 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 10:40:21 +0000
Subject: [PATCH 22/50] added linelist row filtering to sim_outbreak

---
 R/sim_outbreak.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R
index 9f3f0e18..d9986c30 100644
--- a/R/sim_outbreak.R
+++ b/R/sim_outbreak.R
@@ -157,6 +157,7 @@ sim_outbreak <- function(mean_contacts,
     contact_tracing_status_probs = contact_tracing_status_probs
   )
 
+  linelist$chain <- linelist$chain[linelist$chain$infected == "infected", ]
   chain <- linelist$chain[, linelist$cols]
 
   # return outbreak data

From 8d09db739a8aeae28f62e75db272fc24b8f01d56 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 10:40:49 +0000
Subject: [PATCH 23/50] reduced maximum iterations for simulation conditioning

---
 R/sim_utils.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sim_utils.R b/R/sim_utils.R
index b6302094..3063892d 100644
--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -32,7 +32,7 @@ NULL
     )
     outbreak_size <- sum(chain$infected == "infected")
     max_iter <- max_iter + 1L
-    if (max_iter >= 1e4) {
+    if (max_iter >= 1e3) {
       stop(
         "Exceeded maximum number of iterations for simulating outbreak. \n",
         "Change input parameters or min_outbreak_size.",

From 8f6dd78f7aea4b9cd829a8af3dada96cb14a53a1 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 17 Jan 2024 10:41:13 +0000
Subject: [PATCH 24/50] updated tests to pass with new arguments

---
 tests/testthat/test-checkers.R     | 46 +++++++--------
 tests/testthat/test-sim_contacts.R | 53 ++++++++----------
 tests/testthat/test-sim_linelist.R | 89 +++++++++++++++++-------------
 tests/testthat/test-sim_outbreak.R | 61 +++++++++-----------
 4 files changed, 122 insertions(+), 127 deletions(-)

diff --git a/tests/testthat/test-checkers.R b/tests/testthat/test-checkers.R
index 9506b6be..59305d67 100644
--- a/tests/testthat/test-checkers.R
+++ b/tests/testthat/test-checkers.R
@@ -135,9 +135,9 @@ test_that(".check_age_df fails as expected", {
 })
 
 suppressMessages({
-  serial_interval <- as.function(epiparameter::epidist(
+  contact_interval <- as.function(epiparameter::epidist(
     disease = "COVID-19",
-    epi_dist = "serial interval",
+    epi_dist = "contact interval",
     prob_distribution = "gamma",
     prob_distribution_params = c(shape = 1, scale = 1)
   ))
@@ -155,25 +155,18 @@ suppressMessages({
     epi_dist = "onset to death",
     single_epidist = TRUE
   ))
-
-  contact_distribution <- as.function(epiparameter::epidist(
-    disease = "COVID-19",
-    epi_dist = "contact_distribution",
-    prob_distribution = "pois",
-    prob_distribution_params = c(l = 5)
-  ))
 })
 
 test_that(".check_sim_input works as expected", {
   chk <- .check_sim_input(
     sim_type = "outbreak",
-    R = 1,
-    serial_interval = serial_interval,
+    mean_contacts = 1,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     outbreak_start_date = as.Date("2023-01-01"),
     min_outbreak_size = 10,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
-    contact_distribution = contact_distribution,
     add_names = TRUE,
     add_ct = FALSE,
     case_type_probs = c(
@@ -196,8 +189,9 @@ test_that(".check_sim_input works as expected", {
 
   chk <- .check_sim_input(
     sim_type = "linelist",
-    R = 1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     outbreak_start_date = as.Date("2023-01-01"),
     min_outbreak_size = 10,
     onset_to_hosp = onset_to_hosp,
@@ -219,11 +213,11 @@ test_that(".check_sim_input works as expected", {
 
   chk <- .check_sim_input(
     sim_type = "contacts",
-    R = 1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     outbreak_start_date = as.Date("2023-01-01"),
     min_outbreak_size = 10,
-    contact_distribution = contact_distribution,
     contact_tracing_status_probs = c(
       under_followup = 0.7,
       lost_to_followup = 0.2,
@@ -241,18 +235,19 @@ test_that(".check_sim_input fails as expected", {
     regexp = "(arg)*(should be one of)*(linelist)*(contacts)*(outbreak)"
   )
   expect_error(
-    .check_sim_input(sim_type = "outbreak", R = -1),
-    regexp = "(Assertion on)*(R)*(failed)"
+    .check_sim_input(sim_type = "outbreak", mean_contacts = 0, prob_infect = 0.5),
+    regexp = "(Assertion on)*(mean_contacts)*(failed)"
   )
   expect_error(
-    .check_sim_input(sim_type = "outbreak", R = 1, serial_interval = list()),
-    regexp = "(Assertion on)*(serial_interval)*(failed)"
+    .check_sim_input(sim_type = "outbreak", mean_contacts = 2, contact_interval = list(), prob_infect = 0.5),
+    regexp = "(Assertion on)*(contact_interval)*(failed)"
   )
   expect_error(
     .check_sim_input(
       sim_type = "outbreak",
-      R = 1,
-      serial_interval = serial_interval,
+      mean_contacts = 2,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
       outbreak_start_date = "01-01-2023"
     ),
     regexp = "(Assertion on)*(outbreak_start_date)*(failed)"
@@ -260,8 +255,9 @@ test_that(".check_sim_input fails as expected", {
   expect_error(
     .check_sim_input(
       sim_type = "outbreak",
-      R = 1,
-      serial_interval = serial_interval,
+      mean_contacts = 2,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
       outbreak_start_date = as.Date("2023-01-01"),
       min_outbreak_size = "10"
     ),
diff --git a/tests/testthat/test-sim_contacts.R b/tests/testthat/test-sim_contacts.R
index 861d1ff1..ade7eec5 100644
--- a/tests/testthat/test-sim_contacts.R
+++ b/tests/testthat/test-sim_contacts.R
@@ -1,33 +1,26 @@
 suppressMessages({
-  serial_interval <- epiparameter::epidist(
+  contact_interval <- epiparameter::epidist(
     disease = "COVID-19",
-    epi_dist = "serial interval",
+    epi_dist = "contact interval",
     prob_distribution = "gamma",
     prob_distribution_params = c(shape = 1, scale = 1)
   )
-
-  contact_distribution <- epiparameter::epidist(
-    disease = "COVID-19",
-    epi_dist = "contact_distribution",
-    prob_distribution = "pois",
-    prob_distribution_params = c(l = 5)
-  )
 })
 
 test_that("sim_contacts works as expected", {
   set.seed(1)
   contacts <- sim_contacts(
-    R = 1.1,
-    serial_interval = serial_interval,
-    contact_distribution = contact_distribution
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5
   )
 
   expect_s3_class(contacts, class = "data.frame")
-  expect_identical(dim(contacts), c(170L, 8L))
+  expect_identical(dim(contacts), c(35L, 8L))
   expect_identical(
     colnames(contacts),
     c(
-      "from", "to", "cnt_age", "cnt_gender", "date_first_contact",
+      "from", "to", "age", "gender", "date_first_contact",
       "date_last_contact", "was_case", "status"
     )
   )
@@ -36,9 +29,9 @@ test_that("sim_contacts works as expected", {
 test_that("sim_contacts works as expected with modified config", {
   set.seed(1)
   contacts <- sim_contacts(
-    R = 1.1,
-    serial_interval = serial_interval,
-    contact_distribution = contact_distribution,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     config = create_config(
       last_contact_distribution = "geom",
       last_contact_distribution_params = c(prob = 0.5)
@@ -46,11 +39,11 @@ test_that("sim_contacts works as expected with modified config", {
   )
 
   expect_s3_class(contacts, class = "data.frame")
-  expect_identical(dim(contacts), c(178L, 8L))
+  expect_identical(dim(contacts), c(35L, 8L))
   expect_identical(
     colnames(contacts),
     c(
-      "from", "to", "cnt_age", "cnt_gender", "date_first_contact",
+      "from", "to", "age", "gender", "date_first_contact",
       "date_last_contact", "was_case", "status"
     )
   )
@@ -59,20 +52,20 @@ test_that("sim_contacts works as expected with modified config", {
 test_that("sim_contacts works as expected with modified config parameters", {
   set.seed(1)
   contacts <- sim_contacts(
-    R = 1.1,
-    serial_interval = serial_interval,
-    contact_distribution = contact_distribution,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     config = create_config(
       last_contact_distribution_params = c(lambda = 5)
     )
   )
 
   expect_s3_class(contacts, class = "data.frame")
-  expect_identical(dim(contacts), c(170L, 8L)) # TODO check why nrow is diff
+  expect_identical(dim(contacts), c(35L, 8L))
   expect_identical(
     colnames(contacts),
     c(
-      "from", "to", "cnt_age", "cnt_gender", "date_first_contact",
+      "from", "to", "age", "gender", "date_first_contact",
       "date_last_contact", "was_case", "status"
     )
   )
@@ -81,9 +74,9 @@ test_that("sim_contacts works as expected with modified config parameters", {
 test_that("sim_contacts fails as expected with modified config", {
   expect_error(
     sim_contacts(
-      R = 1.1,
-      serial_interval = serial_interval,
-      contact_distribution = contact_distribution,
+      mean_contacts = 2,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
       config = create_config(
         last_contact_distribution = "geom"
       )
@@ -95,9 +88,9 @@ test_that("sim_contacts fails as expected with modified config", {
 test_that("sim_contacts fails as expected with empty config", {
   expect_error(
     sim_contacts(
-      R = 1.1,
-      serial_interval = serial_interval,
-      contact_distribution = contact_distribution,
+      mean_contacts = 2,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
       config = list()
     ),
     regexp = "Distribution parameters are missing, check config"
diff --git a/tests/testthat/test-sim_linelist.R b/tests/testthat/test-sim_linelist.R
index 9b5c6919..8aadc14a 100644
--- a/tests/testthat/test-sim_linelist.R
+++ b/tests/testthat/test-sim_linelist.R
@@ -1,7 +1,7 @@
 suppressMessages({
-  serial_interval <- epiparameter::epidist(
+  contact_interval <- epiparameter::epidist(
     disease = "COVID-19",
-    epi_dist = "serial interval",
+    epi_dist = "contact interval",
     prob_distribution = "gamma",
     prob_distribution_params = c(shape = 1, scale = 1)
   )
@@ -24,14 +24,15 @@ suppressMessages({
 test_that("sim_linelist works as expected", {
   set.seed(1)
   linelist <- sim_linelist(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death
   )
 
   expect_s3_class(linelist, class = "data.frame")
-  expect_identical(dim(linelist), c(42L, 11L))
+  expect_identical(dim(linelist), c(17L, 11L))
   expect_identical(
     colnames(linelist),
     c(
@@ -57,8 +58,9 @@ test_that("sim_linelist works as expected with age-strat risks", {
   )
   set.seed(1)
   linelist <- sim_linelist(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
     hosp_risk = age_dep_hosp_risk,
@@ -67,7 +69,7 @@ test_that("sim_linelist works as expected with age-strat risks", {
   )
 
   expect_s3_class(linelist, class = "data.frame")
-  expect_identical(dim(linelist), c(42L, 11L))
+  expect_identical(dim(linelist), c(17L, 11L))
   expect_identical(
     colnames(linelist),
     c(
@@ -81,15 +83,16 @@ test_that("sim_linelist works as expected with age-strat risks", {
 test_that("sim_linelist works as expected without Ct", {
   set.seed(1)
   linelist <- sim_linelist(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
     add_ct = FALSE
   )
 
   expect_s3_class(linelist, class = "data.frame")
-  expect_identical(dim(linelist), c(42L, 10L))
+  expect_identical(dim(linelist), c(17L, 10L))
   expect_identical(
     colnames(linelist),
     c(
@@ -103,15 +106,16 @@ test_that("sim_linelist works as expected without Ct", {
 test_that("sim_linelist works as expected with anonymous", {
   set.seed(1)
   linelist <- sim_linelist(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
     add_names = FALSE
   )
 
   expect_s3_class(linelist, class = "data.frame")
-  expect_identical(dim(linelist), c(42L, 10L))
+  expect_identical(dim(linelist), c(17L, 10L))
   expect_identical(
     colnames(linelist),
     c(
@@ -130,15 +134,16 @@ test_that("sim_linelist works as expected with age structure", {
   )
   set.seed(1)
   linelist <- sim_linelist(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
     population_age = age_struct
   )
 
   expect_s3_class(linelist, class = "data.frame")
-  expect_identical(dim(linelist), c(42L, 11L))
+  expect_identical(dim(linelist), c(17L, 11L))
   expect_identical(
     colnames(linelist),
     c(
@@ -161,8 +166,9 @@ test_that("sim_linelist works as expected with age-strat risks & age struct", {
   )
   set.seed(1)
   linelist <- sim_linelist(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
     hosp_risk = age_dep_hosp_risk,
@@ -170,7 +176,7 @@ test_that("sim_linelist works as expected with age-strat risks & age struct", {
   )
 
   expect_s3_class(linelist, class = "data.frame")
-  expect_identical(dim(linelist), c(42L, 11L))
+  expect_identical(dim(linelist), c(17L, 11L))
   expect_identical(
     colnames(linelist),
     c(
@@ -187,10 +193,11 @@ test_that("sim_linelist gives expected proportion of ages with age struct", {
     proportion = c(0.2, 0.5, 0.3),
     stringsAsFactors = FALSE
   )
-  set.seed(1)
+  set.seed(3)
   linelist <- sim_linelist(
-    R = 1.5,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
     population_age = age_struct
@@ -218,8 +225,9 @@ test_that("sim_linelist gives expected proportion of ages with age struct", {
 test_that("sim_linelist works as expected with modified config", {
   set.seed(1)
   linelist <- sim_linelist(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
     config = create_config(
@@ -229,7 +237,7 @@ test_that("sim_linelist works as expected with modified config", {
   )
 
   expect_s3_class(linelist, class = "data.frame")
-  expect_identical(dim(linelist), c(42L, 11L))
+  expect_identical(dim(linelist), c(17L, 11L))
   expect_identical(
     colnames(linelist),
     c(
@@ -243,8 +251,9 @@ test_that("sim_linelist works as expected with modified config", {
 test_that("sim_linelist works as expected with modified config parameters", {
   set.seed(1)
   linelist <- sim_linelist(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
     config = create_config(
@@ -253,7 +262,7 @@ test_that("sim_linelist works as expected with modified config parameters", {
   )
 
   expect_s3_class(linelist, class = "data.frame")
-  expect_identical(dim(linelist), c(42L, 11L))
+  expect_identical(dim(linelist), c(17L, 11L))
   expect_identical(
     colnames(linelist),
     c(
@@ -267,8 +276,9 @@ test_that("sim_linelist works as expected with modified config parameters", {
 test_that("sim_linelist fails as expected with modified config", {
   expect_error(
     sim_linelist(
-      R = 1.1,
-      serial_interval = serial_interval,
+      mean_contacts = 2,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
       onset_to_hosp = onset_to_hosp,
       onset_to_death = onset_to_death,
       config = create_config(
@@ -280,8 +290,9 @@ test_that("sim_linelist fails as expected with modified config", {
 
   expect_error(
     sim_linelist(
-      R = 1.1,
-      serial_interval = serial_interval,
+      mean_contacts = 2,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
       onset_to_hosp = onset_to_hosp,
       onset_to_death = onset_to_death,
       add_ct = TRUE,
@@ -296,8 +307,9 @@ test_that("sim_linelist fails as expected with modified config", {
 test_that("sim_linelist fails as expected with empty config", {
   expect_error(
     sim_linelist(
-      R = 1.1,
-      serial_interval = serial_interval,
+      mean_contacts = 2,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
       onset_to_hosp = onset_to_hosp,
       onset_to_death = onset_to_death,
       config = list()
@@ -306,12 +318,13 @@ test_that("sim_linelist fails as expected with empty config", {
   )
 })
 
-test_that("sim_linelist fails as expected exceeding max iter for bpmodel", {
+test_that("sim_linelist fails as expected exceeding max iter for bp model", {
   set.seed(1)
   expect_error(
     sim_linelist(
-      R = 0.2,
-      serial_interval = serial_interval,
+      mean_contacts = 1,
+      contact_interval = contact_interval,
+      prob_infect = 0.1,
       onset_to_hosp = onset_to_hosp,
       onset_to_death = onset_to_death
     ),
diff --git a/tests/testthat/test-sim_outbreak.R b/tests/testthat/test-sim_outbreak.R
index 90e1c99c..d8892351 100644
--- a/tests/testthat/test-sim_outbreak.R
+++ b/tests/testthat/test-sim_outbreak.R
@@ -1,7 +1,7 @@
 suppressMessages({
-  serial_interval <- epiparameter::epidist(
+  contact_interval <- epiparameter::epidist(
     disease = "COVID-19",
-    epi_dist = "serial interval",
+    epi_dist = "contact interval",
     prob_distribution = "gamma",
     prob_distribution_params = c(shape = 1, scale = 1)
   )
@@ -19,30 +19,23 @@ suppressMessages({
     epi_dist = "onset to death",
     single_epidist = TRUE
   )
-
-  contact_distribution <- epiparameter::epidist(
-    disease = "COVID-19",
-    epi_dist = "contact_distribution",
-    prob_distribution = "pois",
-    prob_distribution_params = c(l = 5)
-  )
 })
 
 test_that("sim_outbreak works as expected", {
   set.seed(1)
   outbreak <- sim_outbreak(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
-    onset_to_death = onset_to_death,
-    contact_distribution = contact_distribution
+    onset_to_death = onset_to_death
   )
 
   expect_type(outbreak, type = "list")
   expect_s3_class(outbreak$linelist, class = "data.frame")
   expect_s3_class(outbreak$contacts, class = "data.frame")
-  expect_identical(dim(outbreak$linelist), c(42L, 11L))
-  expect_identical(dim(outbreak$contacts), c(172L, 8L))
+  expect_identical(dim(outbreak$linelist), c(17L, 11L))
+  expect_identical(dim(outbreak$contacts), c(35L, 8L))
   expect_identical(
     colnames(outbreak$linelist),
     c(
@@ -54,7 +47,7 @@ test_that("sim_outbreak works as expected", {
   expect_identical(
     colnames(outbreak$contacts),
     c(
-      "from", "to", "cnt_age", "cnt_gender", "date_first_contact",
+      "from", "to", "age", "gender", "date_first_contact",
       "date_last_contact", "was_case", "status"
     )
   )
@@ -63,19 +56,19 @@ test_that("sim_outbreak works as expected", {
 test_that("sim_outbreak works as expected with add_names = FALSE", {
   set.seed(1)
   outbreak <- sim_outbreak(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
-    contact_distribution = contact_distribution,
     add_names = FALSE
   )
 
   expect_type(outbreak, type = "list")
   expect_s3_class(outbreak$linelist, class = "data.frame")
   expect_s3_class(outbreak$contacts, class = "data.frame")
-  expect_identical(dim(outbreak$linelist), c(42L, 10L))
-  expect_identical(dim(outbreak$contacts), c(170L, 8L))
+  expect_identical(dim(outbreak$linelist), c(17L, 10L))
+  expect_identical(dim(outbreak$contacts), c(35L, 8L))
   expect_identical(
     colnames(outbreak$linelist),
     c(
@@ -87,7 +80,7 @@ test_that("sim_outbreak works as expected with add_names = FALSE", {
   expect_identical(
     colnames(outbreak$contacts),
     c(
-      "from", "to", "cnt_age", "cnt_gender", "date_first_contact",
+      "from", "to", "age", "gender", "date_first_contact",
       "date_last_contact", "was_case", "status"
     )
   )
@@ -108,11 +101,11 @@ test_that("sim_outbreak works as expected with age-strat risks", {
   )
   set.seed(1)
   outbreak <- sim_outbreak(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
-    contact_distribution = contact_distribution,
     hosp_risk = age_dep_hosp_risk,
     hosp_death_risk = age_dep_hosp_death_risk,
     non_hosp_death_risk = age_dep_non_hosp_death_risk
@@ -121,8 +114,8 @@ test_that("sim_outbreak works as expected with age-strat risks", {
   expect_type(outbreak, type = "list")
   expect_s3_class(outbreak$linelist, class = "data.frame")
   expect_s3_class(outbreak$contacts, class = "data.frame")
-  expect_identical(dim(outbreak$linelist), c(42L, 11L))
-  expect_identical(dim(outbreak$contacts), c(173L, 8L))
+  expect_identical(dim(outbreak$linelist), c(17L, 11L))
+  expect_identical(dim(outbreak$contacts), c(35L, 8L))
   expect_identical(
     colnames(outbreak$linelist),
     c(
@@ -134,7 +127,7 @@ test_that("sim_outbreak works as expected with age-strat risks", {
   expect_identical(
     colnames(outbreak$contacts),
     c(
-      "from", "to", "cnt_age", "cnt_gender", "date_first_contact",
+      "from", "to", "age", "gender", "date_first_contact",
       "date_last_contact", "was_case", "status"
     )
   )
@@ -148,19 +141,19 @@ test_that("sim_outbreak works as expected with age structure", {
   )
   set.seed(1)
   outbreak <- sim_outbreak(
-    R = 1.1,
-    serial_interval = serial_interval,
+    mean_contacts = 2,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
     onset_to_death = onset_to_death,
-    contact_distribution = contact_distribution,
     population_age = age_struct
   )
 
   expect_type(outbreak, type = "list")
   expect_s3_class(outbreak$linelist, class = "data.frame")
   expect_s3_class(outbreak$contacts, class = "data.frame")
-  expect_identical(dim(outbreak$linelist), c(42L, 11L))
-  expect_identical(dim(outbreak$contacts), c(175L, 8L))
+  expect_identical(dim(outbreak$linelist), c(17L, 11L))
+  expect_identical(dim(outbreak$contacts), c(35L, 8L))
   expect_identical(
     colnames(outbreak$linelist),
     c(
@@ -172,7 +165,7 @@ test_that("sim_outbreak works as expected with age structure", {
   expect_identical(
     colnames(outbreak$contacts),
     c(
-      "from", "to", "cnt_age", "cnt_gender", "date_first_contact",
+      "from", "to", "age", "gender", "date_first_contact",
       "date_last_contact", "was_case", "status"
     )
   )

From 735ec13a5e849dae5c02a4f9c1078f1f705f1fa3 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 18 Jan 2024 15:35:25 +0000
Subject: [PATCH 25/50] pass add_names argument to .sim_network_bp

---
 R/sim_contacts.R                     |  1 +
 R/sim_linelist.R                     |  1 +
 R/sim_network_bp.R                   | 19 ++++++++++++++++---
 R/sim_outbreak.R                     |  1 +
 R/sim_utils.R                        |  4 +++-
 man/dot-sim_network_bp.Rd            |  5 ++++-
 man/dot-sim_utils.Rd                 |  7 ++++---
 tests/testthat/test-sim_network_bp.R |  6 ++++--
 8 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/R/sim_contacts.R b/R/sim_contacts.R
index 49510450..07428492 100644
--- a/R/sim_contacts.R
+++ b/R/sim_contacts.R
@@ -62,6 +62,7 @@ sim_contacts <- function(mean_contacts,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
+    add_names = TRUE,
     config = config
   )
 
diff --git a/R/sim_linelist.R b/R/sim_linelist.R
index 29fc98a1..e611f313 100644
--- a/R/sim_linelist.R
+++ b/R/sim_linelist.R
@@ -206,6 +206,7 @@ sim_linelist <- function(mean_contacts,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
+    add_names = add_names,
     config = config
   )
 
diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 848acac4..c4799e2f 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -21,7 +21,10 @@
 #'
 #' @return A `<data.frame>` with the contact and transmission chain data.
 #' @keywords internal
-.sim_network_bp <- function(mean_contacts, contact_interval, prob_infect) {
+.sim_network_bp <- function(mean_contacts,
+                            contact_interval,
+                            prob_infect,
+                            add_names) {
 
   # initialise data object
   ancestor <- vector(mode = "integer", 1e5)
@@ -47,7 +50,7 @@
   # run loop until no more individuals are sampled
   while (next_gen_size > 0) {
     # sample contact distribution
-    q <- dpois(0:100 + 1, lambda = mean_contacts) * (0:100 + 1)
+    q <- stats::dpois(0:100 + 1, lambda = mean_contacts) * (0:100 + 1)
     q <- q / sum(q)
     contacts <- sample(0:100, size = next_gen_size, replace = TRUE, prob = q)
 
@@ -57,6 +60,16 @@
       chain_length <- chain_length + any(contacts >= 1L)
       chain_generation <- chain_generation + 1L
 
+      if (add_names && chain_size > 5180) {
+        warning(
+          "Returning early as total number of contacts exceeds number of names ",
+          "available from {randomNames}. \n If you want to simulate a larger ",
+          "line lists set add_names = FALSE",
+          call. = FALSE
+        )
+        break
+      }
+
       for (i in seq_along(contacts)) {
 
         if (contacts[i] > 0) {
@@ -77,7 +90,7 @@
           ancestor[vec_idx] <- ancestor_idx[i]
 
           # sample infected contacts
-          infect <- rbinom(n = contacts[i], size = 1, prob = prob_infect)
+          infect <- stats::rbinom(n = contacts[i], size = 1, prob = prob_infect)
           infected[vec_idx] <- as.numeric(infect)
 
           # add delay time, removing first element of ancestor time as it is NA
diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R
index d9986c30..f4a619c2 100644
--- a/R/sim_outbreak.R
+++ b/R/sim_outbreak.R
@@ -135,6 +135,7 @@ sim_outbreak <- function(mean_contacts,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
+    add_names = add_names,
     config = config
   )
 
diff --git a/R/sim_utils.R b/R/sim_utils.R
index 3063892d..8d010996 100644
--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -20,6 +20,7 @@ NULL
                              outbreak_start_date,
                              min_outbreak_size,
                              population_age,
+                             add_names,
                              config) {
   outbreak_size <- 0
   max_iter <- 0L
@@ -28,7 +29,8 @@ NULL
     chain <- .sim_network_bp(
       mean_contacts = mean_contacts,
       contact_interval = contact_interval,
-      prob_infect = prob_infect
+      prob_infect = prob_infect,
+      add_names = add_names
     )
     outbreak_size <- sum(chain$infected == "infected")
     max_iter <- max_iter + 1L
diff --git a/man/dot-sim_network_bp.Rd b/man/dot-sim_network_bp.Rd
index e526dbdb..daab90a0 100644
--- a/man/dot-sim_network_bp.Rd
+++ b/man/dot-sim_network_bp.Rd
@@ -5,7 +5,7 @@
 \title{Simulate a random network branching process model with a probability of
 infection for each contact}
 \usage{
-.sim_network_bp(mean_contacts, contact_interval, prob_infect)
+.sim_network_bp(mean_contacts, contact_interval, prob_infect, add_names)
 }
 \arguments{
 \item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
@@ -19,6 +19,9 @@ zero) and having contact with another susceptible individual.}
 
 \item{prob_infect}{A single \code{numeric} for the probability of a secondary
 contact being infected by an infected primary contact.}
+
+\item{add_names}{A \code{logical} boolean for whether to add names to each row
+of the line list. Default is \code{TRUE}.}
 }
 \value{
 A \verb{<data.frame>} with the contact and transmission chain data.
diff --git a/man/dot-sim_utils.Rd b/man/dot-sim_utils.Rd
index 1e21c4d9..49c5a0ba 100644
--- a/man/dot-sim_utils.Rd
+++ b/man/dot-sim_utils.Rd
@@ -14,6 +14,7 @@ within \pkg{simulist}}
   outbreak_start_date,
   min_outbreak_size,
   population_age,
+  add_names,
   config
 )
 
@@ -58,6 +59,9 @@ range (both inclusive, i.e. [lower, upper]). The \verb{<data.frame>} with
 age groups and the proportion of the population in that group. See details
 and examples for more information.}
 
+\item{add_names}{A \code{logical} boolean for whether to add names to each row
+of the line list. Default is \code{TRUE}.}
+
 \item{config}{A list of settings to adjust the randomly sampled delays and
 Ct values (if \code{add_ct = TRUE}). See \code{\link[=create_config]{create_config()}} for more information.}
 
@@ -84,9 +88,6 @@ specific death risks outside of hospitals. Default is 5\% death risk outside
 of hospitals  (\code{0.05}) for the entire population. See details and examples
 for more information.}
 
-\item{add_names}{A \code{logical} boolean for whether to add names to each row
-of the line list. Default is \code{TRUE}.}
-
 \item{add_ct}{A \code{logical} boolean for whether to add Ct values to each row
 of the line list. Default is \code{TRUE}.}
 
diff --git a/tests/testthat/test-sim_network_bp.R b/tests/testthat/test-sim_network_bp.R
index 56b29d47..bc132815 100644
--- a/tests/testthat/test-sim_network_bp.R
+++ b/tests/testthat/test-sim_network_bp.R
@@ -14,7 +14,8 @@ test_that(".sim_network_bp works as expected", {
   res <- .sim_network_bp(
     mean_contacts = 2,
     contact_interval = contact_interval,
-    prob_infect = 0.5
+    prob_infect = 0.5,
+    add_names = TRUE
   )
   expect_s3_class(res, class = "data.frame")
   expect_identical(dim(res), c(10L, 5L))
@@ -29,7 +30,8 @@ test_that(".sim_network_bp works as expected with no contacts", {
   res <- .sim_network_bp(
     mean_contacts = 1,
     contact_interval = contact_interval,
-    prob_infect = 0.5
+    prob_infect = 0.5,
+    add_names = TRUE
   )
   expect_s3_class(res, class = "data.frame")
   expect_identical(dim(res), c(1L, 5L))

From c9f8bc5b2dbd1e83000519a5d5c559218c7fbf87 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 18 Jan 2024 15:36:35 +0000
Subject: [PATCH 26/50] update simulation functions to use new arguments in
 vignettes

---
 vignettes/age-strat-risks.Rmd | 44 +++++++++++--------
 vignettes/age-struct-pop.Rmd  | 34 +++++++++------
 vignettes/simulist.Rmd        | 82 ++++++++++++++++++-----------------
 vignettes/vis-linelist.Rmd    | 37 ++++++----------
 4 files changed, 103 insertions(+), 94 deletions(-)

diff --git a/vignettes/age-strat-risks.Rmd b/vignettes/age-strat-risks.Rmd
index 12336b91..d0a4d671 100644
--- a/vignettes/age-strat-risks.Rmd
+++ b/vignettes/age-strat-risks.Rmd
@@ -44,9 +44,9 @@ library(epiparameter)
 Here is an example that uses the default hospitalisation and death risks (inside and outside of hospital). First we load the requested delay distributions using the {epiparameter} package.
 
 ```{r, read-delay-dists}
-serial_interval <- epidist(
+contact_interval <- epidist(
   disease = "COVID-19",
-  epi_dist = "serial interval",
+  epi_dist = "contact interval",
   prob_distribution = "gamma",
   prob_distribution_params = c(shape = 1, scale = 1)
 )
@@ -82,8 +82,9 @@ Simulate a line list with population-wide default risks:
 
 ```{r, sim-linelist}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death
 )
@@ -96,8 +97,9 @@ We can run another simulation and change the hospitalisation and death risks, in
 
 ```{r, sim-linelist-diff-risks}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = 0.4,
@@ -130,8 +132,9 @@ age_dep_hosp_risk
 
 ```{r, sim-age-strat-linelist}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = age_dep_hosp_risk
@@ -158,8 +161,9 @@ age_dep_hosp_risk <- data.frame(
 age_dep_hosp_risk
 
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = age_dep_hosp_risk
@@ -175,8 +179,9 @@ age_dep_hosp_risk <- data.frame(
 )
 
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = age_dep_hosp_risk,
@@ -198,8 +203,9 @@ age_dep_hosp_death_risk <- data.frame(
 age_dep_hosp_death_risk
 
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_death_risk = age_dep_hosp_death_risk
@@ -214,8 +220,9 @@ age_dep_non_hosp_death_risk <- data.frame(
 age_dep_non_hosp_death_risk
 
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   non_hosp_death_risk = age_dep_non_hosp_death_risk
@@ -239,8 +246,9 @@ age_dep_non_hosp_death_risk <- data.frame(
 )
 
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = age_dep_hosp_risk,
diff --git a/vignettes/age-struct-pop.Rmd b/vignettes/age-struct-pop.Rmd
index 0b5f9b9b..207b4c97 100644
--- a/vignettes/age-struct-pop.Rmd
+++ b/vignettes/age-struct-pop.Rmd
@@ -24,7 +24,7 @@ If you are unfamiliar with the {simulist} package or the `sim_linelist()` functi
 
 This vignette describes how to simulate a line list with either a uniform or non-uniform distribution of ages within the population. This population age structure may either describe the population of a region as a whole, or specifically which age groups are more likely to become infected than others, for instance because they have more social contacts.
 
-The {simulist} package uses a [branching process](https://epiverse-trace.github.io/bpmodels/reference/chain_sim.html) which is independent of population age structure to simulate the line list, and then {simulist} _paints_ the demographic information onto the infected individuals (and in the case of `sim_outbreak()` or `sim_contacts()` the contacts of infected individuals). In other words, once the cases in the line list have been simulated the ages are assigned to each individual post hoc, specified by the age range, and age structure, if supplied.
+The {simulist} package uses a branching process (using a random network model of contacts), which is independent of population age structure to simulate the line list, and then {simulist} _paints_ the demographic information onto the infected individuals (and in the case of `sim_outbreak()` or `sim_contacts()` the contacts of infected individuals). In other words, once the cases in the line list have been simulated the ages are assigned to each individual post hoc, specified by the age range, and age structure, if supplied.
 
 The age range and age structure for the simulation functions (`sim_*()`) in {simulist} is controlled by the `population_age` argument. The default age structure in {simulist} is a uniform structure between a lower and upper age range. The `sim_linelist()` function (and other `sim_*()` functions) can accept a `<data.frame>` instead of the `numeric` vector to specify the age structure for specified age groups. This feature can be especially useful when wanting to simulate an outbreak in a region with a heavily non-uniform age structure, [for example younger populations such as Nigeria, or older populations such as Japan](https://ourworldindata.org/age-structure). 
 
@@ -37,9 +37,9 @@ library(ggplot2)
 First we load the requested delay distributions using the {epiparameter} package. The onset-to-hospitalisation and onset-to-death delay distributions are extracted from the {epiparameter} database, and the serial interval is manually defined as it is not yet available from the {epiparameter} database.
 
 ```{r, read-delay-dists}
-serial_interval <- epidist(
+contact_interval <- epidist(
   disease = "COVID-19",
-  epi_dist = "serial interval",
+  epi_dist = "contact interval",
   prob_distribution = "gamma",
   prob_distribution_params = c(shape = 1, scale = 1)
 )
@@ -69,15 +69,19 @@ set.seed(1)
 
 By default `sim_linelist()` simulates individuals ages assuming a uniform distribution between 1 and 90. To change this age range, a vector of two numbers can be supplied to the `population_age` argument. Here we simulated an outbreak in a population with a population ranging from 5 to 75 (inclusive, `[5,75]`).
 
-***Note***: all ages are assumed to be in the unit of years and need to be integers (or at least ["integerish"](https://rlang.r-lib.org/reference/is_integerish.html) if stored as double)
+***Note***: all ages are assumed to be in the unit of years and need to be integers (or at least ["integerish"](https://rlang.r-lib.org/reference/is_integerish.html) if stored as double).
+
+All simulations in this vignette condition the simulation to have a minimum outbreak size (`min_outbreak_size`) of 100 cases to clearly visualise the distribution of ages.
 
 ```{r sim-linelist-age-range}
 linelist <- sim_linelist(
-  R = 1.3,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.45,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
-  population_age = c(5, 75)
+  population_age = c(5, 75),
+  min_outbreak_size = 100
 )
 head(linelist)
 ```
@@ -116,11 +120,13 @@ age_struct
 
 ```{r, sim-age-struct-linelist}
 linelist <- sim_linelist(
-  R = 1.3,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.45,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
-  population_age = age_struct
+  population_age = age_struct,
+  min_outbreak_size = 100
 )
 
 head(linelist)
@@ -167,11 +173,13 @@ age_struct
 
 ```{r, sim-age-struct-linelist-young}
 linelist <- sim_linelist(
-  R = 1.3,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.45,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
-  population_age = age_struct
+  population_age = age_struct,
+  min_outbreak_size = 100
 )
 
 head(linelist)
diff --git a/vignettes/simulist.Rmd b/vignettes/simulist.Rmd
index cda19a90..f1955eb8 100644
--- a/vignettes/simulist.Rmd
+++ b/vignettes/simulist.Rmd
@@ -20,7 +20,7 @@ knitr::opts_chunk$set(
 
 This is an introductory vignette to the {simulist} R package. {simulist} simulates two types of common epidemiological data collected during infectious disease outbreaks: 1) a line list, which provides individual-level descriptions of cases in an outbreak; 2) a contact dataset, which provides details of which others individuals were in contact with each of the cases.
 
-The main function in the {simulist} package is `sim_linelist()`. This functions takes in arguments that control the dynamics of the outbreak, such as the serial interval, and outputs a line list table (`<data.frame>`) with case information for each infected individual.
+The main function in the {simulist} package is `sim_linelist()`. This functions takes in arguments that control the dynamics of the outbreak, such as the contact interval (i.e. time delay between contacts), and outputs a line list table (`<data.frame>`) with case information for each infected individual.
 
 For this introduction we will simulate a line list for the early stages of a COVID-19 (SARS-CoV-2) outbreak. This will require two R packages: {simulist}, to produce the line list, and {epiparameter} to provide epidemiological parameters, such as onset-to-death delays.
 
@@ -32,11 +32,10 @@ library(epiparameter)
 First we load in some data that is required for the line list simulation. Data on epidemiological parameters and distributions are read from the {epiparameter} R package.
 
 ```{r read-epidist}
-# get serial interval from {epiparameter} database
-
-serial_interval <- epidist(
+# create contact interval (not available from {epiparameter} database)
+contact_interval <- epidist(
   disease = "COVID-19",
-  epi_dist = "serial interval",
+  epi_dist = "contact interval",
   prob_distribution = "gamma",
   prob_distribution_params = c(shape = 1, scale = 1)
 )
@@ -56,12 +55,19 @@ onset_to_death <- epidist_db(
 )
 ```
 
-The reproduction number (`R`) is the first argument in `sim_linelist()` and with the serial interval will control the growth rate of the simulated epidemic. Here we set it as 1.1. The minimum requirements to simulate a line list are the reproduction number (`R`), the serial interval, onset-to-hospitalisation delay and onset-to-death delay.
+The seed is set to ensure the output of the vignette is consistent. When using {simulist}, setting the seed is not required unless you need to simulate the same line list multiple times.
+
+```{r, set-seed}
+set.seed(123)
+```
+
+The mean (average) number of contacts (`mean_contacts`) is the first argument in `sim_linelist()` and with the contact interval and probability of infection per contact (`prob_infect`) will control the growth rate of the simulated epidemic. Here we set the mean number of contacts as 2 and the probability of infection as 0.5 (on average half of contacts become infected). The minimum requirements to simulate a line list are the mean number of contacts (`mean_contacts`), the serial interval, probability of infection, onset-to-hospitalisation delay and onset-to-death delay.
 
 ```{r, sim-linelist}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death
 )
@@ -78,8 +84,9 @@ The line list output from the {simulist} simulation contains a column (`case_typ
 
 ```{r, sim-linelist-default-case-type}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death
 )
@@ -90,8 +97,9 @@ To alter these probabilities, supply a named vector to the `sim_linelist()` argu
 
 ```{r, sim-linelist-mod-case-type}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   case_type_probs = c(suspected = 0.05, probable = 0.05, confirmed = 0.9)
@@ -105,7 +113,7 @@ The way {simulist} assigns case types is by pasting case types onto existing cas
 
 ## Conditioning simulation on outbreak size
 
-The reproduction number has a strong influence on the size of an outbreak. However, the {simulist} package generates line list data using a stochastic algorithm, so even when $R < 1$ it can produce a substantial outbreak by chance, or an $R >> 1$ will sometimes not produce a vast epidemic in one simulation (i.e. one replicate) due to the stochasticity. 
+The reproduction number has a strong influence on the size of an outbreak. For {simulist}, the reproduction number is determined by the mean number of contacts and the probability of infection. However, the {simulist} package generates line list data using a stochastic algorithm, so even when $R < 1$ it can produce a substantial outbreak by chance, or an $R >> 1$ will sometimes not produce a vast epidemic in one simulation (i.e. one replicate) due to the stochasticity. 
 
 When requiring a minimum outbreak size we can specify the `min_outbreak_size` argument in `sim_linelist()`. By default this is set to 10. This means that the simulation will not return a line list until the conditioning has been met. In other words, the simulation will resimulate a branching process model until an outbreak infects at least 10 people.
 
@@ -113,8 +121,9 @@ When requiring a line list that represents a large outbreak, such as the COVID-1
 
 ```{r, sim-large-linelist}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   min_outbreak_size = 250
@@ -122,7 +131,7 @@ linelist <- sim_linelist(
 head(linelist)
 ```
 
-The amount of time the simulation takes can be determined by the reproduction number (`R`) and the minimum outbreak size (`min_outbreak_size`). If the `min_outbreak_size` is large, for example hundreds or thousands of cases, and the reproduction number is below one, it will take many branching process simulations until finding one that produces a sufficiently large epidemic.
+The amount of time the simulation takes can be determined by the mean number of contacts (`mean_contacts`), the probability of infection (`prob_infect`) and the minimum outbreak size (`min_outbreak_size`). If the `min_outbreak_size` is large, for example hundreds or thousands of cases, and the mean number of contacts and probability of infection mean the reproduction number is below one, it will take many branching process simulations until finding one that produces a sufficiently large epidemic.
 
 ## Anonymous line list
 
@@ -130,8 +139,9 @@ By default `sim_linelist()` provides the name of each individual in the line lis
 
 ```{r sim-anon-linelist}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   add_names = FALSE
@@ -151,21 +161,13 @@ For an overview of how a line list can be simulated with age-stratified (or age-
 
 ## Simulate contacts table
 
-To simulate a contacts table, the `sim_contacts()` function can be used. This requires a contacts distribution (i.e. a distribution describing the variability in number of contacts for different individuals within the population). 
+To simulate a contacts table, the `sim_contacts()` function can be used. This requires the same arguments as `sim_linelist()`, but does not require the onset-to-hospitalisation delay and onset-to-death delays.
 
 ```{r, sim-contacts}
-contact_distribution <- epiparameter::epidist(
-  disease = "COVID-19",
-  epi_dist = "contact_distribution",
-  prob_distribution = "pois",
-  prob_distribution_params = c(l = 5)
-)
-
 contacts <- sim_contacts(
-  R = 1.1,
-  serial_interval = serial_interval,
-  contact_distribution = contact_distribution
-)
+  mean_contacts = 2, 
+  contact_interval = contact_interval, 
+  prob_infect = 0.5)
 head(contacts)
 ```
 
@@ -173,15 +175,15 @@ head(contacts)
 
 To produce both a line list and a contacts table for the same outbreak, the `sim_linelist()` and `sim_contacts()` cannot be used separately due to the stochastic algorithm, meaning the data in the line list will be discordant with the contacts table. 
 
-In order to simulate a line list and a contacts table of the same outbreak the `sim_outbreak()` function is required. This will simulate a single outbreak and return a line list and a contacts table. The inputs of `sim_outbreak()` are a combination of the inputs required for `sim_linelist()` and `sim_contacts()`.
+In order to simulate a line list and a contacts table of the same outbreak the `sim_outbreak()` function is required. This will simulate a single outbreak and return a line list and a contacts table. The inputs of `sim_outbreak()` are the same as the inputs required for `sim_linelist()`.
 
 ```{r, sim-outbreak}
 outbreak <- sim_outbreak(
-  R = 1.1,
-  serial_interval = serial_interval,
-  onset_to_hosp = onset_to_hosp,
-  onset_to_death = onset_to_death,
-  contact_distribution = contact_distribution
+  mean_contacts = 2, 
+  contact_interval = contact_interval, 
+  prob_infect = 0.5, 
+  onset_to_hosp = onset_to_hosp, 
+  onset_to_death = onset_to_death
 )
 head(outbreak$linelist)
 head(outbreak$contacts)
@@ -195,11 +197,11 @@ It is possible to use an [anonymous function](https://en.wikipedia.org/wiki/Anon
 
 ```{r, sim-outbreak-anon-func}
 outbreak <- sim_outbreak(
-  R = 1.1,
-  serial_interval = function(x) rgamma(n = x, shape = 2, scale = 2),
+  mean_contacts = 2,
+  contact_interval = function(x) rgamma(n = x, shape = 2, scale = 2),
+  prob_infect = 0.5,
   onset_to_hosp = function(x) rlnorm(n = x, meanlog = 1.5, sdlog = 0.5),
-  onset_to_death = function(x) rweibull(n = x, shape = 0.5, scale = 0.2),
-  contact_distribution = function(x) rnbinom(n = x, mu = 5, size = 0.5)
+  onset_to_death = function(x) rweibull(n = x, shape = 0.5, scale = 0.2)
 )
 head(outbreak$linelist)
 head(outbreak$contacts)
diff --git a/vignettes/vis-linelist.Rmd b/vignettes/vis-linelist.Rmd
index 6414c372..dd4023cf 100644
--- a/vignettes/vis-linelist.Rmd
+++ b/vignettes/vis-linelist.Rmd
@@ -28,11 +28,11 @@ library(epicontacts)
 First we load the required delay distributions using the {epiparameter} package.
 
 ```{r, read-delay-dists}
-serial_interval <- epidist(
+contact_interval <- epidist(
   disease = "COVID-19",
-  epi_dist = "serial interval",
+  epi_dist = "contact interval",
   prob_distribution = "gamma",
-  prob_distribution_params = c(shape = 1, scale = 1)
+  prob_distribution_params = c(shape = 3, scale = 2)
 )
 
 # get onset to hospital admission from {epiparameter} database
@@ -53,17 +53,19 @@ onset_to_death <- epidist_db(
 Setting the seed ensures we have the same output each time the vignette is rendered. When using {simulist}, setting the seed is not required unless you need to simulate the same line list multiple times.
 
 ```{r, set-seed}
-set.seed(1)
+set.seed(123)
 ```
 
 Using a simple line list simulation with the factory default settings:
 
 ```{r sim-linelist} 
 linelist <- sim_linelist(
-  R = 1.3,
-  serial_interval = serial_interval,
+  mean_contacts = 3,
+  contact_interval = contact_interval,
+  prob_infect = 0.33,
   onset_to_hosp = onset_to_hosp,
-  onset_to_death = onset_to_death
+  onset_to_death = onset_to_death, 
+  min_outbreak_size = 500
 )
 ```
 
@@ -160,27 +162,16 @@ Additionally, {epicontacts} provides access to network plotting from JavaScript
 
 :::
 
-The {epicontacts} function `make_epicontacts()` requires both the line list and contacts table, so we will run the `sim_outbreak()` function to produce both. We will use the same epidemiological delay distributions that we used to simulate a line list above. We need an extra distribution to simulate an outbreak, a contact distribution (see the documentation `?sim_outbreak` for more detail). We create this distribution using the {epiparameter} package.
-
-```{r, create-contact-dist}
-contact_distribution <- epiparameter::epidist(
-  disease = "COVID-19",
-  epi_dist = "contact_distribution",
-  prob_distribution = "pois",
-  prob_distribution_params = c(l = 5)
-)
-```
-
-Now we can simulate an outbreak:
+The {epicontacts} function `make_epicontacts()` requires both the line list and contacts table, so we will run the `sim_outbreak()` function to produce both. We will use the same epidemiological delay distributions that we used to simulate a line list above.
 
 ```{r, sim-outbreak}
 set.seed(1)
 outbreak <- sim_outbreak(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
-  onset_to_death = onset_to_death,
-  contact_distribution = contact_distribution
+  onset_to_death = onset_to_death
 )
 head(outbreak$linelist)
 head(outbreak$contacts)

From dfae23dee32c1e905f361e53e1c301819873ef3e Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 18 Jan 2024 15:36:54 +0000
Subject: [PATCH 27/50] remove mention of bpmodels from the package

---
 DESCRIPTION                     | 4 +---
 inst/WORDLIST                   | 1 -
 vignettes/design-principles.Rmd | 5 +++--
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1bebd8a1..1d2c17af 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -27,7 +27,6 @@ Imports:
     stats,
     checkmate,
     epiparameter,
-    bpmodels,
     randomNames,
     rlang
 Suggests: 
@@ -40,8 +39,7 @@ Suggests:
     spelling,
     testthat (>= 3.0.0)
 Remotes:
-    epiverse-trace/epiparameter,
-    epiverse-trace/bpmodels
+    epiverse-trace/epiparameter
 Config/testthat/edition: 3
 Config/Needs/website:
     epiverse-trace/epiversetheme
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 6016c8e5..5e8a9579 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -1,6 +1,5 @@
 apyramid
 bookdown
-bpmodels
 CMD
 codecov
 Codecov
diff --git a/vignettes/design-principles.Rmd b/vignettes/design-principles.Rmd
index 1cfe0e61..e540c3ae 100644
--- a/vignettes/design-principles.Rmd
+++ b/vignettes/design-principles.Rmd
@@ -40,6 +40,8 @@ The simulation functions either return a `<data.frame>` or a `list` of `<data.fr
 
 - The column names of the line list data produced by `sim_linelist()` and `sim_outbreak()` matches the tag names used in the [{linelist} R package](https://CRAN.R-project.org/package=linelist) (an [Epiverse-TRACE R package](https://github.com/epiverse-trace)). This is for continuity of design more than any functional reason. The line list data from {simulist} functions is not tagged _sensu_ [{linelist} tagging](https://epiverse-trace.github.io/linelist/articles/linelist.html). There is an inconsistent use of _hospitalisation_ and _admission_; in the simulated line list it is `date_admission`, but internally the package uses hospitalisation (e.g. `.add_hospitalisation()`). This is because I think _hospitalisation_ is more descriptive but `date_admission` is used by {linelist}.
 
+- {simulist} implements its own branching process model (`.sim_network_bp()`) which tracks contacts of infectious individuals. This is a simple random network model, but for future versions of {simulist} we will make the code modular in order to accept other simulations models. This will remove the burden on {simulist} to simulate from a range of model types.
+
 ## Dependencies
 
 The aim is to restrict the number of dependencies to a minimal required set for ease of maintenance. The current hard dependencies are:
@@ -47,10 +49,9 @@ The aim is to restrict the number of dependencies to a minimal required set for
 * {stats}
 * [{checkmate}](https://CRAN.R-project.org/package=checkmate)
 * [{epiparameter}](https://github.com/epiverse-trace/epiparameter)
-* [{bpmodels}](https://github.com/epiverse-trace/bpmodels)
 * [{randomNames}](https://CRAN.R-project.org/package=randomNames)
 
-{stats} is distributed with the R language so is viewed as a lightweight dependency, that should already be installed on a user's machine if they have R. {checkmate} is an input checking package widely used across Epiverse-TRACE packages. {epiparameter} is used to easily access epidemiological parameters from the package's library, the package is currently unstable and actively developed, however, by using it in another package it can inform the development path of {epiparameter}. {bpmodels} is used to handle the single-type branching process model, this is taken on as a dependency as implementing different transmission models within {simulist} is beyond the package scope and does not scale as we implement more models to generate line list data. {randomNames} provides a utility function for generating random names for case and contact data. The functionality could be replicated in {simulist}, however the {randomNames} package is maintained and contains a range of name generation settings which warrants its use as a dependency.
+{stats} is distributed with the R language so is viewed as a lightweight dependency, that should already be installed on a user's machine if they have R. {checkmate} is an input checking package widely used across Epiverse-TRACE packages. {epiparameter} is used to easily access epidemiological parameters from the package's library, the package is currently unstable and actively developed, however, by using it in another package it can inform the development path of {epiparameter}. {randomNames} provides a utility function for generating random names for case and contact data. The functionality could be replicated in {simulist}, however the {randomNames} package is maintained and contains a range of name generation settings which warrants its use as a dependency.
 
 The soft dependencies (and their minimum version requirements) are:
 

From 2106172808851541bcb268742b41d72c8fac4084 Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Thu, 18 Jan 2024 15:38:32 +0000
Subject: [PATCH 28/50] Update CITATION.cff

---
 CITATION.cff | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/CITATION.cff b/CITATION.cff
index d0503012..07942726 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -104,26 +104,6 @@ references:
     email: adam.kucharski@lshtm.ac.uk
     orcid: https://orcid.org/0000-0001-8814-9421
   year: '2024'
-- type: software
-  title: bpmodels
-  abstract: 'bpmodels: Simulating and Analysing Transmission Chain Statistics using
-    Branching Process Models'
-  notes: Imports
-  url: https://epiverse-trace.github.io/bpmodels/
-  authors:
-  - family-names: Azam
-    given-names: James M.
-    email: james.azam@lshtm.ac.uk
-    orcid: https://orcid.org/0000-0001-5782-7330
-  - family-names: Finger
-    given-names: Flavio
-    email: flavio.finger@epicentre.msf.org
-    orcid: https://orcid.org/0000-0002-8613-5170
-  - family-names: Funk
-    given-names: Sebastian
-    email: sebastian.funk@lshtm.ac.uk
-    orcid: https://orcid.org/0000-0002-2842-3406
-  year: '2024'
 - type: software
   title: randomNames
   abstract: 'randomNames: Generate Random Given and Surnames'

From e19b496b4cb97edd70b2e6bcba8087eb0176e58a Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 18 Jan 2024 16:47:14 +0000
Subject: [PATCH 29/50] update README to use new simulation function arguments

---
 README.Rmd | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 0337402a..80d777dd 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -50,13 +50,13 @@ library(simulist)
 library(epiparameter)
 ```
 
-The line list simulation requires that we define a serial interval, onset-to-hospitalisation delay, and onset-to-death delay. We can load these from the library of epidemiological parameters in the `{epiparameter}` R package if available, or if these are not in the database yet (such as the serial interval for COVID-19) we can define them ourselves.
+The line list simulation requires that we define a contact interval, onset-to-hospitalisation delay, and onset-to-death delay. We can load these from the library of epidemiological parameters in the `{epiparameter}` R package if available, or if these are not in the database yet (such as the contact interval for COVID-19) we can define them ourselves.
 
 ```{r create-epidists}
-# create COVID-19 serial interval
-serial_interval <- epiparameter::epidist(
+# create COVID-19 contact interval
+contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
-  epi_dist = "serial interval",
+  epi_dist = "contact interval",
   prob_distribution = "gamma",
   prob_distribution_params = c(shape = 1, scale = 1)
 )
@@ -76,12 +76,14 @@ onset_to_death <- epiparameter::epidist_db(
 )
 ```
 
-To simulate a line list for COVID-19 with an assumed reproduction number (`R`) of 1.1 we use the `sim_linelist()` function. Using a reproduction number greater than one means we will likely get a reasonably sized outbreak (10 - 1000 cases, varying due to the stochastic simulation). _Do not set the reproduction number too high (e.g. >5) as the outbreak can become extremely large_.
+To simulate a line list for COVID-19 with an assumed average number of contacts of 2 and a probability of infection per contact of 0.5, we use the `sim_linelist()` function. The mean number of contacts and probability of infection determine the outbreak reproduction number, if the resulting reproduction number is around one it means we will likely get a reasonably sized outbreak (10 - 1000 cases, varying due to the stochastic simulation). _Take care when setting the mean number of contacts and the probability of infection, as this can lead to the outbreak becoming extremely large_.
 
 ```{r sim-linelist}
+set.seed(1)
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death
 )
@@ -92,8 +94,9 @@ In this example, the line list is simulated using the default values (see `?sim_
 
 ```{r sim-linelist-diff-args}
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = 0.01,
@@ -102,20 +105,13 @@ linelist <- sim_linelist(
 head(linelist)
 ```
 
-To simulate a table of contacts of cases (i.e. to reflect a contact tracing dataset) we can use the same serial interval defined for the example above. We additionally need a contact distribution, which represents the probability that each person that infected an individual, also had a given number of contacts that did not become infected.
+To simulate a table of contacts of cases (i.e. to reflect a contact tracing dataset) we can use the same parameters defined for the example above.
 
 ```{r, sim-contacts}
-contact_distribution <- epiparameter::epidist(
-  disease = "COVID-19",
-  epi_dist = "contact_distribution",
-  prob_distribution = "pois",
-  prob_distribution_params = c(l = 5)
-)
-
 contacts <- sim_contacts(
-  R = 1.1,
-  serial_interval = serial_interval,
-  contact_distribution = contact_distribution
+  mean_contacts = 2, 
+  contact_interval = contact_interval, 
+  prob_infect = 0.5
 )
 head(contacts)
 ```
@@ -124,11 +120,11 @@ If both the line list and contacts table are required, they can be jointly simul
 
 ```{r, sim-outbreak}
 outbreak <- sim_outbreak(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
-  onset_to_death = onset_to_death,
-  contact_distribution = contact_distribution
+  onset_to_death = onset_to_death
 )
 head(outbreak$linelist)
 head(outbreak$contacts)

From 99704766f77318186bf7090ab28c1b8f73a46c01 Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Thu, 18 Jan 2024 16:50:11 +0000
Subject: [PATCH 30/50] Automatic readme update

---
 README.md | 200 ++++++++++++++++++++++++++----------------------------
 1 file changed, 98 insertions(+), 102 deletions(-)

diff --git a/README.md b/README.md
index 9b7d579a..69b07eb3 100644
--- a/README.md
+++ b/README.md
@@ -50,18 +50,18 @@ library(simulist)
 library(epiparameter)
 ```
 
-The line list simulation requires that we define a serial interval,
+The line list simulation requires that we define a contact interval,
 onset-to-hospitalisation delay, and onset-to-death delay. We can load
 these from the library of epidemiological parameters in the
 `{epiparameter}` R package if available, or if these are not in the
-database yet (such as the serial interval for COVID-19) we can define
+database yet (such as the contact interval for COVID-19) we can define
 them ourselves.
 
 ``` r
-# create COVID-19 serial interval
-serial_interval <- epiparameter::epidist(
+# create COVID-19 contact interval
+contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
-  epi_dist = "serial interval",
+  epi_dist = "contact interval",
   prob_distribution = "gamma",
   prob_distribution_params = c(shape = 1, scale = 1)
 )
@@ -96,35 +96,40 @@ onset_to_death <- epiparameter::epidist_db(
 #> To retrieve the short citation use the 'get_citation' function
 ```
 
-To simulate a line list for COVID-19 with an assumed reproduction number
-(`R`) of 1.1 we use the `sim_linelist()` function. Using a reproduction
-number greater than one means we will likely get a reasonably sized
-outbreak (10 - 1000 cases, varying due to the stochastic simulation).
-*Do not set the reproduction number too high (e.g. \>5) as the outbreak
-can become extremely large*.
+To simulate a line list for COVID-19 with an assumed average number of
+contacts of 2 and a probability of infection per contact of 0.5, we use
+the `sim_linelist()` function. The mean number of contacts and
+probability of infection determine the outbreak reproduction number, if
+the resulting reproduction number is around one it means we will likely
+get a reasonably sized outbreak (10 - 1000 cases, varying due to the
+stochastic simulation). *Take care when setting the mean number of
+contacts and the probability of infection, as this can lead to the
+outbreak becoming extremely large*.
 
 ``` r
+set.seed(1)
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death
 )
 head(linelist)
-#>   id          case_name case_type gender age date_onset date_admission
-#> 1  1      Paul Reynolds confirmed      m   5 2023-01-01           <NA>
-#> 2  2       Kayla Martin suspected      f   6 2023-01-01           <NA>
-#> 3  3   Christian Oberai confirmed      m  59 2023-01-02           <NA>
-#> 4  4 Alexander Anderson confirmed      m  90 2023-01-01           <NA>
-#> 5  5     Matthew Bodine  probable      m  38 2023-01-02           <NA>
-#> 6  6    Randa al-Mansur  probable      f   5 2023-01-02     2023-01-11
+#>   id         case_name case_type gender age date_onset date_admission
+#> 1  1 Sabeeha el-Hannan  probable      f  28 2023-01-01           <NA>
+#> 2  2    Jaedyn Robbins confirmed      f  62 2023-01-02     2023-01-02
+#> 5  5          Young Vu confirmed      m  42 2023-01-01           <NA>
+#> 6  6      Alyssa Gloyd  probable      f  60 2023-01-01           <NA>
+#> 8  8   Sebastian Boden  probable      m  28 2023-01-02     2023-01-03
+#> 9  9  Sierra Hernandez suspected      f  78 2023-01-01           <NA>
 #>   date_death date_first_contact date_last_contact ct_value
-#> 1       <NA>               <NA>              <NA>     25.4
-#> 2       <NA>         2022-12-31        2023-01-02       NA
-#> 3       <NA>         2023-01-01        2023-01-03     25.4
-#> 4       <NA>         2022-12-29        2023-01-05     25.4
-#> 5       <NA>         2022-12-29        2023-01-04       NA
-#> 6       <NA>         2023-01-04        2023-01-06       NA
+#> 1       <NA>               <NA>              <NA>       NA
+#> 2       <NA>         2023-01-02        2023-01-05     25.1
+#> 5       <NA>         2023-01-03        2023-01-04     25.1
+#> 6       <NA>         2023-01-02        2023-01-04       NA
+#> 8       <NA>         2023-01-01        2023-01-03       NA
+#> 9       <NA>         2022-12-29        2023-01-04       NA
 ```
 
 In this example, the line list is simulated using the default values
@@ -135,65 +140,56 @@ modify either of these, we can specify them in the function.
 
 ``` r
 linelist <- sim_linelist(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death,
   hosp_risk = 0.01,
   outbreak_start_date = as.Date("2019-12-01")
 )
 head(linelist)
-#>   id       case_name case_type gender age date_onset date_admission date_death
-#> 1  1   Sabrina Jones confirmed      f  90 2019-12-01           <NA>       <NA>
-#> 2  2    Ariel Aragon  probable      f  81 2019-12-01           <NA>       <NA>
-#> 3  3 Shandon Roberts confirmed      m  44 2019-12-03           <NA>       <NA>
-#> 4  4 Nathan Martinez confirmed      m  43 2019-12-01           <NA>       <NA>
-#> 5  5     Maria Downs suspected      f  62 2019-12-03           <NA>       <NA>
-#> 6  6    Andre Turner suspected      m  49 2019-12-01           <NA>       <NA>
-#>   date_first_contact date_last_contact ct_value
-#> 1               <NA>              <NA>     24.5
-#> 2         2019-12-03        2019-12-06       NA
-#> 3         2019-12-02        2019-12-04     24.5
-#> 4         2019-12-01        2019-12-04     24.5
-#> 5         2019-12-06        2019-12-08       NA
-#> 6         2019-12-02        2019-12-05       NA
+#>   id              case_name case_type gender age date_onset date_admission
+#> 1  1 Aaren-Matthew Deguzman  probable      m  65 2019-12-01           <NA>
+#> 2  2      Thaamira el-Yusuf confirmed      f  61 2019-12-01           <NA>
+#> 3  3         Hector Hickman suspected      m  56 2019-12-01           <NA>
+#> 4  4     Zuhriyaa al-Saleem  probable      f  36 2019-12-01           <NA>
+#> 5  5            Chisa Xiong confirmed      f  20 2019-12-01           <NA>
+#> 6  6     Tre-Shawn Williams confirmed      m  85 2019-12-02     2019-12-06
+#>   date_death date_first_contact date_last_contact ct_value
+#> 1       <NA>               <NA>              <NA>       NA
+#> 2       <NA>         2019-11-26        2019-12-06     25.3
+#> 3       <NA>         2019-11-28        2019-12-03       NA
+#> 4       <NA>         2019-11-28        2019-12-02       NA
+#> 5       <NA>         2019-11-28        2019-12-02     25.3
+#> 6       <NA>         2019-12-01        2019-12-04     25.3
 ```
 
 To simulate a table of contacts of cases (i.e. to reflect a contact
-tracing dataset) we can use the same serial interval defined for the
-example above. We additionally need a contact distribution, which
-represents the probability that each person that infected an individual,
-also had a given number of contacts that did not become infected.
+tracing dataset) we can use the same parameters defined for the example
+above.
 
 ``` r
-contact_distribution <- epiparameter::epidist(
-  disease = "COVID-19",
-  epi_dist = "contact_distribution",
-  prob_distribution = "pois",
-  prob_distribution_params = c(l = 5)
-)
-#> Citation cannot be created as author, year, journal or title is missing
-
 contacts <- sim_contacts(
-  R = 1.1,
-  serial_interval = serial_interval,
-  contact_distribution = contact_distribution
+  mean_contacts = 2, 
+  contact_interval = contact_interval, 
+  prob_infect = 0.5
 )
 head(contacts)
-#>              from               to cnt_age cnt_gender date_first_contact
-#> 1 Waseema el-Huda     Haasini Dinh      62          f         2022-12-30
-#> 2 Waseema el-Huda Jahlisha Eckhart       6          f         2023-01-02
-#> 3 Waseema el-Huda    Sofya Brabson       1          f         2022-12-27
-#> 4 Waseema el-Huda      Eoin Acosta      67          m         2022-12-30
-#> 5 Waseema el-Huda      Mandeep Cha      37          m         2023-01-03
-#> 6 Waseema el-Huda     Brianna Dock      21          f         2023-01-03
+#>             from               to age gender date_first_contact
+#> 2  James Padilla   Miranda Blanco  69      f         2022-12-31
+#> 3  James Padilla      Allan Bunge  83      m         2023-01-05
+#> 4    Allan Bunge   Nicholas Rabia  18      m         2023-01-02
+#> 5    Allan Bunge Unais al-Shabazz  85      m         2023-01-04
+#> 6 Nicholas Rabia    Apiluck Chong  84      m         2022-12-30
+#> 7 Nicholas Rabia     Rachel Tyler  90      f         2023-01-02
 #>   date_last_contact was_case         status
-#> 1        2023-01-04        Y           case
-#> 2        2023-01-04        Y           case
-#> 3        2023-01-01        N under_followup
-#> 4        2023-01-03        N under_followup
-#> 5        2023-01-08        N under_followup
-#> 6        2023-01-05        N        unknown
+#> 2        2023-01-03        N        unknown
+#> 3        2023-01-05        Y           case
+#> 4        2023-01-05        Y           case
+#> 5        2023-01-06        Y           case
+#> 6        2023-01-02        N under_followup
+#> 7        2023-01-04        Y           case
 ```
 
 If both the line list and contacts table are required, they can be
@@ -204,42 +200,42 @@ the same default settings as the other functions).
 
 ``` r
 outbreak <- sim_outbreak(
-  R = 1.1,
-  serial_interval = serial_interval,
+  mean_contacts = 2,
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
-  onset_to_death = onset_to_death,
-  contact_distribution = contact_distribution
+  onset_to_death = onset_to_death
 )
 head(outbreak$linelist)
-#>   id          case_name case_type gender age date_onset date_admission
-#> 1  1   Uqbah al-Sultana confirmed      m  41 2023-01-01           <NA>
-#> 2  2  Ulises Mora-Rojas  probable      m  41 2023-01-01           <NA>
-#> 3  3 Katherine Galambos  probable      f  41 2023-01-02     2023-01-03
-#> 4  4         Annie Doak  probable      f  63 2023-01-01           <NA>
-#> 5  5          Erika Yin confirmed      f  89 2023-01-02     2023-01-02
-#> 6  6  Casey Borcherding confirmed      f  47 2023-01-01           <NA>
-#>   date_death date_first_contact date_last_contact ct_value
-#> 1       <NA>               <NA>              <NA>       26
-#> 2       <NA>         2022-12-31        2023-01-04       NA
-#> 3       <NA>         2023-01-01        2023-01-02       NA
-#> 4       <NA>         2022-12-27        2023-01-02       NA
-#> 5       <NA>         2022-12-31        2023-01-05       26
-#> 6       <NA>         2023-01-04        2023-01-05       26
+#>    id           case_name case_type gender age date_onset date_admission
+#> 1   1   Katherin Trancoso confirmed      f   2 2023-01-01           <NA>
+#> 4   4        E-Shaw Allen confirmed      f  47 2023-01-01           <NA>
+#> 5   5     Madison Moltrer suspected      f   8 2023-01-01           <NA>
+#> 8   8 Christopher Richter confirmed      m  18 2023-01-02     2023-01-08
+#> 11 11      Elias Mckenzie confirmed      m   4 2023-01-01           <NA>
+#> 12 12       Elaine Nguyen confirmed      f  60 2023-01-02           <NA>
+#>    date_death date_first_contact date_last_contact ct_value
+#> 1        <NA>               <NA>              <NA>     24.5
+#> 4        <NA>         2023-01-02        2023-01-06     24.5
+#> 5        <NA>         2022-12-31        2023-01-03       NA
+#> 8        <NA>         2023-01-02        2023-01-03     24.5
+#> 11       <NA>         2023-01-04        2023-01-06     24.5
+#> 12       <NA>         2023-01-07        2023-01-09     24.5
 head(outbreak$contacts)
-#>               from                 to cnt_age cnt_gender date_first_contact
-#> 1 Uqbah al-Sultana  Ulises Mora-Rojas      41          m         2022-12-31
-#> 2 Uqbah al-Sultana Katherine Galambos      41          f         2023-01-01
-#> 3 Uqbah al-Sultana   Alexander Foster       2          m         2023-01-06
-#> 4 Uqbah al-Sultana Daniel Ordaz-Lopez       2          m         2023-01-03
-#> 5 Uqbah al-Sultana   Intisaar al-Saah      74          f         2023-01-02
-#> 6 Uqbah al-Sultana     Alondra Juarez      23          f         2022-12-31
-#>   date_last_contact was_case         status
-#> 1        2023-01-04        Y           case
-#> 2        2023-01-02        Y           case
-#> 3        2023-01-08        N under_followup
-#> 4        2023-01-03        N under_followup
-#> 5        2023-01-04        N under_followup
-#> 6        2023-01-07        N under_followup
+#>                from                to age gender date_first_contact
+#> 2 Katherin Trancoso      David Mcafee  46      m         2023-01-02
+#> 3 Katherin Trancoso Firdaus al-Hamidi  68      f         2022-12-30
+#> 4 Katherin Trancoso      E-Shaw Allen  47      f         2023-01-02
+#> 5 Katherin Trancoso   Madison Moltrer   8      f         2022-12-31
+#> 6      E-Shaw Allen       Tyko Pahang  24      m         2023-01-05
+#> 7   Madison Moltrer   Bryan Rodriguez  67      m         2022-12-31
+#>   date_last_contact was_case           status
+#> 2        2023-01-02        N lost_to_followup
+#> 3        2023-01-02        N   under_followup
+#> 4        2023-01-06        Y             case
+#> 5        2023-01-03        Y             case
+#> 6        2023-01-05        N lost_to_followup
+#> 7        2023-01-02        N   under_followup
 ```
 
 ## Help

From 0d2cf1de6fa99e1fd4843b6026082fd92bafad40 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Fri, 19 Jan 2024 12:05:06 +0000
Subject: [PATCH 31/50] update mean_contacts to contact_distribution

---
 R/checkers.R               |  4 ++--
 R/sim_contacts.R           | 16 ++++++++++++----
 R/sim_linelist.R           | 25 ++++++++++++++++++-------
 R/sim_network_bp.R         |  6 +++---
 R/sim_outbreak.R           | 17 +++++++++++++----
 R/sim_utils.R              |  4 ++--
 man/dot-check_sim_input.Rd |  8 +++++---
 man/dot-sim_network_bp.Rd  |  8 +++++---
 man/dot-sim_utils.Rd       |  8 +++++---
 man/sim_contacts.Rd        | 17 +++++++++++++----
 man/sim_linelist.Rd        | 19 ++++++++++++++-----
 man/sim_outbreak.Rd        | 17 +++++++++++++----
 12 files changed, 105 insertions(+), 44 deletions(-)

diff --git a/R/checkers.R b/R/checkers.R
index a74cdea9..13881222 100644
--- a/R/checkers.R
+++ b/R/checkers.R
@@ -114,7 +114,7 @@
 #' called for its side-effects, which will error if the input is invalid.
 #' @keywords internal
 .check_sim_input <- function(sim_type = c("linelist", "contacts", "outbreak"), # nolint cyclocomp_linter
-                             mean_contacts,
+                             contact_distribution,
                              contact_interval,
                              prob_infect,
                              outbreak_start_date,
@@ -132,7 +132,7 @@
   sim_type <- match.arg(sim_type)
 
   checkmate::assert_number(prob_infect, lower = 0, upper = 1)
-  checkmate::assert_number(mean_contacts, lower = 1)
+  .check_func_req_args(contact_distribution)
   .check_func_req_args(contact_interval)
   checkmate::assert_date(outbreak_start_date)
   checkmate::assert_integerish(min_outbreak_size, lower = 1)
diff --git a/R/sim_contacts.R b/R/sim_contacts.R
index 07428492..f41b37dd 100644
--- a/R/sim_contacts.R
+++ b/R/sim_contacts.R
@@ -13,6 +13,13 @@
 #'
 #' @examples
 #' # load data required to simulate contacts
+#' contact_distribution <- epiparameter::epidist(
+#'   disease = "COVID-19",
+#'   epi_dist = "contact distribution",
+#'   prob_distribution = "pois",
+#'   prob_distribution_params = c(mean = 2)
+#' )
+#'
 #' contact_interval <- epiparameter::epidist(
 #'   disease = "COVID-19",
 #'   epi_dist = "contact interval",
@@ -21,11 +28,11 @@
 #' )
 #'
 #' contacts <- sim_contacts(
-#'   mean_contacts = 2,
+#'   contact_distribution = contact_distribution,
 #'   contact_interval = contact_interval,
 #'   prob_infect = 0.5
 #' )
-sim_contacts <- function(mean_contacts,
+sim_contacts <- function(contact_distribution,
                          contact_interval,
                          prob_infect,
                          outbreak_start_date = as.Date("2023-01-01"),
@@ -42,11 +49,12 @@ sim_contacts <- function(mean_contacts,
     "Input delay distributions need to be either functions or <epidist>" =
       inherits(contact_interval, c("function", "epidist"))
   )
+  contact_distribution <- as.function(contact_distribution, func_type = "density")
   contact_interval <- as.function(contact_interval, func_type = "generate")
 
   .check_sim_input(
     sim_type = "contacts",
-    mean_contacts = mean_contacts,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
@@ -56,7 +64,7 @@ sim_contacts <- function(mean_contacts,
   )
 
   chain <- .sim_bp_linelist(
-    mean_contacts = mean_contacts,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
diff --git a/R/sim_linelist.R b/R/sim_linelist.R
index e611f313..02942954 100644
--- a/R/sim_linelist.R
+++ b/R/sim_linelist.R
@@ -20,8 +20,10 @@
 #' * `proportion`: a column with the proportion of the population that are in
 #' that age group. Proportions must sum to one.
 #'
-#' @param mean_contacts A single `numeric` for the mean number of contacts per
-#' infection.
+#' @param contact_distribution An `<epidist>` object or anonymous function for
+#' the contact distribution. This is any discrete density function that
+#' produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the
+#' number of contacts per infection.
 #' @param contact_interval An `<epidist>` object or anonymous function for
 #' the contact interval. This is analogous to the serial interval or generation
 #' time, but defines the time interval between an individual being
@@ -75,6 +77,13 @@
 #'
 #' @examples
 #' # load data required to simulate line list
+#' contact_distribution <- epiparameter::epidist(
+#'   disease = "COVID-19",
+#'   epi_dist = "contact distribution",
+#'   prob_distribution = "pois",
+#'   prob_distribution_params = c(mean = 2)
+#' )
+#'
 #' contact_interval <- epiparameter::epidist(
 #'   disease = "COVID-19",
 #'   epi_dist = "contact interval",
@@ -97,7 +106,7 @@
 #' )
 #' # example with single hospitalisation risk for entire population
 #' linelist <- sim_linelist(
-#'   mean_contacts = 2,
+#'   contact_distribution = contact_distribution,
 #'   contact_interval = contact_interval,
 #'   prob_infect = 0.5,
 #'   onset_to_hosp = onset_to_hosp,
@@ -115,7 +124,7 @@
 #'   risk = c(0.1, 0.05, 0.2)
 #' )
 #' linelist <- sim_linelist(
-#'   mean_contacts = 2,
+#'   contact_distribution = contact_distribution,
 #'   contact_interval = contact_interval,
 #'   prob_infect = 0.5,
 #'   onset_to_hosp = onset_to_hosp,
@@ -123,7 +132,7 @@
 #'   hosp_risk = age_dep_hosp_risk
 #' )
 #' head(linelist)
-sim_linelist <- function(mean_contacts,
+sim_linelist <- function(contact_distribution,
                          contact_interval,
                          prob_infect,
                          onset_to_hosp,
@@ -145,17 +154,19 @@ sim_linelist <- function(mean_contacts,
   # check and convert distribution to func if needed before .check_sim_input()
   stopifnot(
     "Input delay distributions need to be either functions or <epidist>" =
+      inherits(contact_distribution, c("function", "epidist")) &&
       inherits(contact_interval, c("function", "epidist")) &&
       inherits(onset_to_hosp, c("function", "epidist")) &&
       inherits(onset_to_death, c("function", "epidist"))
   )
+  contact_distribution <- as.function(contact_distribution, func_type = "density")
   contact_interval <- as.function(contact_interval, func_type = "generate")
   onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate")
   onset_to_death <- as.function(onset_to_death, func_type = "generate")
 
   .check_sim_input(
     sim_type = "linelist",
-    mean_contacts = mean_contacts,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
@@ -200,7 +211,7 @@ sim_linelist <- function(mean_contacts,
   }
 
   chain <- .sim_bp_linelist(
-    mean_contacts = mean_contacts,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index c4799e2f..d4f53201 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -21,7 +21,7 @@
 #'
 #' @return A `<data.frame>` with the contact and transmission chain data.
 #' @keywords internal
-.sim_network_bp <- function(mean_contacts,
+.sim_network_bp <- function(contact_distribution,
                             contact_interval,
                             prob_infect,
                             add_names) {
@@ -49,8 +49,8 @@
 
   # run loop until no more individuals are sampled
   while (next_gen_size > 0) {
-    # sample contact distribution
-    q <- stats::dpois(0:100 + 1, lambda = mean_contacts) * (0:100 + 1)
+    # sample contact distribution (excess degree distribution)
+    q <- contact_distribution(0:100 + 1) * (0:100 + 1)
     q <- q / sum(q)
     contacts <- sample(0:100, size = next_gen_size, replace = TRUE, prob = q)
 
diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R
index f4a619c2..665bbe55 100644
--- a/R/sim_outbreak.R
+++ b/R/sim_outbreak.R
@@ -18,6 +18,13 @@
 #'
 #' @examples
 #' # load data required to simulate outbreak data
+#' contact_distribution <- epiparameter::epidist(
+#'   disease = "COVID-19",
+#'   epi_dist = "contact distribution",
+#'   prob_distribution = "pois",
+#'   prob_distribution_params = c(mean = 2)
+#' )
+#'
 #' contact_interval <- epiparameter::epidist(
 #'   disease = "COVID-19",
 #'   epi_dist = "serial interval",
@@ -40,13 +47,13 @@
 #' )
 #'
 #' outbreak <- sim_outbreak(
-#'   mean_contacts = 2,
+#'   contact_distribution = contact_distribution,
 #'   contact_interval = contact_interval,
 #'   prob_infect = 0.5,
 #'   onset_to_hosp = onset_to_hosp,
 #'   onset_to_death = onset_to_death
 #' )
-sim_outbreak <- function(mean_contacts,
+sim_outbreak <- function(contact_distribution,
                          contact_interval,
                          prob_infect,
                          onset_to_hosp,
@@ -73,17 +80,19 @@ sim_outbreak <- function(mean_contacts,
   # check and convert distribution to func if needed before .check_sim_input()
   stopifnot(
     "Input delay distributions need to be either functions or <epidist>" =
+      inherits(contact_distribution, c("function", "epidist")) &&
       inherits(contact_interval, c("function", "epidist")) &&
       inherits(onset_to_hosp, c("function", "epidist")) &&
       inherits(onset_to_death, c("function", "epidist"))
   )
+  contact_distribution <- as.function(contact_distribution, func_type = "density")
   contact_interval <- as.function(contact_interval, func_type = "generate")
   onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate")
   onset_to_death <- as.function(onset_to_death, func_type = "generate")
 
   .check_sim_input(
     sim_type = "outbreak",
-    mean_contacts = mean_contacts,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
@@ -129,7 +138,7 @@ sim_outbreak <- function(mean_contacts,
   }
 
   chain <- .sim_bp_linelist(
-    mean_contacts = mean_contacts,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = prob_infect,
     outbreak_start_date = outbreak_start_date,
diff --git a/R/sim_utils.R b/R/sim_utils.R
index 8d010996..4c81bba4 100644
--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -14,7 +14,7 @@
 NULL
 
 #' @name .sim_utils
-.sim_bp_linelist <- function(mean_contacts,
+.sim_bp_linelist <- function(contact_distribution,
                              contact_interval,
                              prob_infect,
                              outbreak_start_date,
@@ -27,7 +27,7 @@ NULL
   # condition on a minimum chain size
   while (outbreak_size < min_outbreak_size) {
     chain <- .sim_network_bp(
-      mean_contacts = mean_contacts,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = prob_infect,
       add_names = add_names
diff --git a/man/dot-check_sim_input.Rd b/man/dot-check_sim_input.Rd
index 533165a8..525ac581 100644
--- a/man/dot-check_sim_input.Rd
+++ b/man/dot-check_sim_input.Rd
@@ -6,7 +6,7 @@
 \usage{
 .check_sim_input(
   sim_type = c("linelist", "contacts", "outbreak"),
-  mean_contacts,
+  contact_distribution,
   contact_interval,
   prob_infect,
   outbreak_start_date,
@@ -27,8 +27,10 @@
 \item{sim_type}{A \code{character} string specifying which simulation function
 this function is being called within.}
 
-\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
-infection.}
+\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
+the contact distribution. This is any discrete density function that
+produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the
+number of contacts per infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
diff --git a/man/dot-sim_network_bp.Rd b/man/dot-sim_network_bp.Rd
index daab90a0..9a92853d 100644
--- a/man/dot-sim_network_bp.Rd
+++ b/man/dot-sim_network_bp.Rd
@@ -5,11 +5,13 @@
 \title{Simulate a random network branching process model with a probability of
 infection for each contact}
 \usage{
-.sim_network_bp(mean_contacts, contact_interval, prob_infect, add_names)
+.sim_network_bp(contact_distribution, contact_interval, prob_infect, add_names)
 }
 \arguments{
-\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
-infection.}
+\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
+the contact distribution. This is any discrete density function that
+produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the
+number of contacts per infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
diff --git a/man/dot-sim_utils.Rd b/man/dot-sim_utils.Rd
index 49c5a0ba..d07a53a4 100644
--- a/man/dot-sim_utils.Rd
+++ b/man/dot-sim_utils.Rd
@@ -8,7 +8,7 @@
 within \pkg{simulist}}
 \usage{
 .sim_bp_linelist(
-  mean_contacts,
+  contact_distribution,
   contact_interval,
   prob_infect,
   outbreak_start_date,
@@ -33,8 +33,10 @@ within \pkg{simulist}}
 )
 }
 \arguments{
-\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
-infection.}
+\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
+the contact distribution. This is any discrete density function that
+produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the
+number of contacts per infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
diff --git a/man/sim_contacts.Rd b/man/sim_contacts.Rd
index f85561bb..a467e6b7 100644
--- a/man/sim_contacts.Rd
+++ b/man/sim_contacts.Rd
@@ -5,7 +5,7 @@
 \title{Simulate contacts for an infectious disease outbreak}
 \usage{
 sim_contacts(
-  mean_contacts,
+  contact_distribution,
   contact_interval,
   prob_infect,
   outbreak_start_date = as.Date("2023-01-01"),
@@ -17,8 +17,10 @@ sim_contacts(
 )
 }
 \arguments{
-\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
-infection.}
+\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
+the contact distribution. This is any discrete density function that
+produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the
+number of contacts per infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
@@ -59,6 +61,13 @@ Simulate contacts for an infectious disease outbreak
 }
 \examples{
 # load data required to simulate contacts
+contact_distribution <- epiparameter::epidist(
+  disease = "COVID-19",
+  epi_dist = "contact distribution",
+  prob_distribution = "pois",
+  prob_distribution_params = c(mean = 2)
+)
+
 contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
   epi_dist = "contact interval",
@@ -67,7 +76,7 @@ contact_interval <- epiparameter::epidist(
 )
 
 contacts <- sim_contacts(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5
 )
diff --git a/man/sim_linelist.Rd b/man/sim_linelist.Rd
index 96bfb7a4..b8ce42f2 100644
--- a/man/sim_linelist.Rd
+++ b/man/sim_linelist.Rd
@@ -5,7 +5,7 @@
 \title{Simulate a line list}
 \usage{
 sim_linelist(
-  mean_contacts,
+  contact_distribution,
   contact_interval,
   prob_infect,
   onset_to_hosp,
@@ -23,8 +23,10 @@ sim_linelist(
 )
 }
 \arguments{
-\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
-infection.}
+\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
+the contact distribution. This is any discrete density function that
+produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the
+number of contacts per infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
@@ -116,6 +118,13 @@ that age group. Proportions must sum to one.
 }
 \examples{
 # load data required to simulate line list
+contact_distribution <- epiparameter::epidist(
+  disease = "COVID-19",
+  epi_dist = "contact distribution",
+  prob_distribution = "pois",
+  prob_distribution_params = c(mean = 2)
+)
+
 contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
   epi_dist = "contact interval",
@@ -138,7 +147,7 @@ onset_to_death <- epiparameter::epidist_db(
 )
 # example with single hospitalisation risk for entire population
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -156,7 +165,7 @@ age_dep_hosp_risk <- data.frame(
   risk = c(0.1, 0.05, 0.2)
 )
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
diff --git a/man/sim_outbreak.Rd b/man/sim_outbreak.Rd
index 1de6a77a..f238571a 100644
--- a/man/sim_outbreak.Rd
+++ b/man/sim_outbreak.Rd
@@ -5,7 +5,7 @@
 \title{Simulate a line list and a contacts table}
 \usage{
 sim_outbreak(
-  mean_contacts,
+  contact_distribution,
   contact_interval,
   prob_infect,
   onset_to_hosp,
@@ -25,8 +25,10 @@ sim_outbreak(
 )
 }
 \arguments{
-\item{mean_contacts}{A single \code{numeric} for the mean number of contacts per
-infection.}
+\item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
+the contact distribution. This is any discrete density function that
+produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the
+number of contacts per infection.}
 
 \item{contact_interval}{An \verb{<epidist>} object or anonymous function for
 the contact interval. This is analogous to the serial interval or generation
@@ -128,6 +130,13 @@ that age group. Proportions must sum to one.
 }
 \examples{
 # load data required to simulate outbreak data
+contact_distribution <- epiparameter::epidist(
+  disease = "COVID-19",
+  epi_dist = "contact distribution",
+  prob_distribution = "pois",
+  prob_distribution_params = c(mean = 2)
+)
+
 contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
   epi_dist = "serial interval",
@@ -150,7 +159,7 @@ onset_to_death <- epiparameter::epidist_db(
 )
 
 outbreak <- sim_outbreak(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,

From 2156fcb4f15b8c8e6e057084125bdaf4326bf496 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Fri, 19 Jan 2024 12:06:09 +0000
Subject: [PATCH 32/50] update tests to use contact_distribution instead of
 mean_contacts

---
 tests/testthat/test-checkers.R       | 23 ++++++++++++-------
 tests/testthat/test-sim_contacts.R   | 17 +++++++++-----
 tests/testthat/test-sim_linelist.R   | 33 +++++++++++++++++-----------
 tests/testthat/test-sim_network_bp.R | 23 +++++++++++++++++--
 tests/testthat/test-sim_outbreak.R   | 15 +++++++++----
 5 files changed, 79 insertions(+), 32 deletions(-)

diff --git a/tests/testthat/test-checkers.R b/tests/testthat/test-checkers.R
index 59305d67..9d9e1e83 100644
--- a/tests/testthat/test-checkers.R
+++ b/tests/testthat/test-checkers.R
@@ -135,6 +135,13 @@ test_that(".check_age_df fails as expected", {
 })
 
 suppressMessages({
+  contact_distribution <- as.function(epiparameter::epidist(
+    disease = "COVID-19",
+    epi_dist = "contact distribution",
+    prob_distribution = "pois",
+    prob_distribution_params = c(mean = 2)
+  ))
+
   contact_interval <- as.function(epiparameter::epidist(
     disease = "COVID-19",
     epi_dist = "contact interval",
@@ -160,7 +167,7 @@ suppressMessages({
 test_that(".check_sim_input works as expected", {
   chk <- .check_sim_input(
     sim_type = "outbreak",
-    mean_contacts = 1,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     outbreak_start_date = as.Date("2023-01-01"),
@@ -189,7 +196,7 @@ test_that(".check_sim_input works as expected", {
 
   chk <- .check_sim_input(
     sim_type = "linelist",
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     outbreak_start_date = as.Date("2023-01-01"),
@@ -213,7 +220,7 @@ test_that(".check_sim_input works as expected", {
 
   chk <- .check_sim_input(
     sim_type = "contacts",
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     outbreak_start_date = as.Date("2023-01-01"),
@@ -235,17 +242,17 @@ test_that(".check_sim_input fails as expected", {
     regexp = "(arg)*(should be one of)*(linelist)*(contacts)*(outbreak)"
   )
   expect_error(
-    .check_sim_input(sim_type = "outbreak", mean_contacts = 0, prob_infect = 0.5),
-    regexp = "(Assertion on)*(mean_contacts)*(failed)"
+    .check_sim_input(sim_type = "outbreak", contact_distribution = list(), prob_infect = 0.5),
+    regexp = "(Assertion on)*(contact_distribution)*(failed)"
   )
   expect_error(
-    .check_sim_input(sim_type = "outbreak", mean_contacts = 2, contact_interval = list(), prob_infect = 0.5),
+    .check_sim_input(sim_type = "outbreak", contact_distribution = contact_distribution, contact_interval = list(), prob_infect = 0.5),
     regexp = "(Assertion on)*(contact_interval)*(failed)"
   )
   expect_error(
     .check_sim_input(
       sim_type = "outbreak",
-      mean_contacts = 2,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = 0.5,
       outbreak_start_date = "01-01-2023"
@@ -255,7 +262,7 @@ test_that(".check_sim_input fails as expected", {
   expect_error(
     .check_sim_input(
       sim_type = "outbreak",
-      mean_contacts = 2,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = 0.5,
       outbreak_start_date = as.Date("2023-01-01"),
diff --git a/tests/testthat/test-sim_contacts.R b/tests/testthat/test-sim_contacts.R
index ade7eec5..08edfe7d 100644
--- a/tests/testthat/test-sim_contacts.R
+++ b/tests/testthat/test-sim_contacts.R
@@ -1,4 +1,11 @@
 suppressMessages({
+  contact_distribution <- epiparameter::epidist(
+    disease = "COVID-19",
+    epi_dist = "contact distribution",
+    prob_distribution = "pois",
+    prob_distribution_params = c(mean = 2)
+  )
+
   contact_interval <- epiparameter::epidist(
     disease = "COVID-19",
     epi_dist = "contact interval",
@@ -10,7 +17,7 @@ suppressMessages({
 test_that("sim_contacts works as expected", {
   set.seed(1)
   contacts <- sim_contacts(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5
   )
@@ -29,7 +36,7 @@ test_that("sim_contacts works as expected", {
 test_that("sim_contacts works as expected with modified config", {
   set.seed(1)
   contacts <- sim_contacts(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     config = create_config(
@@ -52,7 +59,7 @@ test_that("sim_contacts works as expected with modified config", {
 test_that("sim_contacts works as expected with modified config parameters", {
   set.seed(1)
   contacts <- sim_contacts(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     config = create_config(
@@ -74,7 +81,7 @@ test_that("sim_contacts works as expected with modified config parameters", {
 test_that("sim_contacts fails as expected with modified config", {
   expect_error(
     sim_contacts(
-      mean_contacts = 2,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = 0.5,
       config = create_config(
@@ -88,7 +95,7 @@ test_that("sim_contacts fails as expected with modified config", {
 test_that("sim_contacts fails as expected with empty config", {
   expect_error(
     sim_contacts(
-      mean_contacts = 2,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = 0.5,
       config = list()
diff --git a/tests/testthat/test-sim_linelist.R b/tests/testthat/test-sim_linelist.R
index 8aadc14a..65b03995 100644
--- a/tests/testthat/test-sim_linelist.R
+++ b/tests/testthat/test-sim_linelist.R
@@ -1,4 +1,11 @@
 suppressMessages({
+  contact_distribution <- epiparameter::epidist(
+    disease = "COVID-19",
+    epi_dist = "contact distribution",
+    prob_distribution = "pois",
+    prob_distribution_params = c(mean = 2)
+  )
+
   contact_interval <- epiparameter::epidist(
     disease = "COVID-19",
     epi_dist = "contact interval",
@@ -24,7 +31,7 @@ suppressMessages({
 test_that("sim_linelist works as expected", {
   set.seed(1)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -58,7 +65,7 @@ test_that("sim_linelist works as expected with age-strat risks", {
   )
   set.seed(1)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -83,7 +90,7 @@ test_that("sim_linelist works as expected with age-strat risks", {
 test_that("sim_linelist works as expected without Ct", {
   set.seed(1)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -106,7 +113,7 @@ test_that("sim_linelist works as expected without Ct", {
 test_that("sim_linelist works as expected with anonymous", {
   set.seed(1)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -134,7 +141,7 @@ test_that("sim_linelist works as expected with age structure", {
   )
   set.seed(1)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -166,7 +173,7 @@ test_that("sim_linelist works as expected with age-strat risks & age struct", {
   )
   set.seed(1)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -195,7 +202,7 @@ test_that("sim_linelist gives expected proportion of ages with age struct", {
   )
   set.seed(3)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -225,7 +232,7 @@ test_that("sim_linelist gives expected proportion of ages with age struct", {
 test_that("sim_linelist works as expected with modified config", {
   set.seed(1)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -251,7 +258,7 @@ test_that("sim_linelist works as expected with modified config", {
 test_that("sim_linelist works as expected with modified config parameters", {
   set.seed(1)
   linelist <- sim_linelist(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -276,7 +283,7 @@ test_that("sim_linelist works as expected with modified config parameters", {
 test_that("sim_linelist fails as expected with modified config", {
   expect_error(
     sim_linelist(
-      mean_contacts = 2,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = 0.5,
       onset_to_hosp = onset_to_hosp,
@@ -290,7 +297,7 @@ test_that("sim_linelist fails as expected with modified config", {
 
   expect_error(
     sim_linelist(
-      mean_contacts = 2,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = 0.5,
       onset_to_hosp = onset_to_hosp,
@@ -307,7 +314,7 @@ test_that("sim_linelist fails as expected with modified config", {
 test_that("sim_linelist fails as expected with empty config", {
   expect_error(
     sim_linelist(
-      mean_contacts = 2,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = 0.5,
       onset_to_hosp = onset_to_hosp,
@@ -322,7 +329,7 @@ test_that("sim_linelist fails as expected exceeding max iter for bp model", {
   set.seed(1)
   expect_error(
     sim_linelist(
-      mean_contacts = 1,
+      contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = 0.1,
       onset_to_hosp = onset_to_hosp,
diff --git a/tests/testthat/test-sim_network_bp.R b/tests/testthat/test-sim_network_bp.R
index bc132815..920ff5de 100644
--- a/tests/testthat/test-sim_network_bp.R
+++ b/tests/testthat/test-sim_network_bp.R
@@ -1,4 +1,13 @@
 suppressMessages({
+  contact_distribution <- as.function(
+    epiparameter::epidist(
+      disease = "COVID-19",
+      epi_dist = "contact distribution",
+      prob_distribution = "pois",
+      prob_distribution_params = c(mean = 2)
+    )
+  )
+
   contact_interval <- as.function(
     epiparameter::epidist(
       disease = "COVID-19",
@@ -12,7 +21,7 @@ suppressMessages({
 test_that(".sim_network_bp works as expected", {
   set.seed(1)
   res <- .sim_network_bp(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     add_names = TRUE
@@ -26,9 +35,19 @@ test_that(".sim_network_bp works as expected", {
 })
 
 test_that(".sim_network_bp works as expected with no contacts", {
+  suppressMessages(
+    contact_distribution <- as.function(
+      epiparameter::epidist(
+        disease = "COVID-19",
+        epi_dist = "contact distribution",
+        prob_distribution = "pois",
+        prob_distribution_params = c(mean = 1)
+      )
+    )
+  )
   set.seed(4)
   res <- .sim_network_bp(
-    mean_contacts = 1,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     add_names = TRUE
diff --git a/tests/testthat/test-sim_outbreak.R b/tests/testthat/test-sim_outbreak.R
index d8892351..30bcc74e 100644
--- a/tests/testthat/test-sim_outbreak.R
+++ b/tests/testthat/test-sim_outbreak.R
@@ -1,4 +1,11 @@
 suppressMessages({
+  contact_distribution <- epiparameter::epidist(
+    disease = "COVID-19",
+    epi_dist = "contact distribution",
+    prob_distribution = "pois",
+    prob_distribution_params = c(mean = 2)
+  )
+
   contact_interval <- epiparameter::epidist(
     disease = "COVID-19",
     epi_dist = "contact interval",
@@ -24,7 +31,7 @@ suppressMessages({
 test_that("sim_outbreak works as expected", {
   set.seed(1)
   outbreak <- sim_outbreak(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -56,7 +63,7 @@ test_that("sim_outbreak works as expected", {
 test_that("sim_outbreak works as expected with add_names = FALSE", {
   set.seed(1)
   outbreak <- sim_outbreak(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -101,7 +108,7 @@ test_that("sim_outbreak works as expected with age-strat risks", {
   )
   set.seed(1)
   outbreak <- sim_outbreak(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,
@@ -141,7 +148,7 @@ test_that("sim_outbreak works as expected with age structure", {
   )
   set.seed(1)
   outbreak <- sim_outbreak(
-    mean_contacts = 2,
+    contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
     onset_to_hosp = onset_to_hosp,

From 3888aeb63c06f822624ef0de12bee42974a1bf5b Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Fri, 19 Jan 2024 12:07:29 +0000
Subject: [PATCH 33/50] update vignettes and README to use contact_distribution
 instead of mean_contacts

---
 README.Rmd                      | 20 ++++++++++++------
 vignettes/age-strat-risks.Rmd   | 23 +++++++++++++--------
 vignettes/age-struct-pop.Rmd    | 19 +++++++++++------
 vignettes/design-principles.Rmd |  2 +-
 vignettes/simulist.Rmd          | 36 +++++++++++++++++++++++----------
 vignettes/vis-linelist.Rmd      | 22 +++++++++++++++++---
 6 files changed, 87 insertions(+), 35 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 80d777dd..d416edc0 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -50,9 +50,17 @@ library(simulist)
 library(epiparameter)
 ```
 
-The line list simulation requires that we define a contact interval, onset-to-hospitalisation delay, and onset-to-death delay. We can load these from the library of epidemiological parameters in the `{epiparameter}` R package if available, or if these are not in the database yet (such as the contact interval for COVID-19) we can define them ourselves.
+The line list simulation requires that we define a contact distribution, contact interval, onset-to-hospitalisation delay, and onset-to-death delay. We can load these from the library of epidemiological parameters in the `{epiparameter}` R package if available, or if these are not in the database yet (such as the contact interval for COVID-19) we can define them ourselves.
 
 ```{r create-epidists}
+# create COVID-19 contact distribution
+contact_distribution <- epiparameter::epidist(
+  disease = "COVID-19", 
+  epi_dist = "contact distribution", 
+  prob_distribution = "pois", 
+  prob_distribution_params = c(mean = 2)
+)
+
 # create COVID-19 contact interval
 contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
@@ -76,12 +84,12 @@ onset_to_death <- epiparameter::epidist_db(
 )
 ```
 
-To simulate a line list for COVID-19 with an assumed average number of contacts of 2 and a probability of infection per contact of 0.5, we use the `sim_linelist()` function. The mean number of contacts and probability of infection determine the outbreak reproduction number, if the resulting reproduction number is around one it means we will likely get a reasonably sized outbreak (10 - 1000 cases, varying due to the stochastic simulation). _Take care when setting the mean number of contacts and the probability of infection, as this can lead to the outbreak becoming extremely large_.
+To simulate a line list for COVID-19 with an Poisson contact distribution with a mean number of contacts of 2 and a probability of infection per contact of 0.5, we use the `sim_linelist()` function. The mean number of contacts and probability of infection determine the outbreak reproduction number, if the resulting reproduction number is around one it means we will likely get a reasonably sized outbreak (10 - 1,000 cases, varying due to the stochastic simulation). _Take care when setting the mean number of contacts and the probability of infection, as this can lead to the outbreak becoming extremely large_.
 
 ```{r sim-linelist}
 set.seed(1)
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -94,7 +102,7 @@ In this example, the line list is simulated using the default values (see `?sim_
 
 ```{r sim-linelist-diff-args}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -109,7 +117,7 @@ To simulate a table of contacts of cases (i.e. to reflect a contact tracing data
 
 ```{r, sim-contacts}
 contacts <- sim_contacts(
-  mean_contacts = 2, 
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval, 
   prob_infect = 0.5
 )
@@ -120,7 +128,7 @@ If both the line list and contacts table are required, they can be jointly simul
 
 ```{r, sim-outbreak}
 outbreak <- sim_outbreak(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
diff --git a/vignettes/age-strat-risks.Rmd b/vignettes/age-strat-risks.Rmd
index d0a4d671..c6905957 100644
--- a/vignettes/age-strat-risks.Rmd
+++ b/vignettes/age-strat-risks.Rmd
@@ -44,6 +44,13 @@ library(epiparameter)
 Here is an example that uses the default hospitalisation and death risks (inside and outside of hospital). First we load the requested delay distributions using the {epiparameter} package.
 
 ```{r, read-delay-dists}
+contact_distribution <- epidist(
+  disease = "COVID-19", 
+  epi_dist = "contact distribution", 
+  prob_distribution = "pois", 
+  prob_distribution_params = c(mean = 2)
+)
+
 contact_interval <- epidist(
   disease = "COVID-19",
   epi_dist = "contact interval",
@@ -82,7 +89,7 @@ Simulate a line list with population-wide default risks:
 
 ```{r, sim-linelist}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -97,7 +104,7 @@ We can run another simulation and change the hospitalisation and death risks, in
 
 ```{r, sim-linelist-diff-risks}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -132,7 +139,7 @@ age_dep_hosp_risk
 
 ```{r, sim-age-strat-linelist}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -161,7 +168,7 @@ age_dep_hosp_risk <- data.frame(
 age_dep_hosp_risk
 
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -179,7 +186,7 @@ age_dep_hosp_risk <- data.frame(
 )
 
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -203,7 +210,7 @@ age_dep_hosp_death_risk <- data.frame(
 age_dep_hosp_death_risk
 
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -220,7 +227,7 @@ age_dep_non_hosp_death_risk <- data.frame(
 age_dep_non_hosp_death_risk
 
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -246,7 +253,7 @@ age_dep_non_hosp_death_risk <- data.frame(
 )
 
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
diff --git a/vignettes/age-struct-pop.Rmd b/vignettes/age-struct-pop.Rmd
index 207b4c97..40c138e7 100644
--- a/vignettes/age-struct-pop.Rmd
+++ b/vignettes/age-struct-pop.Rmd
@@ -22,7 +22,7 @@ knitr::opts_chunk$set(
 If you are unfamiliar with the {simulist} package or the `sim_linelist()` function [Get Started vignette](simulist.html) is a great place to start.
 :::
 
-This vignette describes how to simulate a line list with either a uniform or non-uniform distribution of ages within the population. This population age structure may either describe the population of a region as a whole, or specifically which age groups are more likely to become infected than others, for instance because they have more social contacts.
+This vignette describes how to simulate a line list with either a uniform or non-uniform distribution of ages within the population. This population age structure may either describe the population of a region as a whole, or specifically which age groups are more likely to become infected than others.
 
 The {simulist} package uses a branching process (using a random network model of contacts), which is independent of population age structure to simulate the line list, and then {simulist} _paints_ the demographic information onto the infected individuals (and in the case of `sim_outbreak()` or `sim_contacts()` the contacts of infected individuals). In other words, once the cases in the line list have been simulated the ages are assigned to each individual post hoc, specified by the age range, and age structure, if supplied.
 
@@ -34,9 +34,16 @@ library(epiparameter)
 library(ggplot2)
 ```
 
-First we load the requested delay distributions using the {epiparameter} package. The onset-to-hospitalisation and onset-to-death delay distributions are extracted from the {epiparameter} database, and the serial interval is manually defined as it is not yet available from the {epiparameter} database.
+First we load the requested delay distributions using the {epiparameter} package. The onset-to-hospitalisation and onset-to-death delay distributions are extracted from the {epiparameter} database, and the contact distribution and the contact interval are manually defined as they are not yet available from the {epiparameter} database.
 
 ```{r, read-delay-dists}
+contact_distribution <- epidist(
+  disease = "COVID-19", 
+  epi_dist = "contact distribution", 
+  prob_distribution = "pois", 
+  prob_distribution_params = c(mean = 2)
+)
+
 contact_interval <- epidist(
   disease = "COVID-19",
   epi_dist = "contact interval",
@@ -67,7 +74,7 @@ set.seed(1)
 
 ## Uniform population age
 
-By default `sim_linelist()` simulates individuals ages assuming a uniform distribution between 1 and 90. To change this age range, a vector of two numbers can be supplied to the `population_age` argument. Here we simulated an outbreak in a population with a population ranging from 5 to 75 (inclusive, `[5,75]`).
+By default `sim_linelist()` simulates individuals ages assuming a uniform distribution between 1 and 90. To change this age range, a vector of two numbers can be supplied to the `population_age` argument. Here we simulate an outbreak in a population with a population ranging from 5 to 75 (inclusive, `[5,75]`).
 
 ***Note***: all ages are assumed to be in the unit of years and need to be integers (or at least ["integerish"](https://rlang.r-lib.org/reference/is_integerish.html) if stored as double).
 
@@ -75,7 +82,7 @@ All simulations in this vignette condition the simulation to have a minimum outb
 
 ```{r sim-linelist-age-range}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.45,
   onset_to_hosp = onset_to_hosp,
@@ -120,7 +127,7 @@ age_struct
 
 ```{r, sim-age-struct-linelist}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.45,
   onset_to_hosp = onset_to_hosp,
@@ -173,7 +180,7 @@ age_struct
 
 ```{r, sim-age-struct-linelist-young}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.45,
   onset_to_hosp = onset_to_hosp,
diff --git a/vignettes/design-principles.Rmd b/vignettes/design-principles.Rmd
index e540c3ae..9db296f0 100644
--- a/vignettes/design-principles.Rmd
+++ b/vignettes/design-principles.Rmd
@@ -30,7 +30,7 @@ The simulation functions either return a `<data.frame>` or a `list` of `<data.fr
 
 - When using age-stratified risks of hospitalisation and deaths (see [Age-stratified hospitalisation and death risks vignette](age-strat-risks.html) for details) there is an interaction between function arguments. The `<data.frame>` that defines the age-stratification in `hosp_risk`, `hosp_death_risk` and `non_hosp_death_risk` arguments gives the lower bound of the age groups. The upper bound of the age groups is derived from the next lower bound, but the upper bound oldest age group is defined by the upper age given to the `population_age` argument. This interaction of arguments is not ideal, as it can be more difficult to understand for users (as outlined in [The Tidy Design book](https://design.tidyverse.org/independent-meaning.html#whats-the-problem)), however, the interaction does not change the interpretation of each argument which would be more convoluted. This design decision was taken when we changed the structure of the `<data.frame>` accepted as input to the `*_risk` arguments from having two columns for a lower and upper age limit, to a single column of lower age bounds. This change was made in [pull request #14](https://github.com/epiverse-trace/simulist/pull/14) and follows the design of [{socialmixr}](https://github.com/epiforecasts/socialmixr/blob/main/R/contact_matrix.r) for defining age bounds. The newer structure is judged to be preferred as it prevents input errors by the user when the age bounds are overlapping or non-contiguous (i.e. missing age groups).
 
-- The column names of the contact relationships (edges of the network) are called `from` and `to`. Names of contacts table match {epicontacts} `<epicontacts>` objects. If the column names of the two contacts provided to `epicontacts::make_epicontacts()` arguments `from` and `to` are not `from` and `to` they will be silently renamed in the resulting `<epicontacts>` object. By making these column names `from` and `to` when output from `sim_contacts()` or `sim_outbreak()` it prevents any confusion when used with {epicontacts}. This names are also preferred as they are usefully descriptive.
+- The column names of the contact relationships (edges of the network) are called `from` and `to`. Names of the contacts table match {epicontacts} `<epicontacts>` objects. If the column names of the two contacts provided to `epicontacts::make_epicontacts()` arguments `from` and `to` are not `from` and `to` they will be silently renamed in the resulting `<epicontacts>` object. By making these column names `from` and `to` when output from `sim_contacts()` or `sim_outbreak()` it prevents any confusion when used with {epicontacts}. This naming is also preferred as they are usefully descriptive.
 
 - The [Visualising simulated data vignette](vis-linelist.html) contains interactive data visualisation when rendered to the web. This enforces some limitations. The vignette uses `output: rmarkdown::html_document()` instead of `output: bookdown::html_vignette2` and does not contain `pkgdown: as_is: true` in the yaml metadata, in order for the interactive figures to render and operate correctly. This means that the vignette figures will not automatically be numbered and start with "Figure _x_" (where _x_ is replaced by a number). Instead, it was decided for this vignette that this information would be manually written, and then manually updated if the number or order of the figures changed. This is not an ideal solution and automation is preferred, but on balance, it was decided that the addition of interactive visualisation from {epicontacts} outweighed the downside of manual figure labelling.
 
diff --git a/vignettes/simulist.Rmd b/vignettes/simulist.Rmd
index f1955eb8..7b4b23a1 100644
--- a/vignettes/simulist.Rmd
+++ b/vignettes/simulist.Rmd
@@ -32,6 +32,14 @@ library(epiparameter)
 First we load in some data that is required for the line list simulation. Data on epidemiological parameters and distributions are read from the {epiparameter} R package.
 
 ```{r read-epidist}
+# create contact distribution (not available from {epiparameter} database)
+contact_distribution <- epidist(
+  disease = "COVID-19", 
+  epi_dist = "contact distribution", 
+  prob_distribution = "pois", 
+  prob_distribution_params = c(mean = 2)
+)
+
 # create contact interval (not available from {epiparameter} database)
 contact_interval <- epidist(
   disease = "COVID-19",
@@ -61,11 +69,11 @@ The seed is set to ensure the output of the vignette is consistent. When using {
 set.seed(123)
 ```
 
-The mean (average) number of contacts (`mean_contacts`) is the first argument in `sim_linelist()` and with the contact interval and probability of infection per contact (`prob_infect`) will control the growth rate of the simulated epidemic. Here we set the mean number of contacts as 2 and the probability of infection as 0.5 (on average half of contacts become infected). The minimum requirements to simulate a line list are the mean number of contacts (`mean_contacts`), the serial interval, probability of infection, onset-to-hospitalisation delay and onset-to-death delay.
+The first argument in `sim_linelist()` is the contact distribution (`contact_distribution`), which here we specify as Poisson distribution with a mean (average) number of contacts of 2, and with the contact interval and probability of infection per contact (`prob_infect`) will control the growth rate of the simulated epidemic. Here we set the probability of infection as 0.5 (on average half of contacts become infected). The minimum requirements to simulate a line list are the contact distribution, the contact interval, probability of infection, onset-to-hospitalisation delay and onset-to-death delay.
 
 ```{r, sim-linelist}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -76,7 +84,7 @@ head(linelist)
 
 ## Case type 
 
-During an infectious disease outbreak it may not be possible to confirm every infection as a case. A confirmed case is typically defined via a diagnostic test. There are several reasons why a case may not be confirmed, including limited testing capacity and mild or non-specific early symptoms, especially in fast growing epidemics. We therefore include two other categories for cases: probable and suspected. For example, probable cases may those that show clinical evidence for the disease but have not, or cannot, be confirmed by a diagnostic test. Suspected cases are those that are possibly infected but do not show clear clinical or epidemiological evidence, nor has a diagnostic test been performed. Hence the distribution of suspected/probable/confirmed will depend on the pathogen characteristics, outbreak-specific definitions, and resources available.
+During an infectious disease outbreak it may not be possible to confirm every infection as a case. A confirmed case is typically defined via a diagnostic test. There are several reasons why a case may not be confirmed, including limited testing capacity and mild or non-specific early symptoms, especially in fast growing epidemics. We therefore include two other categories for cases: probable and suspected. For example, probable cases may be those that show clinical evidence for the disease but have not, or cannot, be confirmed by a diagnostic test. Suspected cases are those that are possibly infected but do not show clear clinical or epidemiological evidence, nor has a diagnostic test been performed. Hence the distribution of suspected/probable/confirmed will depend on the pathogen characteristics, outbreak-specific definitions, and resources available.
 
 The line list output from the {simulist} simulation contains a column (`case_type`) with the type of case. 
 
@@ -84,7 +92,7 @@ The line list output from the {simulist} simulation contains a column (`case_typ
 
 ```{r, sim-linelist-default-case-type}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -97,7 +105,7 @@ To alter these probabilities, supply a named vector to the `sim_linelist()` argu
 
 ```{r, sim-linelist-mod-case-type}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -121,7 +129,7 @@ When requiring a line list that represents a large outbreak, such as the COVID-1
 
 ```{r, sim-large-linelist}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -131,7 +139,7 @@ linelist <- sim_linelist(
 head(linelist)
 ```
 
-The amount of time the simulation takes can be determined by the mean number of contacts (`mean_contacts`), the probability of infection (`prob_infect`) and the minimum outbreak size (`min_outbreak_size`). If the `min_outbreak_size` is large, for example hundreds or thousands of cases, and the mean number of contacts and probability of infection mean the reproduction number is below one, it will take many branching process simulations until finding one that produces a sufficiently large epidemic.
+The amount of time the simulation takes can be determined by the mean of the contact distribution (`contact_distribution`), the probability of infection (`prob_infect`) and the minimum outbreak size (`min_outbreak_size`). If the `min_outbreak_size` is large, for example hundreds or thousands of cases, and the mean number of contacts and probability of infection mean the reproduction number is below one, it will take many branching process simulations until finding one that produces a sufficiently large epidemic.
 
 ## Anonymous line list
 
@@ -139,7 +147,7 @@ By default `sim_linelist()` provides the name of each individual in the line lis
 
 ```{r sim-anon-linelist}
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -165,7 +173,7 @@ To simulate a contacts table, the `sim_contacts()` function can be used. This re
 
 ```{r, sim-contacts}
 contacts <- sim_contacts(
-  mean_contacts = 2, 
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval, 
   prob_infect = 0.5)
 head(contacts)
@@ -179,7 +187,7 @@ In order to simulate a line list and a contacts table of the same outbreak the `
 
 ```{r, sim-outbreak}
 outbreak <- sim_outbreak(
-  mean_contacts = 2, 
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval, 
   prob_infect = 0.5, 
   onset_to_hosp = onset_to_hosp, 
@@ -197,7 +205,7 @@ It is possible to use an [anonymous function](https://en.wikipedia.org/wiki/Anon
 
 ```{r, sim-outbreak-anon-func}
 outbreak <- sim_outbreak(
-  mean_contacts = 2,
+  contact_distribution = function(x) dpois(x = x, lambda = 2),
   contact_interval = function(x) rgamma(n = x, shape = 2, scale = 2),
   prob_infect = 0.5,
   onset_to_hosp = function(x) rlnorm(n = x, meanlog = 1.5, sdlog = 0.5),
@@ -207,4 +215,10 @@ head(outbreak$linelist)
 head(outbreak$contacts)
 ```
 
+::: {.alert .alert-warning}
+
+The `contact_distribution` requires a density function instead of a random number generation function (i.e. `dpois()` or `dnbinom()` instead of `rpois()` or `rnbinom()`). This is due to the branching process simulation adjusting the sampling of contacts to take into account the random network effect.
+
+:::
+
 The same approach of using anonymous functions can be used in `sim_linelist()` and `sim_contacts()`.
diff --git a/vignettes/vis-linelist.Rmd b/vignettes/vis-linelist.Rmd
index dd4023cf..744b4160 100644
--- a/vignettes/vis-linelist.Rmd
+++ b/vignettes/vis-linelist.Rmd
@@ -28,6 +28,13 @@ library(epicontacts)
 First we load the required delay distributions using the {epiparameter} package.
 
 ```{r, read-delay-dists}
+contact_distribution <- epidist(
+  disease = "COVID-19", 
+  epi_dist = "contact distribution", 
+  prob_distribution = "pois", 
+  prob_distribution_params = c(mean = 3)
+)
+
 contact_interval <- epidist(
   disease = "COVID-19",
   epi_dist = "contact interval",
@@ -60,7 +67,7 @@ Using a simple line list simulation with the factory default settings:
 
 ```{r sim-linelist} 
 linelist <- sim_linelist(
-  mean_contacts = 3,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.33,
   onset_to_hosp = onset_to_hosp,
@@ -162,12 +169,21 @@ Additionally, {epicontacts} provides access to network plotting from JavaScript
 
 :::
 
-The {epicontacts} function `make_epicontacts()` requires both the line list and contacts table, so we will run the `sim_outbreak()` function to produce both. We will use the same epidemiological delay distributions that we used to simulate a line list above.
+The {epicontacts} function `make_epicontacts()` requires both the line list and contacts table, so we will run the `sim_outbreak()` function to produce both. We will use the same epidemiological delay distributions that we used to simulate a line list above, but reduce the mean number of contacts in the contact distribution to 2.
+
+```{r, contact-distribution}
+contact_distribution <- epidist(
+  disease = "COVID-19", 
+  epi_dist = "contact distribution", 
+  prob_distribution = "pois", 
+  prob_distribution_params = c(mean = 2)
+)
+```
 
 ```{r, sim-outbreak}
 set.seed(1)
 outbreak <- sim_outbreak(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,

From 282057aec3573d96f0a35ff4b7cd3f02a4a39685 Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Fri, 19 Jan 2024 12:09:03 +0000
Subject: [PATCH 34/50] Automatic readme update

---
 README.md | 47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 69b07eb3..6bfe6bbe 100644
--- a/README.md
+++ b/README.md
@@ -50,14 +50,23 @@ library(simulist)
 library(epiparameter)
 ```
 
-The line list simulation requires that we define a contact interval,
-onset-to-hospitalisation delay, and onset-to-death delay. We can load
-these from the library of epidemiological parameters in the
-`{epiparameter}` R package if available, or if these are not in the
-database yet (such as the contact interval for COVID-19) we can define
-them ourselves.
+The line list simulation requires that we define a contact distribution,
+contact interval, onset-to-hospitalisation delay, and onset-to-death
+delay. We can load these from the library of epidemiological parameters
+in the `{epiparameter}` R package if available, or if these are not in
+the database yet (such as the contact interval for COVID-19) we can
+define them ourselves.
 
 ``` r
+# create COVID-19 contact distribution
+contact_distribution <- epiparameter::epidist(
+  disease = "COVID-19", 
+  epi_dist = "contact distribution", 
+  prob_distribution = "pois", 
+  prob_distribution_params = c(mean = 2)
+)
+#> Citation cannot be created as author, year, journal or title is missing
+
 # create COVID-19 contact interval
 contact_interval <- epiparameter::epidist(
   disease = "COVID-19",
@@ -96,20 +105,20 @@ onset_to_death <- epiparameter::epidist_db(
 #> To retrieve the short citation use the 'get_citation' function
 ```
 
-To simulate a line list for COVID-19 with an assumed average number of
-contacts of 2 and a probability of infection per contact of 0.5, we use
-the `sim_linelist()` function. The mean number of contacts and
-probability of infection determine the outbreak reproduction number, if
-the resulting reproduction number is around one it means we will likely
-get a reasonably sized outbreak (10 - 1000 cases, varying due to the
-stochastic simulation). *Take care when setting the mean number of
-contacts and the probability of infection, as this can lead to the
-outbreak becoming extremely large*.
+To simulate a line list for COVID-19 with an Poisson contact
+distribution with a mean number of contacts of 2 and a probability of
+infection per contact of 0.5, we use the `sim_linelist()` function. The
+mean number of contacts and probability of infection determine the
+outbreak reproduction number, if the resulting reproduction number is
+around one it means we will likely get a reasonably sized outbreak (10 -
+1,000 cases, varying due to the stochastic simulation). *Take care when
+setting the mean number of contacts and the probability of infection, as
+this can lead to the outbreak becoming extremely large*.
 
 ``` r
 set.seed(1)
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -140,7 +149,7 @@ modify either of these, we can specify them in the function.
 
 ``` r
 linelist <- sim_linelist(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,
@@ -171,7 +180,7 @@ above.
 
 ``` r
 contacts <- sim_contacts(
-  mean_contacts = 2, 
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval, 
   prob_infect = 0.5
 )
@@ -200,7 +209,7 @@ the same default settings as the other functions).
 
 ``` r
 outbreak <- sim_outbreak(
-  mean_contacts = 2,
+  contact_distribution = contact_distribution,
   contact_interval = contact_interval,
   prob_infect = 0.5,
   onset_to_hosp = onset_to_hosp,

From b428d1cdfba7232c69d5f42f7b6fe6e2aa4c9aa4 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Fri, 19 Jan 2024 12:16:37 +0000
Subject: [PATCH 35/50] linting

---
 R/add_cols.R                  |  6 ++++--
 R/sim_contacts.R              |  4 +++-
 R/sim_linelist.R              |  4 +++-
 R/sim_network_bp.R            | 18 +++++++++---------
 R/sim_outbreak.R              |  4 +++-
 man/dot-sim_network_bp.Rd     | 12 ++++++------
 vignettes/age-strat-risks.Rmd |  6 +++---
 vignettes/age-struct-pop.Rmd  |  6 +++---
 vignettes/simulist.Rmd        | 14 +++++++-------
 vignettes/vis-linelist.Rmd    | 14 +++++++-------
 10 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/R/add_cols.R b/R/add_cols.R
index c076b88f..78866f25 100644
--- a/R/add_cols.R
+++ b/R/add_cols.R
@@ -84,7 +84,8 @@ NULL
   infected_idx <- .data$infected == "infected"
   num_infected <- sum(infected_idx)
   .data$hospitalisation <- NA_real_
-  .data$hospitalisation[infected_idx] <- .data$time[infected_idx] + onset_to_hosp(num_infected)
+  .data$hospitalisation[infected_idx] <- .data$time[infected_idx] +
+    onset_to_hosp(num_infected)
 
   if (is.numeric(hosp_risk)) {
     pop_sample <- sample(
@@ -119,7 +120,8 @@ NULL
   infected_idx <- .data$infected == "infected"
   num_infected <- sum(infected_idx)
   .data$deaths <- NA_real_
-  .data$deaths[infected_idx] <- .data$time[infected_idx] + onset_to_death(num_infected)
+  .data$deaths[infected_idx] <- .data$time[infected_idx] +
+    onset_to_death(num_infected)
 
   apply_death_risk <- function(.data, risk, hosp = TRUE) {
     if (is.numeric(risk)) {
diff --git a/R/sim_contacts.R b/R/sim_contacts.R
index f41b37dd..fcc73818 100644
--- a/R/sim_contacts.R
+++ b/R/sim_contacts.R
@@ -49,7 +49,9 @@ sim_contacts <- function(contact_distribution,
     "Input delay distributions need to be either functions or <epidist>" =
       inherits(contact_interval, c("function", "epidist"))
   )
-  contact_distribution <- as.function(contact_distribution, func_type = "density")
+  contact_distribution <- as.function(
+    contact_distribution, func_type = "density"
+  )
   contact_interval <- as.function(contact_interval, func_type = "generate")
 
   .check_sim_input(
diff --git a/R/sim_linelist.R b/R/sim_linelist.R
index 02942954..7d0bc768 100644
--- a/R/sim_linelist.R
+++ b/R/sim_linelist.R
@@ -159,7 +159,9 @@ sim_linelist <- function(contact_distribution,
       inherits(onset_to_hosp, c("function", "epidist")) &&
       inherits(onset_to_death, c("function", "epidist"))
   )
-  contact_distribution <- as.function(contact_distribution, func_type = "density")
+  contact_distribution <- as.function(
+    contact_distribution, func_type = "density"
+  )
   contact_interval <- as.function(contact_interval, func_type = "generate")
   onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate")
   onset_to_death <- as.function(onset_to_death, func_type = "generate")
diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index d4f53201..d2bdc5ec 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -10,12 +10,12 @@
 #'
 #' @details
 #' The contact distribution sampled takes the network effect
-#' \eqn{q(n) ~ (n + 1)p(n + 1)} where \eqn{p(n)} is the probability density function
-#' of a distribution, e.g., Poisson or Negative binomial. That is to say, the
-#' probability of having choosing a contact at random by following up a
-#' contact chooses individuals with a probability proportional to their number
-#' of contacts. The plus one is because one of the contacts was "used" to
-#' infect the person.
+#' \eqn{q(n) \sim (n + 1)p(n + 1)} where \eqn{p(n)} is the probability
+#' density function of a distribution, e.g., Poisson or Negative binomial.
+#' That is to say, the probability of having choosing a contact at random by
+#' following up a contact chooses individuals with a probability proportional
+#' to their number of contacts. The plus one is because one of the contacts
+#' was "used" to infect the person.
 #'
 #' @inheritParams sim_linelist
 #'
@@ -62,9 +62,9 @@
 
       if (add_names && chain_size > 5180) {
         warning(
-          "Returning early as total number of contacts exceeds number of names ",
-          "available from {randomNames}. \n If you want to simulate a larger ",
-          "line lists set add_names = FALSE",
+          "Returning early as total number of contacts exceeds number of ",
+          "names available from {randomNames}. \n If you want to simulate ",
+          "a larger line lists set add_names = FALSE",
           call. = FALSE
         )
         break
diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R
index 665bbe55..eb819a4e 100644
--- a/R/sim_outbreak.R
+++ b/R/sim_outbreak.R
@@ -85,7 +85,9 @@ sim_outbreak <- function(contact_distribution,
       inherits(onset_to_hosp, c("function", "epidist")) &&
       inherits(onset_to_death, c("function", "epidist"))
   )
-  contact_distribution <- as.function(contact_distribution, func_type = "density")
+  contact_distribution <- as.function(
+    contact_distribution, func_type = "density"
+  )
   contact_interval <- as.function(contact_interval, func_type = "generate")
   onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate")
   onset_to_death <- as.function(onset_to_death, func_type = "generate")
diff --git a/man/dot-sim_network_bp.Rd b/man/dot-sim_network_bp.Rd
index 9a92853d..3cfbe456 100644
--- a/man/dot-sim_network_bp.Rd
+++ b/man/dot-sim_network_bp.Rd
@@ -37,11 +37,11 @@ by the contact interval function.
 }
 \details{
 The contact distribution sampled takes the network effect
-\eqn{q(n) ~ (n + 1)p(n + 1)} where \eqn{p(n)} is the probability density function
-of a distribution, e.g., Poisson or Negative binomial. That is to say, the
-probability of having choosing a contact at random by following up a
-contact chooses individuals with a probability proportional to their number
-of contacts. The plus one is because one of the contacts was "used" to
-infect the person.
+\eqn{q(n) \sim (n + 1)p(n + 1)} where \eqn{p(n)} is the probability
+density function of a distribution, e.g., Poisson or Negative binomial.
+That is to say, the probability of having choosing a contact at random by
+following up a contact chooses individuals with a probability proportional
+to their number of contacts. The plus one is because one of the contacts
+was "used" to infect the person.
 }
 \keyword{internal}
diff --git a/vignettes/age-strat-risks.Rmd b/vignettes/age-strat-risks.Rmd
index c6905957..cc3939e7 100644
--- a/vignettes/age-strat-risks.Rmd
+++ b/vignettes/age-strat-risks.Rmd
@@ -45,9 +45,9 @@ Here is an example that uses the default hospitalisation and death risks (inside
 
 ```{r, read-delay-dists}
 contact_distribution <- epidist(
-  disease = "COVID-19", 
-  epi_dist = "contact distribution", 
-  prob_distribution = "pois", 
+  disease = "COVID-19",
+  epi_dist = "contact distribution",
+  prob_distribution = "pois",
   prob_distribution_params = c(mean = 2)
 )
 
diff --git a/vignettes/age-struct-pop.Rmd b/vignettes/age-struct-pop.Rmd
index 40c138e7..ab407c63 100644
--- a/vignettes/age-struct-pop.Rmd
+++ b/vignettes/age-struct-pop.Rmd
@@ -38,9 +38,9 @@ First we load the requested delay distributions using the {epiparameter} package
 
 ```{r, read-delay-dists}
 contact_distribution <- epidist(
-  disease = "COVID-19", 
-  epi_dist = "contact distribution", 
-  prob_distribution = "pois", 
+  disease = "COVID-19",
+  epi_dist = "contact distribution",
+  prob_distribution = "pois",
   prob_distribution_params = c(mean = 2)
 )
 
diff --git a/vignettes/simulist.Rmd b/vignettes/simulist.Rmd
index 7b4b23a1..68d9c5fd 100644
--- a/vignettes/simulist.Rmd
+++ b/vignettes/simulist.Rmd
@@ -34,9 +34,9 @@ First we load in some data that is required for the line list simulation. Data o
 ```{r read-epidist}
 # create contact distribution (not available from {epiparameter} database)
 contact_distribution <- epidist(
-  disease = "COVID-19", 
-  epi_dist = "contact distribution", 
-  prob_distribution = "pois", 
+  disease = "COVID-19",
+  epi_dist = "contact distribution",
+  prob_distribution = "pois",
   prob_distribution_params = c(mean = 2)
 )
 
@@ -174,7 +174,7 @@ To simulate a contacts table, the `sim_contacts()` function can be used. This re
 ```{r, sim-contacts}
 contacts <- sim_contacts(
   contact_distribution = contact_distribution,
-  contact_interval = contact_interval, 
+  contact_interval = contact_interval,
   prob_infect = 0.5)
 head(contacts)
 ```
@@ -188,9 +188,9 @@ In order to simulate a line list and a contacts table of the same outbreak the `
 ```{r, sim-outbreak}
 outbreak <- sim_outbreak(
   contact_distribution = contact_distribution,
-  contact_interval = contact_interval, 
-  prob_infect = 0.5, 
-  onset_to_hosp = onset_to_hosp, 
+  contact_interval = contact_interval,
+  prob_infect = 0.5,
+  onset_to_hosp = onset_to_hosp,
   onset_to_death = onset_to_death
 )
 head(outbreak$linelist)
diff --git a/vignettes/vis-linelist.Rmd b/vignettes/vis-linelist.Rmd
index 744b4160..f6b9fc9d 100644
--- a/vignettes/vis-linelist.Rmd
+++ b/vignettes/vis-linelist.Rmd
@@ -29,9 +29,9 @@ First we load the required delay distributions using the {epiparameter} package.
 
 ```{r, read-delay-dists}
 contact_distribution <- epidist(
-  disease = "COVID-19", 
-  epi_dist = "contact distribution", 
-  prob_distribution = "pois", 
+  disease = "COVID-19",
+  epi_dist = "contact distribution",
+  prob_distribution = "pois",
   prob_distribution_params = c(mean = 3)
 )
 
@@ -71,7 +71,7 @@ linelist <- sim_linelist(
   contact_interval = contact_interval,
   prob_infect = 0.33,
   onset_to_hosp = onset_to_hosp,
-  onset_to_death = onset_to_death, 
+  onset_to_death = onset_to_death,
   min_outbreak_size = 500
 )
 ```
@@ -173,9 +173,9 @@ The {epicontacts} function `make_epicontacts()` requires both the line list and
 
 ```{r, contact-distribution}
 contact_distribution <- epidist(
-  disease = "COVID-19", 
-  epi_dist = "contact distribution", 
-  prob_distribution = "pois", 
+  disease = "COVID-19",
+  epi_dist = "contact distribution",
+  prob_distribution = "pois",
   prob_distribution_params = c(mean = 2)
 )
 ```

From bb141f88aa595d1e6b6a391f6dae904df51378ed Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 31 Jan 2024 13:34:45 +0000
Subject: [PATCH 36/50] increase excess degree distribution density upper bound

---
 R/sim_network_bp.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index d2bdc5ec..f7256168 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -50,9 +50,9 @@
   # run loop until no more individuals are sampled
   while (next_gen_size > 0) {
     # sample contact distribution (excess degree distribution)
-    q <- contact_distribution(0:100 + 1) * (0:100 + 1)
+    q <- contact_distribution(0:1e4 + 1) * (0:1e4 + 1)
     q <- q / sum(q)
-    contacts <- sample(0:100, size = next_gen_size, replace = TRUE, prob = q)
+    contacts <- sample(0:1e4, size = next_gen_size, replace = TRUE, prob = q)
 
     # add contacts if sampled
     if (sum(contacts) > 0L) {

From cd5fe64e6203dffa73944ca69dc812094ed65b9a Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 31 Jan 2024 13:35:06 +0000
Subject: [PATCH 37/50] update seed in .sim_network_bp test to pass

---
 tests/testthat/test-sim_network_bp.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-sim_network_bp.R b/tests/testthat/test-sim_network_bp.R
index 920ff5de..5251aaec 100644
--- a/tests/testthat/test-sim_network_bp.R
+++ b/tests/testthat/test-sim_network_bp.R
@@ -45,7 +45,7 @@ test_that(".sim_network_bp works as expected with no contacts", {
       )
     )
   )
-  set.seed(4)
+  set.seed(1)
   res <- .sim_network_bp(
     contact_distribution = contact_distribution,
     contact_interval = contact_interval,

From 431503523fa5a44384144f71e43761a3144661d9 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 5 Feb 2024 12:17:29 +0000
Subject: [PATCH 38/50] added .sample_names function

---
 R/sim_utils.R           | 70 +++++++++++++++++++++++++++++++++++++++++
 man/dot-sample_names.Rd | 32 +++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 man/dot-sample_names.Rd

diff --git a/R/sim_utils.R b/R/sim_utils.R
index 4c81bba4..02bb91d9 100644
--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -160,3 +160,73 @@ NULL
     cols = linelist_cols
   )
 }
+
+#' Sample names using [randomNames::randomNames()]
+#'
+#' @description
+#' Sample names for specified genders by sampling with replacement to avoid
+#' exhausting number of name when `sample.with.replacement = FALSE`. The
+#' duplicated names during sampling need to be removed to ensure each
+#' individual has a unique name. In order to have enough unique names, more
+#' names than required are sampled from [randomNames()], and the level of
+#' oversampling is determined by the `buffer_factor` argument. A
+#' `buffer_factor` too high and the more names are sampled which takes longer,
+#' a `buffer_factor` too low and not enough unique names are sampled and
+#' the `.sample_names()` function will need to loop until it has enough
+#' unique names.
+#'
+#' @inheritParams .add_date
+#' @param buffer_factor A single `numeric` determining the level of
+#' oversampling (or buffer) when creating a vector of unique names from
+#' [randomNames()].
+#'
+#' @return A `character` vector.
+#' @keywords internal
+.sample_names <- function(.data,
+                          buffer_factor = 1.5) {
+  m_idx <- .data$gender == "m"
+  f_idx <- .data$gender == "f"
+  num_m <- sum(m_idx)
+  num_f <- sum(f_idx)
+  num_sample_m <- ceiling(num_m * buffer_factor)
+  num_sample_f <- ceiling(num_f * buffer_factor)
+
+  # create sample of names so there are no duplicates
+  names_m <- character(0)
+  while(length(names_m) < num_m) {
+    names_m <- unique(
+      randomNames::randomNames(
+        which.names = "both",
+        name.sep = " ",
+        name.order = "first.last",
+        gender = rep("M", num_sample_m),
+        sample.with.replacement = TRUE
+      )
+    )
+  }
+
+  names_f <- character(0)
+  while(length(names_f) < num_f) {
+    names_f <- unique(
+      randomNames::randomNames(
+        which.names = "both",
+        name.sep = " ",
+        name.order = "first.last",
+        gender = rep("F", num_sample_f),
+        sample.with.replacement = TRUE
+      )
+    )
+  }
+
+  # subset to use required number of names
+  names_m <- names_m[1:num_m]
+  names_f <- names_f[1:num_f]
+
+  # order names with gender codes from .data
+  names_mf <- vector(mode = "character", length = nrow(.data))
+  names_mf[m_idx] <- names_m
+  names_mf[f_idx] <- names_f
+
+  # return vector of names
+  names_mf
+}
diff --git a/man/dot-sample_names.Rd b/man/dot-sample_names.Rd
new file mode 100644
index 00000000..ad064aa1
--- /dev/null
+++ b/man/dot-sample_names.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sim_utils.R
+\name{.sample_names}
+\alias{.sample_names}
+\title{Sample names using \code{\link[randomNames:randomNames]{randomNames::randomNames()}}}
+\usage{
+.sample_names(.data, buffer_factor = 1.5)
+}
+\arguments{
+\item{.data}{A \verb{<data.frame>} containing the infectious history from a
+branching process simulation.}
+
+\item{buffer_factor}{A single \code{numeric} determining the level of
+oversampling (or buffer) when creating a vector of unique names from
+\code{\link[=randomNames]{randomNames()}}.}
+}
+\value{
+A \code{character} vector.
+}
+\description{
+Sample names for specified genders by sampling with replacement to avoid
+exhausting number of name when \code{sample.with.replacement = FALSE}. The
+duplicated names during sampling need to be removed to ensure each
+individual has a unique name. In order to have enough unique names, more
+names than required are sampled from \code{\link[=randomNames]{randomNames()}}, and the level of
+oversampling is determined by the \code{buffer_factor} argument. A
+\code{buffer_factor} too high and the more names are sampled which takes longer,
+a \code{buffer_factor} too low and not enough unique names are sampled and
+the \code{.sample_names()} function will need to loop until it has enough
+unique names.
+}
+\keyword{internal}

From d76716334b70de186a925677d8db50aad35a536c Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 5 Feb 2024 12:17:51 +0000
Subject: [PATCH 39/50] use .sample_names in .add_names function

---
 R/add_cols.R | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/R/add_cols.R b/R/add_cols.R
index 78866f25..543e0cfb 100644
--- a/R/add_cols.R
+++ b/R/add_cols.R
@@ -184,14 +184,7 @@ NULL
 
 #' @name .add_info
 .add_names <- function(.data) {
-  # create sample of names so there are no duplicates
-  .data$case_name <- randomNames::randomNames(
-    which.names = "both",
-    name.sep = " ",
-    name.order = "first.last",
-    gender = .data$gender,
-    sample.with.replacement = FALSE
-  )
+  .data$case_name <- .sample_names(.data = .data)
 
   # left join corresponding names to infectors preserving column and row order
   infector_names <- data.frame(id = .data$id, infector_name = .data$case_name)

From 69ff03f4a0d8b64689201b8a8ce30cb22267746c Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 5 Feb 2024 12:18:26 +0000
Subject: [PATCH 40/50] removed warning and break for large line lists with
 names in .sim_network_bp

---
 R/sim_network_bp.R | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index f7256168..6d7ebae1 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -60,16 +60,6 @@
       chain_length <- chain_length + any(contacts >= 1L)
       chain_generation <- chain_generation + 1L
 
-      if (add_names && chain_size > 5180) {
-        warning(
-          "Returning early as total number of contacts exceeds number of ",
-          "names available from {randomNames}. \n If you want to simulate ",
-          "a larger line lists set add_names = FALSE",
-          call. = FALSE
-        )
-        break
-      }
-
       for (i in seq_along(contacts)) {
 
         if (contacts[i] > 0) {

From fb5e435c8e15a244d8d26dfea55efcdf0be99d0e Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 5 Feb 2024 12:19:50 +0000
Subject: [PATCH 41/50] linting

---
 R/sim_utils.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/sim_utils.R b/R/sim_utils.R
index 02bb91d9..656cbbd1 100644
--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -193,7 +193,7 @@ NULL
 
   # create sample of names so there are no duplicates
   names_m <- character(0)
-  while(length(names_m) < num_m) {
+  while (length(names_m) < num_m) {
     names_m <- unique(
       randomNames::randomNames(
         which.names = "both",
@@ -206,7 +206,7 @@ NULL
   }
 
   names_f <- character(0)
-  while(length(names_f) < num_f) {
+  while (length(names_f) < num_f) {
     names_f <- unique(
       randomNames::randomNames(
         which.names = "both",

From e60bdf787a612c513d92c260899fd2c4968218c7 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 10:21:40 +0000
Subject: [PATCH 42/50] added network option to create_config

---
 R/create_config.R    | 11 ++++++++++-
 man/create_config.Rd |  8 ++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/R/create_config.R b/R/create_config.R
index 75694b62..22209cf8 100644
--- a/R/create_config.R
+++ b/R/create_config.R
@@ -12,6 +12,7 @@
 #' * `first_contact_distribution_params = c(lambda = 3)`
 #' * `ct_distribution = "norm"`
 #' * `ct_distribution_params = c(mean = 25, sd = 2)`
+#' * `network = "adjusted"`
 #'
 #' These parameters do not warrant their own arguments in
 #' [sim_linelist()] as they rarely need to be changed from their default
@@ -19,6 +20,13 @@
 #' arguments to accommodate these and the `config` argument keeps the function
 #' signature simpler and more readable.
 #'
+#' The `network` option controls whether to sample contacts from a adjusted or
+#' unadjusted contact distribution. Adjusted (default) sampling uses
+#' \eqn{q(n) \sim (n + 1)p(n + 1)} where \eqn{p(n)} is the probability
+#' density function of a distribution, e.g., Poisson or Negative binomial.
+#' Unadjusted (`network = "unadjusted"`) instead samples contacts directly from
+#' a probability distribution \eqn{p(n)}.
+#'
 #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Named elements to replace
 #' default settings. Only if names match exactly are elements replaced,
 #' otherwise the function errors.
@@ -42,7 +50,8 @@ create_config <- function(...) {
     first_contact_distribution = "pois",
     first_contact_distribution_params = c(lambda = 3),
     ct_distribution = "norm",
-    ct_distribution_params = c(mean = 25, sd = 2)
+    ct_distribution_params = c(mean = 25, sd = 2),
+    network = "adjusted"
   )
 
   # capture dynamic dots
diff --git a/man/create_config.Rd b/man/create_config.Rd
index 6a1903eb..ae79b539 100644
--- a/man/create_config.Rd
+++ b/man/create_config.Rd
@@ -31,6 +31,7 @@ Accepted arguments and their defaults are:
 \item \code{first_contact_distribution_params = c(lambda = 3)}
 \item \code{ct_distribution = "norm"}
 \item \code{ct_distribution_params = c(mean = 25, sd = 2)}
+\item \code{network = "adjusted"}
 }
 
 These parameters do not warrant their own arguments in
@@ -38,6 +39,13 @@ These parameters do not warrant their own arguments in
 setting. Therefore it is not worth increasing the number of \code{\link[=sim_linelist]{sim_linelist()}}
 arguments to accommodate these and the \code{config} argument keeps the function
 signature simpler and more readable.
+
+The \code{network} option controls whether to sample contacts from a adjusted or
+unadjusted contact distribution. Adjusted (default) sampling uses
+\eqn{q(n) \sim (n + 1)p(n + 1)} where \eqn{p(n)} is the probability
+density function of a distribution, e.g., Poisson or Negative binomial.
+Unadjusted (\code{network = "unadjusted"}) instead samples contacts directly from
+a probability distribution \eqn{p(n)}.
 }
 \examples{
 # example with default configuration

From 2985b3976c70f4e4bb2fb6a2cfd6c64e90b05834 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 10:22:21 +0000
Subject: [PATCH 43/50] updated create_config tests

---
 tests/testthat/test-create_config.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/testthat/test-create_config.R b/tests/testthat/test-create_config.R
index 1163b447..6d82b971 100644
--- a/tests/testthat/test-create_config.R
+++ b/tests/testthat/test-create_config.R
@@ -1,13 +1,13 @@
 test_that("create_config works as expected with defaults", {
   config <- create_config()
   expect_type(config, type = "list")
-  expect_length(config, 6)
+  expect_length(config, 7)
   expect_named(
     config,
     c(
       "last_contact_distribution", "last_contact_distribution_params",
       "first_contact_distribution", "first_contact_distribution_params",
-      "ct_distribution", "ct_distribution_params"
+      "ct_distribution", "ct_distribution_params", "network"
     )
   )
 })
@@ -15,13 +15,13 @@ test_that("create_config works as expected with defaults", {
 test_that("create_config works as expected modifying element", {
   config <- create_config(last_contact_distribution = "geom")
   expect_type(config, type = "list")
-  expect_length(config, 6)
+  expect_length(config, 7)
   expect_named(
     config,
     c(
       "last_contact_distribution", "last_contact_distribution_params",
       "first_contact_distribution", "first_contact_distribution_params",
-      "ct_distribution", "ct_distribution_params"
+      "ct_distribution", "ct_distribution_params", "network"
     )
   )
   expect_identical(config$last_contact_distribution, "geom")
@@ -35,13 +35,13 @@ test_that("create_config works as expected with spliced list", {
     )
   )
   expect_type(config, type = "list")
-  expect_length(config, 6)
+  expect_length(config, 7)
   expect_named(
     config,
     c(
       "last_contact_distribution", "last_contact_distribution_params",
       "first_contact_distribution", "first_contact_distribution_params",
-      "ct_distribution", "ct_distribution_params"
+      "ct_distribution", "ct_distribution_params", "network"
     )
   )
   expect_identical(config$ct_distribution, "lnorm")
@@ -55,13 +55,13 @@ test_that("create_config works as expected with spliced list", {
     )
   )
   expect_type(config, type = "list")
-  expect_length(config, 6)
+  expect_length(config, 7)
   expect_named(
     config,
     c(
       "last_contact_distribution", "last_contact_distribution_params",
       "first_contact_distribution", "first_contact_distribution_params",
-      "ct_distribution", "ct_distribution_params"
+      "ct_distribution", "ct_distribution_params", "network"
     )
   )
   expect_identical(config$last_contact_distribution, "geom")

From 173a855dfd04b2e436f40ca658ddfa883e1c4d6e Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 10:26:47 +0000
Subject: [PATCH 44/50] add option to turn off network effect in
 .sim_network_bp and remove add_names argument

---
 R/sim_network_bp.R        | 16 ++++++++++++----
 man/dot-sim_network_bp.Rd |  6 +++---
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R
index 6d7ebae1..46532a69 100644
--- a/R/sim_network_bp.R
+++ b/R/sim_network_bp.R
@@ -24,7 +24,11 @@
 .sim_network_bp <- function(contact_distribution,
                             contact_interval,
                             prob_infect,
-                            add_names) {
+                            config) {
+  if (is.null(config$network) ||
+      !config$network %in% c("adjusted", "unadjusted")) {
+    stop("Network incorrectly specified, check config", call. = FALSE)
+  }
 
   # initialise data object
   ancestor <- vector(mode = "integer", 1e5)
@@ -49,9 +53,13 @@
 
   # run loop until no more individuals are sampled
   while (next_gen_size > 0) {
-    # sample contact distribution (excess degree distribution)
-    q <- contact_distribution(0:1e4 + 1) * (0:1e4 + 1)
-    q <- q / sum(q)
+    if (config$network == "adjusted") {
+      # sample contact distribution (excess degree distribution)
+      q <- contact_distribution(0:1e4 + 1) * (0:1e4 + 1)
+      q <- q / sum(q)
+    } else {
+      q <- contact_distribution(0:1e4)
+    }
     contacts <- sample(0:1e4, size = next_gen_size, replace = TRUE, prob = q)
 
     # add contacts if sampled
diff --git a/man/dot-sim_network_bp.Rd b/man/dot-sim_network_bp.Rd
index 3cfbe456..6719bdf3 100644
--- a/man/dot-sim_network_bp.Rd
+++ b/man/dot-sim_network_bp.Rd
@@ -5,7 +5,7 @@
 \title{Simulate a random network branching process model with a probability of
 infection for each contact}
 \usage{
-.sim_network_bp(contact_distribution, contact_interval, prob_infect, add_names)
+.sim_network_bp(contact_distribution, contact_interval, prob_infect, config)
 }
 \arguments{
 \item{contact_distribution}{An \verb{<epidist>} object or anonymous function for
@@ -22,8 +22,8 @@ zero) and having contact with another susceptible individual.}
 \item{prob_infect}{A single \code{numeric} for the probability of a secondary
 contact being infected by an infected primary contact.}
 
-\item{add_names}{A \code{logical} boolean for whether to add names to each row
-of the line list. Default is \code{TRUE}.}
+\item{config}{A list of settings to adjust the randomly sampled delays and
+Ct values (if \code{add_ct = TRUE}). See \code{\link[=create_config]{create_config()}} for more information.}
 }
 \value{
 A \verb{<data.frame>} with the contact and transmission chain data.

From be49848e23286ca6700c3f07d12a042522f448e3 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 10:27:28 +0000
Subject: [PATCH 45/50] added test for .sim_network_bp without network effect
 and update other tests

---
 tests/testthat/test-sim_network_bp.R | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-sim_network_bp.R b/tests/testthat/test-sim_network_bp.R
index 5251aaec..6cbc6e2a 100644
--- a/tests/testthat/test-sim_network_bp.R
+++ b/tests/testthat/test-sim_network_bp.R
@@ -24,7 +24,7 @@ test_that(".sim_network_bp works as expected", {
     contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
-    add_names = TRUE
+    config = create_config()
   )
   expect_s3_class(res, class = "data.frame")
   expect_identical(dim(res), c(10L, 5L))
@@ -50,7 +50,7 @@ test_that(".sim_network_bp works as expected with no contacts", {
     contact_distribution = contact_distribution,
     contact_interval = contact_interval,
     prob_infect = 0.5,
-    add_names = TRUE
+    config = create_config()
   )
   expect_s3_class(res, class = "data.frame")
   expect_identical(dim(res), c(1L, 5L))
@@ -59,3 +59,19 @@ test_that(".sim_network_bp works as expected with no contacts", {
     c("id", "ancestor", "generation", "infected", "time")
   )
 })
+
+test_that(".sim_network_bp works as expected with unadjusted network", {
+  set.seed(1)
+  res <- .sim_network_bp(
+    contact_distribution = contact_distribution,
+    contact_interval = contact_interval,
+    prob_infect = 0.5,
+    config = create_config(network = "unadjusted")
+  )
+  expect_s3_class(res, class = "data.frame")
+  expect_identical(dim(res), c(10L, 5L))
+  expect_identical(
+    colnames(res),
+    c("id", "ancestor", "generation", "infected", "time")
+  )
+})
\ No newline at end of file

From baeb2833c4d0725928c853d0c9d30dee4e232976 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 10:28:06 +0000
Subject: [PATCH 46/50] removed add_names argument from .sim_network_bp call in
 sim_* functions

---
 R/sim_contacts.R | 1 -
 R/sim_linelist.R | 1 -
 R/sim_outbreak.R | 1 -
 3 files changed, 3 deletions(-)

diff --git a/R/sim_contacts.R b/R/sim_contacts.R
index fcc73818..3f109fb4 100644
--- a/R/sim_contacts.R
+++ b/R/sim_contacts.R
@@ -72,7 +72,6 @@ sim_contacts <- function(contact_distribution,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
-    add_names = TRUE,
     config = config
   )
 
diff --git a/R/sim_linelist.R b/R/sim_linelist.R
index 7d0bc768..9ee1ec51 100644
--- a/R/sim_linelist.R
+++ b/R/sim_linelist.R
@@ -219,7 +219,6 @@ sim_linelist <- function(contact_distribution,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
-    add_names = add_names,
     config = config
   )
 
diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R
index eb819a4e..494beccd 100644
--- a/R/sim_outbreak.R
+++ b/R/sim_outbreak.R
@@ -146,7 +146,6 @@ sim_outbreak <- function(contact_distribution,
     outbreak_start_date = outbreak_start_date,
     min_outbreak_size = min_outbreak_size,
     population_age = population_age,
-    add_names = add_names,
     config = config
   )
 

From 91f77ff92bd3a2ecb3d60305f89c271a4827fdfe Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 10:28:48 +0000
Subject: [PATCH 47/50] removed add_names argument from .sim_bp_linelist

---
 R/sim_utils.R        | 3 +--
 man/dot-sim_utils.Rd | 7 +++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/R/sim_utils.R b/R/sim_utils.R
index 656cbbd1..d4386628 100644
--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -20,7 +20,6 @@ NULL
                              outbreak_start_date,
                              min_outbreak_size,
                              population_age,
-                             add_names,
                              config) {
   outbreak_size <- 0
   max_iter <- 0L
@@ -30,7 +29,7 @@ NULL
       contact_distribution = contact_distribution,
       contact_interval = contact_interval,
       prob_infect = prob_infect,
-      add_names = add_names
+      config = config
     )
     outbreak_size <- sum(chain$infected == "infected")
     max_iter <- max_iter + 1L
diff --git a/man/dot-sim_utils.Rd b/man/dot-sim_utils.Rd
index d07a53a4..d38dbdd6 100644
--- a/man/dot-sim_utils.Rd
+++ b/man/dot-sim_utils.Rd
@@ -14,7 +14,6 @@ within \pkg{simulist}}
   outbreak_start_date,
   min_outbreak_size,
   population_age,
-  add_names,
   config
 )
 
@@ -61,9 +60,6 @@ range (both inclusive, i.e. [lower, upper]). The \verb{<data.frame>} with
 age groups and the proportion of the population in that group. See details
 and examples for more information.}
 
-\item{add_names}{A \code{logical} boolean for whether to add names to each row
-of the line list. Default is \code{TRUE}.}
-
 \item{config}{A list of settings to adjust the randomly sampled delays and
 Ct values (if \code{add_ct = TRUE}). See \code{\link[=create_config]{create_config()}} for more information.}
 
@@ -90,6 +86,9 @@ specific death risks outside of hospitals. Default is 5\% death risk outside
 of hospitals  (\code{0.05}) for the entire population. See details and examples
 for more information.}
 
+\item{add_names}{A \code{logical} boolean for whether to add names to each row
+of the line list. Default is \code{TRUE}.}
+
 \item{add_ct}{A \code{logical} boolean for whether to add Ct values to each row
 of the line list. Default is \code{TRUE}.}
 

From 5d83edce4dbe3bc30a6189e75399d0f4db89d238 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 10:29:08 +0000
Subject: [PATCH 48/50] updated sim_contacts and sim_linelist tests

---
 tests/testthat/test-sim_contacts.R | 2 +-
 tests/testthat/test-sim_linelist.R | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-sim_contacts.R b/tests/testthat/test-sim_contacts.R
index 08edfe7d..ebc07c83 100644
--- a/tests/testthat/test-sim_contacts.R
+++ b/tests/testthat/test-sim_contacts.R
@@ -100,6 +100,6 @@ test_that("sim_contacts fails as expected with empty config", {
       prob_infect = 0.5,
       config = list()
     ),
-    regexp = "Distribution parameters are missing, check config"
+    regexp = "Network incorrectly specified, check config"
   )
 })
diff --git a/tests/testthat/test-sim_linelist.R b/tests/testthat/test-sim_linelist.R
index 65b03995..03498286 100644
--- a/tests/testthat/test-sim_linelist.R
+++ b/tests/testthat/test-sim_linelist.R
@@ -321,7 +321,7 @@ test_that("sim_linelist fails as expected with empty config", {
       onset_to_death = onset_to_death,
       config = list()
     ),
-    regexp = "Distribution parameters are missing, check config"
+    regexp = "Network incorrectly specified, check config"
   )
 })
 

From 297d71388c51261848c005eda89fe12bda668471 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 10:29:34 +0000
Subject: [PATCH 49/50] added info box to simulist vignette on network effect
 in model

---
 vignettes/simulist.Rmd | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vignettes/simulist.Rmd b/vignettes/simulist.Rmd
index 68d9c5fd..16c9b779 100644
--- a/vignettes/simulist.Rmd
+++ b/vignettes/simulist.Rmd
@@ -199,6 +199,14 @@ head(outbreak$contacts)
 
 `sim_outbreak()` has the same features as `sim_linelist()` and `sim_contacts()`, this includes simulating with age-stratified risks of hospitalisation and death, the probability of case types or contact tracing status can be modified.
 
+::: {.alert .alert-info}
+
+_Advanced_
+
+The `sim_*()` functions, by default, use an excess degree distribution to account for a network effect when sampling the number of contacts in the simulation model ($q(n) \sim (n + 1)p(n + 1)$ where $p(n)$ is the probability density function of a distribution, e.g., Poisson or Negative binomial, within the `.sim_network_bp()` internal function). This network effect can be turned off by using the `config` argument in any `sim_*()` function and setting `network = "unadjusted"` (`create_config(network = "unadjusted")`) which will instead sample from a probability distribution $p(n)$.
+
+:::
+
 ## Using functions for distributions instead of `<epidist>`
 
 It is possible to use an [anonymous function](https://en.wikipedia.org/wiki/Anonymous_function) instead of an `<epidist>` object when specifying the parameters of the delay and contact distributions. We recommend using the `<epidist>` objects but here outline the alternative approach.

From bdac6deddc1aeafed2ff1e91b1ecce313453524d Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Tue, 6 Feb 2024 11:44:36 +0000
Subject: [PATCH 50/50] use snapshot tests for regression testing
 .sim_network_bp

---
 tests/testthat/_snaps/sim_network_bp.md | 45 +++++++++++++++++++++
 tests/testthat/test-sim_network_bp.R    | 54 ++++++++++---------------
 2 files changed, 66 insertions(+), 33 deletions(-)
 create mode 100644 tests/testthat/_snaps/sim_network_bp.md

diff --git a/tests/testthat/_snaps/sim_network_bp.md b/tests/testthat/_snaps/sim_network_bp.md
new file mode 100644
index 00000000..89a5fc03
--- /dev/null
+++ b/tests/testthat/_snaps/sim_network_bp.md
@@ -0,0 +1,45 @@
+# .sim_network_bp works as expected
+
+    Code
+      .sim_network_bp(contact_distribution = contact_distribution, contact_interval = contact_interval,
+        prob_infect = 0.5, config = create_config())
+    Output
+         id ancestor generation infected       time
+      1   1       NA          1 infected 0.00000000
+      2   2        1          2  contact 1.88240160
+      3   3        1          2 infected 1.80451250
+      4   4        3          3 infected 0.05896314
+      5   5        3          3  contact 1.15835525
+      6   6        3          3  contact 0.99001994
+      7   7        4          4 infected 2.14036129
+      8   8        7          5  contact 0.46988251
+      9   9        7          5  contact 0.69425308
+      10 10        7          5  contact 0.06819735
+
+# .sim_network_bp works as expected with no contacts
+
+    Code
+      .sim_network_bp(contact_distribution = contact_distribution, contact_interval = contact_interval,
+        prob_infect = 0.5, config = create_config())
+    Output
+        id ancestor generation infected time
+      1  1       NA          1 infected    0
+
+# .sim_network_bp works as expected with unadjusted network
+
+    Code
+      .sim_network_bp(contact_distribution = contact_distribution, contact_interval = contact_interval,
+        prob_infect = 0.5, config = create_config(network = "unadjusted"))
+    Output
+         id ancestor generation infected       time
+      1   1       NA          1 infected 0.00000000
+      2   2        1          2  contact 1.88240160
+      3   3        1          2 infected 1.80451250
+      4   4        3          3 infected 0.05896314
+      5   5        3          3  contact 1.15835525
+      6   6        3          3  contact 0.99001994
+      7   7        4          4 infected 2.14036129
+      8   8        7          5  contact 0.46988251
+      9   9        7          5  contact 0.69425308
+      10 10        7          5  contact 0.06819735
+
diff --git a/tests/testthat/test-sim_network_bp.R b/tests/testthat/test-sim_network_bp.R
index 6cbc6e2a..164de9e2 100644
--- a/tests/testthat/test-sim_network_bp.R
+++ b/tests/testthat/test-sim_network_bp.R
@@ -20,17 +20,13 @@ suppressMessages({
 
 test_that(".sim_network_bp works as expected", {
   set.seed(1)
-  res <- .sim_network_bp(
-    contact_distribution = contact_distribution,
-    contact_interval = contact_interval,
-    prob_infect = 0.5,
-    config = create_config()
-  )
-  expect_s3_class(res, class = "data.frame")
-  expect_identical(dim(res), c(10L, 5L))
-  expect_identical(
-    colnames(res),
-    c("id", "ancestor", "generation", "infected", "time")
+  expect_snapshot(
+    .sim_network_bp(
+      contact_distribution = contact_distribution,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
+      config = create_config()
+    )
   )
 })
 
@@ -46,32 +42,24 @@ test_that(".sim_network_bp works as expected with no contacts", {
     )
   )
   set.seed(1)
-  res <- .sim_network_bp(
-    contact_distribution = contact_distribution,
-    contact_interval = contact_interval,
-    prob_infect = 0.5,
-    config = create_config()
-  )
-  expect_s3_class(res, class = "data.frame")
-  expect_identical(dim(res), c(1L, 5L))
-  expect_identical(
-    colnames(res),
-    c("id", "ancestor", "generation", "infected", "time")
+  expect_snapshot(
+    .sim_network_bp(
+      contact_distribution = contact_distribution,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
+      config = create_config()
+    )
   )
 })
 
 test_that(".sim_network_bp works as expected with unadjusted network", {
   set.seed(1)
-  res <- .sim_network_bp(
-    contact_distribution = contact_distribution,
-    contact_interval = contact_interval,
-    prob_infect = 0.5,
-    config = create_config(network = "unadjusted")
-  )
-  expect_s3_class(res, class = "data.frame")
-  expect_identical(dim(res), c(10L, 5L))
-  expect_identical(
-    colnames(res),
-    c("id", "ancestor", "generation", "infected", "time")
+  expect_snapshot(
+    .sim_network_bp(
+      contact_distribution = contact_distribution,
+      contact_interval = contact_interval,
+      prob_infect = 0.5,
+      config = create_config(network = "unadjusted")
+    )
   )
 })
\ No newline at end of file