-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit caea8f2
Showing
21 changed files
with
878 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
Package: nbconv | ||
Title: Evaluate Arbitrary Negative Binomial Convolutions | ||
Version: 1.0.0 | ||
Authors@R: | ||
person( | ||
"Gregory", "Bedwell", | ||
email = "gregoryjbedwell@gmail.com", | ||
role = c("aut", "cre", "cph"), | ||
comment = c(ORCID = "0000-0003-0456-0032") ) | ||
URL: https://github.com/gbedwell/nbconv | ||
BugReports: https://github.com/gbedwell/nbconv/issues | ||
Imports: parallel, matrixStats, stats | ||
Description: Three distinct methods are implemented for evaluating the sums of arbitrary negative binomial distributions. These methods are: Furman's exact probability mass function (Furman (2007) <doi:10.1016/j.spl.2006.06.007>), saddlepoint approximation, and a method of moments approximation. Functions are provided to calculate the density function, the distribution function and the quantile function of the convolutions in question given said evaluation methods. Functions for generating random deviates from negative binomial convolutions and for directly calculating the mean, variance, skewness, and excess kurtosis of said convolutions are also provided. | ||
Encoding: UTF-8 | ||
RoxygenNote: 7.2.0 | ||
License: GPL (>= 3) | ||
NeedsCompilation: no | ||
Packaged: 2023-04-26 12:49:11 UTC; gbedwell | ||
Author: Gregory Bedwell [aut, cre, cph] | ||
(<https://orcid.org/0000-0003-0456-0032>) | ||
Maintainer: Gregory Bedwell <gregoryjbedwell@gmail.com> | ||
Repository: CRAN | ||
Date/Publication: 2023-04-27 12:50:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
3075f708f3a4c1fc5edc2fc10deb16db *DESCRIPTION | ||
0ab9635778542a5905b28d6cd7a339b0 *NAMESPACE | ||
458f726e6fe0e86e5f1608bc9f46ef3f *NEWS.md | ||
99bb41a8d47f9e5c9686f6c54cac4188 *R/dnbconv.R | ||
5994f87d65766a9d48f50c8558ccaed5 *R/nb_sum_exact.R | ||
00b27d04a6e966f7858af01fc0600e0f *R/nb_sum_moments.R | ||
44707a8116d7eb1fc833a718a81105d9 *R/nb_sum_saddlepoint.R | ||
1a264257abe9facfbcae593ccec786a6 *R/nbconv_params.R | ||
7afb7decba2428d80c348d90eff1620c *R/pnbconv.R | ||
d9ab1b6104179ee145bb0f61b9bc365b *R/qnbconv.R | ||
b6d18007ed2e927fab8741ec53751f80 *R/rnbconv.R | ||
3fa298bd3de98df62a9d1dd55c6e732d *README.md | ||
baf003c448485c4e5358634418af1913 *man/dnbconv.Rd | ||
118ecda1aa82761fb4d4dd4680edb4b5 *man/nb_sum_exact.Rd | ||
b2179398035882a63d87cb97f17a62e4 *man/nb_sum_moments.Rd | ||
50be4396a808bf89f96351b294a0b6a2 *man/nb_sum_saddlepoint.Rd | ||
3cb368dc0901eae43070ca9ef63be6a2 *man/nbconv_params.Rd | ||
45cf1613f62b02b98495af264cc459c1 *man/pnbconv.Rd | ||
2e987ebbeca2edf473e1b00532dc372a *man/qnbconv.Rd | ||
fc820024b18e01a9f1e19b075846d749 *man/rnbconv.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(dnbconv) | ||
export(nb_sum_exact) | ||
export(nb_sum_moments) | ||
export(nb_sum_saddlepoint) | ||
export(nbconv_params) | ||
export(pnbconv) | ||
export(qnbconv) | ||
export(rnbconv) | ||
import(matrixStats) | ||
import(parallel) | ||
importFrom(stats,"dnbinom") | ||
importFrom(stats,"pnbinom") | ||
importFrom(stats,"rnbinom") | ||
importFrom(stats,"uniroot") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# nbconv 0.0.0.9000 | ||
|
||
* Added a `NEWS.md` file to track changes to the package. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#' Calculates the PMF for the convolution of arbitrary negative binomial random variables. | ||
#' | ||
#'@param counts The counts over which the convolution is evaluated. Should be a vector. | ||
#'@param mus Vector of individual mean values | ||
#'@param ps Vector of individual probabilities of success. | ||
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom. | ||
#'@param method The method by which to evaluate the PMF. One of "exact", "moments", or "saddlepoint". | ||
#'@param n.terms The number of terms to include in the series for evaluating the PMF at a given number of counts. Defaults to 1000. | ||
#'@param n.cores The number of CPU cores to use in the evaluation. Allows parallelization. | ||
#'@param tolerance The acceptable difference between the sum of the K distribution and 1. | ||
#'@param normalize Boolean. If TRUE, the PMF is normalized to sum to 1. | ||
#' | ||
#'@returns A numeric vector of probability densities. | ||
#' | ||
#'@examples dnbconv(counts = 0:500, mus = c(100, 10), phis = c(5, 8), method = "exact") | ||
#' | ||
#'@import parallel | ||
#'@import matrixStats | ||
#' | ||
#'@export | ||
#' | ||
dnbconv <- function(counts, mus, ps, phis, method = c("exact", "moments", "saddlepoint"), | ||
n.terms = 1000, n.cores = 1, tolerance = 1e-3, normalize = TRUE){ | ||
|
||
method <- match.arg(method, c("exact", "moments", "saddlepoint")) | ||
|
||
if( method != "exact" & method != "moments" & method != "saddlepoint"){ | ||
stop("method must be one of 'exact', 'moments', or 'saddlepoint'.", call. = FALSE) | ||
} | ||
|
||
if (!missing(ps) & !missing(mus)){ | ||
stop("'mus' and 'ps' both specified", call. = FALSE) | ||
} | ||
|
||
if (missing(ps) & missing(mus)){ | ||
stop("One of 'mus' and 'ps' must be specified", call. = FALSE) | ||
} | ||
|
||
if( method == "exact" ){ | ||
if (missing(ps) & !missing(mus)){ | ||
ps <- phis / ( phis + mus ) | ||
} | ||
if (length(ps) != length(phis)){ | ||
stop("'ps' and 'phis' must have the same length.", call. = FALSE) | ||
} | ||
} | ||
|
||
if( method == "moments" | method == "saddlepoint"){ | ||
if (missing(mus) & !missing(ps)){ | ||
mus <- phis*(1 - ps)/ps | ||
} | ||
if (length(mus) != length(phis)){ | ||
stop("'mus' and 'phis' must have the same length.", call. = FALSE) | ||
} | ||
} | ||
|
||
pmf <- switch( method, | ||
"exact" = nb_sum_exact( ps = ps, phis = phis, counts = counts, n.terms = n.terms, n.cores = n.cores, tolerance = tolerance ), | ||
"moments" = nb_sum_moments( mus = mus, phis = phis, counts = counts ), | ||
"saddlepoint" = nb_sum_saddlepoint( mus = mus, phis = phis, counts, normalize = normalize, n.cores = n.cores ) ) | ||
|
||
return( pmf ) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#' Implements Furman's exact PMF for the evaluation of the sum of arbitrary NB random variables. Called by other functions. Not intended to be run alone. | ||
#' | ||
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom. | ||
#'@param ps Vector of individual probabilities of success. | ||
#'@param n.terms The number of terms to include in the series for evaluating the PMF at a given number of counts. Defaults to 1000. | ||
#'@param counts The vector of counts over which the PMF is evaluated. | ||
#'@param n.cores The number of CPU cores to use in the evaluation. Allows parallelization. | ||
#'@param tolerance The acceptable difference between the sum of the K distribution and 1. | ||
#' | ||
#'@returns A numeric vector of probability densities. | ||
#' | ||
#'@examples nb_sum_exact(ps = c(0.05, 0.44), phis = c(5, 8), counts = 0:500) | ||
#' | ||
#'@import parallel | ||
#'@import matrixStats | ||
#' | ||
#'@export | ||
#' | ||
nb_sum_exact <- function(phis, ps, n.terms = 1000, counts, n.cores = 1, tolerance = 1e-3){ | ||
# Implements the PMF described in https://ssrn.com/abstract=1650365 | ||
|
||
qs <- 1 - ps | ||
pmax <- max(ps) | ||
qmax <- 1 - pmax | ||
phisum <- sum(phis) | ||
|
||
logR <- sum( -phis * ( log( qs * pmax ) - log ( ps * qmax ) ) ) | ||
|
||
xi <- c() | ||
xtmp <- c() | ||
for ( i in 1:n.terms ){ | ||
xtmp <- ( log( phis ) + i * log( 1 - qmax * ps / ( qs * pmax ) ) ) - log( i ) | ||
xi[i] <- logSumExp( xtmp ) | ||
} | ||
|
||
delta <- c() | ||
dtmp <- c() | ||
delta[1] <- 0 | ||
|
||
for (k in 1:( n.terms - 1 )){ | ||
for ( i in 1:k ) { | ||
previndex <- k + 1 - i | ||
dtmp[i] <- log( i ) + xi[i] + delta[previndex] | ||
} | ||
delta[k + 1] <- logSumExp( dtmp ) - log( k ) | ||
} | ||
|
||
logKdist <- logR + delta | ||
Ktest <- all.equal(1, sum(exp(logKdist)), tolerance = tolerance) | ||
|
||
if (!isTRUE( Ktest )){ | ||
stop( paste0( "The sum of the K distribution is insufficiently close to 1. ", Ktest, ". Use more terms." ), call. = FALSE ) | ||
} | ||
|
||
mass_calc <- function(x){ | ||
total <- 0 | ||
for (k in 0:(n.terms - 1)){ | ||
probs <- delta[k + 1] + ( lgamma( phisum + x + k ) - lgamma( phisum + k ) - lfactorial( x ) + ( phisum + k ) * log( pmax ) + x * log( qmax ) ) | ||
total <- total + exp( probs ) | ||
} | ||
masses <- log( total ) + logR | ||
return( masses ) | ||
} | ||
|
||
if (n.cores == 1){ | ||
pmf <- mass_calc(x = counts) | ||
} else { | ||
count.list <- split(counts, ceiling((seq_along(counts))/1000)) | ||
|
||
pmf.list <- mclapply(X = count.list, | ||
FUN = function(y) { | ||
new.counts <- y | ||
pmf <- mass_calc(x = new.counts) | ||
return(pmf) }, | ||
mc.cores = n.cores) | ||
|
||
pmf <- Reduce(c, pmf.list) | ||
} | ||
if (is.numeric( pmf )){ | ||
return( exp ( pmf ) ) | ||
} else{ | ||
error <- sub( "Error : *", "", pmf[1] ) | ||
stop(paste0(error, "\n"), call. = FALSE) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#' Implements the method of moments approximation for the sum of arbitrary NB random variables. Called by other functions. Not intended to be run alone. | ||
#' | ||
#'@param mus Vector of individual mean values. | ||
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom. | ||
#'@param counts The vector of counts over which the PMF is evaluated. | ||
#' | ||
#'@returns A numeric vector of probability densities. | ||
#' | ||
#'@examples nb_sum_moments(mus = c(100, 10), phis = c(5, 8), counts = 0:500) | ||
#' | ||
#'@importFrom stats "dnbinom" | ||
#' | ||
#'@export | ||
#' | ||
nb_sum_moments <- function(mus, phis, counts){ | ||
|
||
mu.moment <- sum( mus ) | ||
phi.moment <- sum( mus )^2 / sum( mus^2 / phis ) | ||
|
||
moments.pmf <- dnbinom(x = counts, | ||
size = phi.moment, | ||
mu = mu.moment) | ||
|
||
return(moments.pmf) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#' Implements the saddlepoint approximation for the sum of arbitrary NB random variables. Called by other functions. Not intended to be run alone. | ||
#' | ||
#' Inspired by https://www.martinmodrak.cz/2019/06/20/approximate-densities-for-sums-of-variables-negative-binomials-and-saddlepoint/ | ||
#' | ||
#'@param mus Vector of individual mean values. | ||
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom. | ||
#'@param counts The vector of counts over which the PMF is evaluated. | ||
#'@param normalize Boolean. If TRUE, the PMF is normalized to sum to 1. | ||
#'@param n.cores The number of CPU cores to use in the evaluation. Allows parallelization. | ||
#' | ||
#'@returns A numeric vector of probability densities. | ||
#' | ||
#'@examples nb_sum_saddlepoint(mus = c(100, 10), phis = c(5, 8), counts = 0:500) | ||
#' | ||
#'@import matrixStats | ||
#'@import parallel | ||
#'@importFrom stats "pnbinom" | ||
#'@importFrom stats "uniroot" | ||
#' | ||
#'@export | ||
#' | ||
nb_sum_saddlepoint <- function(mus, phis, counts, normalize = TRUE, n.cores = 1){ | ||
|
||
# Inspired by https://www.martinmodrak.cz/2019/06/20/approximate-densities-for-sums-of-variables-negative-binomials-and-saddlepoint/ | ||
|
||
saddlepoint_calc <- function(mus, phis, counts){ | ||
|
||
K <- function(t) { sum( phis * ( log( phis ) - log( phis + mus * ( 1 - exp( t ) ) ) ) ) } | ||
dK <- function(t) { logSumExp( log( phis ) + log( mus ) + t - log( phis + mus - mus * exp( t ) ) ) } | ||
ddK <- function(t) { logSumExp( log( phis ) + log( mus ) + log( phis + mus ) + t - 2 * log( phis + mus - mus * exp( t ) ) ) } | ||
pmf_eq <- function(t, x) { -0.5 * ( log( 2 * pi ) + ddK( t ) ) + K( t ) - t * x } | ||
|
||
if ( min( counts ) == 0 ){ | ||
pmf0 <- prod( pnbinom( 0, size = phis, mu = mus ) ) | ||
} | ||
|
||
counts <- counts[counts != 0] | ||
|
||
pmf <- sapply(X = counts, | ||
FUN = function(x) { | ||
t <- uniroot(function(t) { dK(t) - log(x) }, | ||
lower = -1e2, | ||
upper = min( log( phis / mus + 1 ) ), | ||
extendInt = "yes", | ||
tol = sqrt( .Machine$double.eps ) )$root | ||
pmf <- pmf_eq(t, x) | ||
return(pmf) | ||
} | ||
) | ||
|
||
if (exists("pmf0")){ | ||
pmf <- c( pmf0, exp( pmf ) ) | ||
} | ||
else{ | ||
pmf <- exp( pmf ) | ||
} | ||
return(pmf) | ||
} | ||
|
||
if (n.cores == 1){ | ||
saddlepoint.pmf <- saddlepoint_calc(mus = mus, | ||
phis = phis, | ||
counts = counts) | ||
} | ||
else{ | ||
counts.list <- split( counts, ceiling( ( seq_along( counts ) ) / 1000 ) ) | ||
|
||
pmf.list <- mclapply(X = counts.list, | ||
FUN = function(y) { | ||
pmf <- saddlepoint_calc(mus = mus, | ||
phis = phis, | ||
counts = y) | ||
return(pmf) }, | ||
mc.cores = n.cores) | ||
|
||
saddlepoint.pmf <- Reduce(c, pmf.list) | ||
} | ||
|
||
if (isTRUE( normalize )){ | ||
saddlepoint.pmf <- saddlepoint.pmf / sum( saddlepoint.pmf ) | ||
} | ||
|
||
return( saddlepoint.pmf ) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#' Calculates distribution parameters for the convolution of arbitrary negative binomial random variables. | ||
#' | ||
#'@param mus Vector of individual mean values | ||
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom. | ||
#'@param ps Vector of individual probabilities of success. | ||
#' | ||
#'@returns A named numeric vector of distribution parameters. | ||
#' | ||
#'@examples nbconv_params(mus = c(100, 10), phis = c(5, 8)) | ||
#' | ||
#'@import parallel | ||
#'@import matrixStats | ||
#' | ||
#'@export | ||
#' | ||
nbconv_params <- function(mus, phis, ps){ | ||
|
||
if (!missing(ps) & !missing(mus)){ | ||
stop("'mus' and 'ps' both specified", call. = FALSE) | ||
} | ||
|
||
if (missing(ps) & missing(mus)){ | ||
stop("One of 'mus' and 'ps' must be specified", call. = FALSE) | ||
} | ||
|
||
if (missing(mus) & !missing(ps)){ | ||
mus <- phis*(1 - ps)/ps | ||
} | ||
|
||
if (length(mus) != length(phis)){ | ||
stop("'mus' and 'phis' must have the same length.", call. = FALSE) | ||
} | ||
|
||
# First four cumulants expressed in terms of mu and phi | ||
k1 <- sum( mus ) | ||
k2 <- sum( mus + mus^2 / phis ) | ||
k3 <- sum( ( 2 * mus + phis ) * ( mus + phis ) * mus / phis^2 ) | ||
k4 <- sum( ( 6 * mus^2 + 6 * mus * phis + phis^2)*( mus + phis ) * mus / phis^3 ) | ||
|
||
# Central moments expressed in terms of cumulants | ||
# m1 <- k1 | ||
# m2 <- k2 | ||
# m3 <- k3 | ||
# m4 <- k4 + 3 * k2^2 | ||
|
||
mean <- k1 | ||
sigma2 <- k2 | ||
skewness <- k3 / k2^(3/2) | ||
ekurtosis <- k4 / k2^2 | ||
|
||
pmax <- max( phis / ( phis + mus ) ) | ||
qmax <- 1 - pmax | ||
|
||
K.mean <- ( mean * pmax / qmax ) - sum( phis ) | ||
|
||
params <- c( mean = mean, sigma2 = sigma2, skewness = skewness, ekurtosis = ekurtosis, K.mean = K.mean) | ||
|
||
return( params ) | ||
} |
Oops, something went wrong.