Skip to content

Commit

Permalink
version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
gbedwell authored and cran-robot committed Apr 27, 2023
0 parents commit caea8f2
Show file tree
Hide file tree
Showing 21 changed files with 878 additions and 0 deletions.
23 changes: 23 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Package: nbconv
Title: Evaluate Arbitrary Negative Binomial Convolutions
Version: 1.0.0
Authors@R:
person(
"Gregory", "Bedwell",
email = "gregoryjbedwell@gmail.com",
role = c("aut", "cre", "cph"),
comment = c(ORCID = "0000-0003-0456-0032") )
URL: https://github.com/gbedwell/nbconv
BugReports: https://github.com/gbedwell/nbconv/issues
Imports: parallel, matrixStats, stats
Description: Three distinct methods are implemented for evaluating the sums of arbitrary negative binomial distributions. These methods are: Furman's exact probability mass function (Furman (2007) <doi:10.1016/j.spl.2006.06.007>), saddlepoint approximation, and a method of moments approximation. Functions are provided to calculate the density function, the distribution function and the quantile function of the convolutions in question given said evaluation methods. Functions for generating random deviates from negative binomial convolutions and for directly calculating the mean, variance, skewness, and excess kurtosis of said convolutions are also provided.
Encoding: UTF-8
RoxygenNote: 7.2.0
License: GPL (>= 3)
NeedsCompilation: no
Packaged: 2023-04-26 12:49:11 UTC; gbedwell
Author: Gregory Bedwell [aut, cre, cph]
(<https://orcid.org/0000-0003-0456-0032>)
Maintainer: Gregory Bedwell <gregoryjbedwell@gmail.com>
Repository: CRAN
Date/Publication: 2023-04-27 12:50:02 UTC
20 changes: 20 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
3075f708f3a4c1fc5edc2fc10deb16db *DESCRIPTION
0ab9635778542a5905b28d6cd7a339b0 *NAMESPACE
458f726e6fe0e86e5f1608bc9f46ef3f *NEWS.md
99bb41a8d47f9e5c9686f6c54cac4188 *R/dnbconv.R
5994f87d65766a9d48f50c8558ccaed5 *R/nb_sum_exact.R
00b27d04a6e966f7858af01fc0600e0f *R/nb_sum_moments.R
44707a8116d7eb1fc833a718a81105d9 *R/nb_sum_saddlepoint.R
1a264257abe9facfbcae593ccec786a6 *R/nbconv_params.R
7afb7decba2428d80c348d90eff1620c *R/pnbconv.R
d9ab1b6104179ee145bb0f61b9bc365b *R/qnbconv.R
b6d18007ed2e927fab8741ec53751f80 *R/rnbconv.R
3fa298bd3de98df62a9d1dd55c6e732d *README.md
baf003c448485c4e5358634418af1913 *man/dnbconv.Rd
118ecda1aa82761fb4d4dd4680edb4b5 *man/nb_sum_exact.Rd
b2179398035882a63d87cb97f17a62e4 *man/nb_sum_moments.Rd
50be4396a808bf89f96351b294a0b6a2 *man/nb_sum_saddlepoint.Rd
3cb368dc0901eae43070ca9ef63be6a2 *man/nbconv_params.Rd
45cf1613f62b02b98495af264cc459c1 *man/pnbconv.Rd
2e987ebbeca2edf473e1b00532dc372a *man/qnbconv.Rd
fc820024b18e01a9f1e19b075846d749 *man/rnbconv.Rd
16 changes: 16 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by roxygen2: do not edit by hand

export(dnbconv)
export(nb_sum_exact)
export(nb_sum_moments)
export(nb_sum_saddlepoint)
export(nbconv_params)
export(pnbconv)
export(qnbconv)
export(rnbconv)
import(matrixStats)
import(parallel)
importFrom(stats,"dnbinom")
importFrom(stats,"pnbinom")
importFrom(stats,"rnbinom")
importFrom(stats,"uniroot")
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# nbconv 0.0.0.9000

* Added a `NEWS.md` file to track changes to the package.
63 changes: 63 additions & 0 deletions R/dnbconv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#' Calculates the PMF for the convolution of arbitrary negative binomial random variables.
#'
#'@param counts The counts over which the convolution is evaluated. Should be a vector.
#'@param mus Vector of individual mean values
#'@param ps Vector of individual probabilities of success.
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom.
#'@param method The method by which to evaluate the PMF. One of "exact", "moments", or "saddlepoint".
#'@param n.terms The number of terms to include in the series for evaluating the PMF at a given number of counts. Defaults to 1000.
#'@param n.cores The number of CPU cores to use in the evaluation. Allows parallelization.
#'@param tolerance The acceptable difference between the sum of the K distribution and 1.
#'@param normalize Boolean. If TRUE, the PMF is normalized to sum to 1.
#'
#'@returns A numeric vector of probability densities.
#'
#'@examples dnbconv(counts = 0:500, mus = c(100, 10), phis = c(5, 8), method = "exact")
#'
#'@import parallel
#'@import matrixStats
#'
#'@export
#'
dnbconv <- function(counts, mus, ps, phis, method = c("exact", "moments", "saddlepoint"),
n.terms = 1000, n.cores = 1, tolerance = 1e-3, normalize = TRUE){

method <- match.arg(method, c("exact", "moments", "saddlepoint"))

if( method != "exact" & method != "moments" & method != "saddlepoint"){
stop("method must be one of 'exact', 'moments', or 'saddlepoint'.", call. = FALSE)
}

if (!missing(ps) & !missing(mus)){
stop("'mus' and 'ps' both specified", call. = FALSE)
}

if (missing(ps) & missing(mus)){
stop("One of 'mus' and 'ps' must be specified", call. = FALSE)
}

if( method == "exact" ){
if (missing(ps) & !missing(mus)){
ps <- phis / ( phis + mus )
}
if (length(ps) != length(phis)){
stop("'ps' and 'phis' must have the same length.", call. = FALSE)
}
}

if( method == "moments" | method == "saddlepoint"){
if (missing(mus) & !missing(ps)){
mus <- phis*(1 - ps)/ps
}
if (length(mus) != length(phis)){
stop("'mus' and 'phis' must have the same length.", call. = FALSE)
}
}

pmf <- switch( method,
"exact" = nb_sum_exact( ps = ps, phis = phis, counts = counts, n.terms = n.terms, n.cores = n.cores, tolerance = tolerance ),
"moments" = nb_sum_moments( mus = mus, phis = phis, counts = counts ),
"saddlepoint" = nb_sum_saddlepoint( mus = mus, phis = phis, counts, normalize = normalize, n.cores = n.cores ) )

return( pmf )
}
85 changes: 85 additions & 0 deletions R/nb_sum_exact.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#' Implements Furman's exact PMF for the evaluation of the sum of arbitrary NB random variables. Called by other functions. Not intended to be run alone.
#'
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom.
#'@param ps Vector of individual probabilities of success.
#'@param n.terms The number of terms to include in the series for evaluating the PMF at a given number of counts. Defaults to 1000.
#'@param counts The vector of counts over which the PMF is evaluated.
#'@param n.cores The number of CPU cores to use in the evaluation. Allows parallelization.
#'@param tolerance The acceptable difference between the sum of the K distribution and 1.
#'
#'@returns A numeric vector of probability densities.
#'
#'@examples nb_sum_exact(ps = c(0.05, 0.44), phis = c(5, 8), counts = 0:500)
#'
#'@import parallel
#'@import matrixStats
#'
#'@export
#'
nb_sum_exact <- function(phis, ps, n.terms = 1000, counts, n.cores = 1, tolerance = 1e-3){
# Implements the PMF described in https://ssrn.com/abstract=1650365

qs <- 1 - ps
pmax <- max(ps)
qmax <- 1 - pmax
phisum <- sum(phis)

logR <- sum( -phis * ( log( qs * pmax ) - log ( ps * qmax ) ) )

xi <- c()
xtmp <- c()
for ( i in 1:n.terms ){
xtmp <- ( log( phis ) + i * log( 1 - qmax * ps / ( qs * pmax ) ) ) - log( i )
xi[i] <- logSumExp( xtmp )
}

delta <- c()
dtmp <- c()
delta[1] <- 0

for (k in 1:( n.terms - 1 )){
for ( i in 1:k ) {
previndex <- k + 1 - i
dtmp[i] <- log( i ) + xi[i] + delta[previndex]
}
delta[k + 1] <- logSumExp( dtmp ) - log( k )
}

logKdist <- logR + delta
Ktest <- all.equal(1, sum(exp(logKdist)), tolerance = tolerance)

if (!isTRUE( Ktest )){
stop( paste0( "The sum of the K distribution is insufficiently close to 1. ", Ktest, ". Use more terms." ), call. = FALSE )
}

mass_calc <- function(x){
total <- 0
for (k in 0:(n.terms - 1)){
probs <- delta[k + 1] + ( lgamma( phisum + x + k ) - lgamma( phisum + k ) - lfactorial( x ) + ( phisum + k ) * log( pmax ) + x * log( qmax ) )
total <- total + exp( probs )
}
masses <- log( total ) + logR
return( masses )
}

if (n.cores == 1){
pmf <- mass_calc(x = counts)
} else {
count.list <- split(counts, ceiling((seq_along(counts))/1000))

pmf.list <- mclapply(X = count.list,
FUN = function(y) {
new.counts <- y
pmf <- mass_calc(x = new.counts)
return(pmf) },
mc.cores = n.cores)

pmf <- Reduce(c, pmf.list)
}
if (is.numeric( pmf )){
return( exp ( pmf ) )
} else{
error <- sub( "Error : *", "", pmf[1] )
stop(paste0(error, "\n"), call. = FALSE)
}
}
25 changes: 25 additions & 0 deletions R/nb_sum_moments.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Implements the method of moments approximation for the sum of arbitrary NB random variables. Called by other functions. Not intended to be run alone.
#'
#'@param mus Vector of individual mean values.
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom.
#'@param counts The vector of counts over which the PMF is evaluated.
#'
#'@returns A numeric vector of probability densities.
#'
#'@examples nb_sum_moments(mus = c(100, 10), phis = c(5, 8), counts = 0:500)
#'
#'@importFrom stats "dnbinom"
#'
#'@export
#'
nb_sum_moments <- function(mus, phis, counts){

mu.moment <- sum( mus )
phi.moment <- sum( mus )^2 / sum( mus^2 / phis )

moments.pmf <- dnbinom(x = counts,
size = phi.moment,
mu = mu.moment)

return(moments.pmf)
}
84 changes: 84 additions & 0 deletions R/nb_sum_saddlepoint.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#' Implements the saddlepoint approximation for the sum of arbitrary NB random variables. Called by other functions. Not intended to be run alone.
#'
#' Inspired by https://www.martinmodrak.cz/2019/06/20/approximate-densities-for-sums-of-variables-negative-binomials-and-saddlepoint/
#'
#'@param mus Vector of individual mean values.
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom.
#'@param counts The vector of counts over which the PMF is evaluated.
#'@param normalize Boolean. If TRUE, the PMF is normalized to sum to 1.
#'@param n.cores The number of CPU cores to use in the evaluation. Allows parallelization.
#'
#'@returns A numeric vector of probability densities.
#'
#'@examples nb_sum_saddlepoint(mus = c(100, 10), phis = c(5, 8), counts = 0:500)
#'
#'@import matrixStats
#'@import parallel
#'@importFrom stats "pnbinom"
#'@importFrom stats "uniroot"
#'
#'@export
#'
nb_sum_saddlepoint <- function(mus, phis, counts, normalize = TRUE, n.cores = 1){

# Inspired by https://www.martinmodrak.cz/2019/06/20/approximate-densities-for-sums-of-variables-negative-binomials-and-saddlepoint/

saddlepoint_calc <- function(mus, phis, counts){

K <- function(t) { sum( phis * ( log( phis ) - log( phis + mus * ( 1 - exp( t ) ) ) ) ) }
dK <- function(t) { logSumExp( log( phis ) + log( mus ) + t - log( phis + mus - mus * exp( t ) ) ) }
ddK <- function(t) { logSumExp( log( phis ) + log( mus ) + log( phis + mus ) + t - 2 * log( phis + mus - mus * exp( t ) ) ) }
pmf_eq <- function(t, x) { -0.5 * ( log( 2 * pi ) + ddK( t ) ) + K( t ) - t * x }

if ( min( counts ) == 0 ){
pmf0 <- prod( pnbinom( 0, size = phis, mu = mus ) )
}

counts <- counts[counts != 0]

pmf <- sapply(X = counts,
FUN = function(x) {
t <- uniroot(function(t) { dK(t) - log(x) },
lower = -1e2,
upper = min( log( phis / mus + 1 ) ),
extendInt = "yes",
tol = sqrt( .Machine$double.eps ) )$root
pmf <- pmf_eq(t, x)
return(pmf)
}
)

if (exists("pmf0")){
pmf <- c( pmf0, exp( pmf ) )
}
else{
pmf <- exp( pmf )
}
return(pmf)
}

if (n.cores == 1){
saddlepoint.pmf <- saddlepoint_calc(mus = mus,
phis = phis,
counts = counts)
}
else{
counts.list <- split( counts, ceiling( ( seq_along( counts ) ) / 1000 ) )

pmf.list <- mclapply(X = counts.list,
FUN = function(y) {
pmf <- saddlepoint_calc(mus = mus,
phis = phis,
counts = y)
return(pmf) },
mc.cores = n.cores)

saddlepoint.pmf <- Reduce(c, pmf.list)
}

if (isTRUE( normalize )){
saddlepoint.pmf <- saddlepoint.pmf / sum( saddlepoint.pmf )
}

return( saddlepoint.pmf )
}
59 changes: 59 additions & 0 deletions R/nbconv_params.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Calculates distribution parameters for the convolution of arbitrary negative binomial random variables.
#'
#'@param mus Vector of individual mean values
#'@param phis Vector of individual dispersion parameters. Equivalent to 'size' in dnbinom.
#'@param ps Vector of individual probabilities of success.
#'
#'@returns A named numeric vector of distribution parameters.
#'
#'@examples nbconv_params(mus = c(100, 10), phis = c(5, 8))
#'
#'@import parallel
#'@import matrixStats
#'
#'@export
#'
nbconv_params <- function(mus, phis, ps){

if (!missing(ps) & !missing(mus)){
stop("'mus' and 'ps' both specified", call. = FALSE)
}

if (missing(ps) & missing(mus)){
stop("One of 'mus' and 'ps' must be specified", call. = FALSE)
}

if (missing(mus) & !missing(ps)){
mus <- phis*(1 - ps)/ps
}

if (length(mus) != length(phis)){
stop("'mus' and 'phis' must have the same length.", call. = FALSE)
}

# First four cumulants expressed in terms of mu and phi
k1 <- sum( mus )
k2 <- sum( mus + mus^2 / phis )
k3 <- sum( ( 2 * mus + phis ) * ( mus + phis ) * mus / phis^2 )
k4 <- sum( ( 6 * mus^2 + 6 * mus * phis + phis^2)*( mus + phis ) * mus / phis^3 )

# Central moments expressed in terms of cumulants
# m1 <- k1
# m2 <- k2
# m3 <- k3
# m4 <- k4 + 3 * k2^2

mean <- k1
sigma2 <- k2
skewness <- k3 / k2^(3/2)
ekurtosis <- k4 / k2^2

pmax <- max( phis / ( phis + mus ) )
qmax <- 1 - pmax

K.mean <- ( mean * pmax / qmax ) - sum( phis )

params <- c( mean = mean, sigma2 = sigma2, skewness = skewness, ekurtosis = ekurtosis, K.mean = K.mean)

return( params )
}

0 comments on commit caea8f2

Please sign in to comment.