From c3711a37fdf75039b99c38030b74e43a103cdb6b Mon Sep 17 00:00:00 2001 From: warren Date: Wed, 2 Feb 2022 12:04:17 -0500 Subject: [PATCH] Some updates for NegativeBinomial * Uncomment the check_discrete_distribution tests. * Expand the documentation--explain the meaning of `r` and `p`. * Note that `r` can be real. Closes gh-115. --- src/distribution/negative_binomial.rs | 99 +++++++++++++++++---------- 1 file changed, 64 insertions(+), 35 deletions(-) diff --git a/src/distribution/negative_binomial.rs b/src/distribution/negative_binomial.rs index bfb9c078..7601f382 100644 --- a/src/distribution/negative_binomial.rs +++ b/src/distribution/negative_binomial.rs @@ -6,8 +6,22 @@ use rand::Rng; use std::f64; /// Implements the -/// [NegativeBinomial](http://en.wikipedia.org/wiki/Negative_binomial_distribution) -/// distribution +/// [negative binomial](http://en.wikipedia.org/wiki/Negative_binomial_distribution) +/// distribution. +/// +/// *Please note carefully the meaning of the parameters.* As noted in the +/// wikipedia article, there are several different commonly used conventions +/// for the parameters of the negative binomial distribution. +/// +/// The negative binomial distribution is a discrete distribution with two +/// parameters, `r` and `p`. When `r` is an integer, the negative binomial +/// distribution can be interpreted as the distribution of the number of +/// failures in a sequence of Bernoulli trials that continue until `r` +/// successes occur. `p` is the probability of success in a single Bernoulli +/// trial. +/// +/// `NegativeBinomial` accepts non-integer values for `r`. This is a +/// generalization of the more common case where `r` is an integer. /// /// # Examples /// @@ -28,8 +42,11 @@ pub struct NegativeBinomial { } impl NegativeBinomial { - /// Constructs a new negative binomial distribution - /// with a given `p` probability of the number of successes `r` + /// Constructs a new negative binomial distribution with parameters `r` + /// and `p`. When `r` is an integer, the negative binomial distribution + /// can be interpreted as the distribution of the number of failures in + /// a sequence of Bernoulli trials that continue until `r` successes occur. + /// `p` is the probability of success in a single Bernoulli trial. /// /// # Errors /// @@ -55,8 +72,9 @@ impl NegativeBinomial { } } - /// Returns the probability of success `p` of - /// the negative binomial distribution. + /// Returns the probability of success `p` of a single + /// Bernoulli trial associated with the negative binomial + /// distribution. /// /// # Examples /// @@ -71,7 +89,7 @@ impl NegativeBinomial { } /// Returns the number `r` of success of this negative - /// binomial distribution + /// binomial distribution. /// /// # Examples /// @@ -95,13 +113,7 @@ impl ::rand::distributions::Distribution for NegativeBinomial { impl DiscreteCDF for NegativeBinomial { /// Calculates the cumulative distribution function for the - /// negative binomial distribution at `x` - /// - /// Note that due to extending the distribution to the reals - /// (allowing positive real values for `r`), while still technically - /// a discrete distribution the CDF behaves more like that of a - /// continuous distribution rather than a discrete distribution - /// (i.e. a smooth graph rather than a step-ladder) + /// negative binomial distribution at `x`. /// /// # Formula /// @@ -109,7 +121,7 @@ impl DiscreteCDF for NegativeBinomial { /// 1 - I_(1 - p)(x + 1, r) /// ``` /// - /// where `I_(x)(a, b)` is the regularized incomplete beta function + /// where `I_(x)(a, b)` is the regularized incomplete beta function. fn cdf(&self, x: u64) -> f64 { 1.0 - beta::beta_reg(x as f64 + 1.0, self.r, 1.0 - self.p) } @@ -118,7 +130,7 @@ impl DiscreteCDF for NegativeBinomial { impl Min for NegativeBinomial { /// Returns the minimum value in the domain of the /// negative binomial distribution representable by a 64-bit - /// integer + /// integer. /// /// # Formula /// @@ -133,7 +145,7 @@ impl Min for NegativeBinomial { impl Max for NegativeBinomial { /// Returns the maximum value in the domain of the /// negative binomial distribution representable by a 64-bit - /// integer + /// integer. /// /// # Formula /// @@ -146,7 +158,7 @@ impl Max for NegativeBinomial { } impl DiscreteDistribution for NegativeBinomial { - /// Returns the mean of the negative binomial distribution + /// Returns the mean of the negative binomial distribution. /// /// # Formula /// @@ -156,7 +168,7 @@ impl DiscreteDistribution for NegativeBinomial { fn mean(&self) -> Option { Some(self.r * (1.0 - self.p) / self.p) } - /// Returns the variance of the negative binomial distribution + /// Returns the variance of the negative binomial distribution. /// /// # Formula /// @@ -166,7 +178,7 @@ impl DiscreteDistribution for NegativeBinomial { fn variance(&self) -> Option { Some(self.r * (1.0 - self.p) / (self.p * self.p)) } - /// Returns the skewness of the negative binomial distribution + /// Returns the skewness of the negative binomial distribution. /// /// # Formula /// @@ -179,7 +191,7 @@ impl DiscreteDistribution for NegativeBinomial { } impl Mode> for NegativeBinomial { - /// Returns the mode for the negative binomial distribution + /// Returns the mode for the negative binomial distribution. /// /// # Formula /// @@ -201,30 +213,50 @@ impl Mode> for NegativeBinomial { impl Discrete for NegativeBinomial { /// Calculates the probability mass function for the negative binomial - /// distribution at `x` + /// distribution at `x`. /// /// # Formula /// + /// When `r` is an integer, the formula is: + /// + /// ```ignore + /// (x + r - 1 choose x) * (1 - p)^x * p^r + /// ``` + /// + /// The general formula for real `r` is: + /// /// ```ignore - /// (x + r - 1 choose k) * (1 - p)^x * p^r + /// Γ(r + x)/(Γ(r) * Γ(x + 1)) * (1 - p)^x * p^r /// ``` + /// + /// where Γ(x) is the Gamma function. fn pmf(&self, x: u64) -> f64 { self.ln_pmf(x).exp() } /// Calculates the log probability mass function for the negative binomial - /// distribution at `x` + /// distribution at `x`. /// /// # Formula /// + /// When `r` is an integer, the formula is: + /// /// ```ignore - /// ln(x + r - 1 choose k) * (1 - p)^x * p^r)) + /// ln((x + r - 1 choose x) * (1 - p)^x * p^r) /// ``` + /// + /// The general formula for real `r` is: + /// + /// ```ignore + /// ln(Γ(r + x)/(Γ(r) * Γ(x + 1)) * (1 - p)^x * p^r) + /// ``` + /// + /// where Γ(x) is the Gamma function. fn ln_pmf(&self, x: u64) -> f64 { let k = x as f64; gamma::ln_gamma(self.r + k) - gamma::ln_gamma(self.r) - gamma::ln_gamma(k + 1.0) + (self.r * self.p.ln()) - + (k * (1.0 - self.p).ln()) + + (k * (-self.p).ln_1p()) } } @@ -234,6 +266,7 @@ mod tests { use std::fmt::Debug; use crate::statistics::*; use crate::distribution::{DiscreteCDF, Discrete, NegativeBinomial}; + use crate::distribution::internal::test; use crate::consts::ACC; fn try_create(r: f64, p: f64) -> NegativeBinomial { @@ -424,13 +457,9 @@ mod tests { test_case(3.0, 0.5, 1.0, cdf(100)); } - // TODO: figure out the best way to re-implement this test. We currently - // do not have a good way to characterize a discrete distribution with a - // CDF that is continuous - // - // #[test] - // fn test_discrete() { - // test::check_discrete_distribution(&try_create(5.0, 0.3), 35); - // test::check_discrete_distribution(&try_create(10.0, 0.7), 21); - // } + #[test] + fn test_discrete() { + test::check_discrete_distribution(&try_create(5.0, 0.3), 35); + test::check_discrete_distribution(&try_create(10.0, 0.7), 21); + } }