From c3711a37fdf75039b99c38030b74e43a103cdb6b Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Wed, 2 Feb 2022 12:04:17 -0500
Subject: [PATCH] Some updates for NegativeBinomial

* Uncomment the check_discrete_distribution tests.
* Expand the documentation--explain the meaning of `r` and `p`.
* Note that `r` can be real.

Closes gh-115.
---
 src/distribution/negative_binomial.rs | 99 +++++++++++++++++----------
 1 file changed, 64 insertions(+), 35 deletions(-)
diff --git a/src/distribution/negative_binomial.rs b/src/distribution/negative_binomial.rs
index bfb9c078..7601f382 100644
--- a/src/distribution/negative_binomial.rs
+++ b/src/distribution/negative_binomial.rs
@@ -6,8 +6,22 @@ use rand::Rng;
 use std::f64;
 
 /// Implements the
-/// [NegativeBinomial](http://en.wikipedia.org/wiki/Negative_binomial_distribution)
-/// distribution
+/// [negative binomial](http://en.wikipedia.org/wiki/Negative_binomial_distribution)
+/// distribution.
+///
+/// *Please note carefully the meaning of the parameters.*  As noted in the
+/// wikipedia article, there are several different commonly used conventions
+/// for the parameters of the negative binomial distribution.
+///
+/// The negative binomial distribution is a discrete distribution with two
+/// parameters, `r` and `p`.  When `r` is an integer, the negative binomial
+/// distribution can be interpreted as the distribution of the number of
+/// failures in a sequence of Bernoulli trials that continue until `r`
+/// successes occur.  `p` is the probability of success in a single Bernoulli
+/// trial.
+///
+/// `NegativeBinomial` accepts non-integer values for `r`.  This is a
+/// generalization of the more common case where `r` is an integer.
 ///
 /// # Examples
 ///
@@ -28,8 +42,11 @@ pub struct NegativeBinomial {
 }
 
 impl NegativeBinomial {
-    /// Constructs a new negative binomial distribution
-    /// with a given `p` probability of the number of successes `r`
+    /// Constructs a new negative binomial distribution with parameters `r`
+    /// and `p`.  When `r` is an integer, the negative binomial distribution
+    /// can be interpreted as the distribution of the number of failures in
+    /// a sequence of Bernoulli trials that continue until `r` successes occur.
+    /// `p` is the probability of success in a single Bernoulli trial.
     ///
     /// # Errors
     ///
@@ -55,8 +72,9 @@ impl NegativeBinomial {
         }
     }
 
-    /// Returns the probability of success `p` of
-    /// the negative binomial distribution.
+    /// Returns the probability of success `p` of a single
+    /// Bernoulli trial associated with the negative binomial
+    /// distribution.
     ///
     /// # Examples
     ///
@@ -71,7 +89,7 @@ impl NegativeBinomial {
     }
 
     /// Returns the number `r` of success of this negative
-    /// binomial distribution
+    /// binomial distribution.
     ///
     /// # Examples
     ///
@@ -95,13 +113,7 @@ impl ::rand::distributions::Distribution<u64> for NegativeBinomial {
 
 impl DiscreteCDF<u64, f64> for NegativeBinomial {
     /// Calculates the cumulative distribution function for the
-    /// negative binomial distribution at `x`
-    ///
-    /// Note that due to extending the distribution to the reals
-    /// (allowing positive real values for `r`), while still technically
-    /// a discrete distribution the CDF behaves more like that of a
-    /// continuous distribution rather than a discrete distribution
-    /// (i.e. a smooth graph rather than a step-ladder)
+    /// negative binomial distribution at `x`.
     ///
     /// # Formula
     ///
@@ -109,7 +121,7 @@ impl DiscreteCDF<u64, f64> for NegativeBinomial {
     /// 1 - I_(1 - p)(x + 1, r)
     /// ```
     ///
-    /// where `I_(x)(a, b)` is the regularized incomplete beta function
+    /// where `I_(x)(a, b)` is the regularized incomplete beta function.
     fn cdf(&self, x: u64) -> f64 {
         1.0 - beta::beta_reg(x as f64 + 1.0, self.r, 1.0 - self.p)
     }
@@ -118,7 +130,7 @@ impl DiscreteCDF<u64, f64> for NegativeBinomial {
 impl Min<u64> for NegativeBinomial {
     /// Returns the minimum value in the domain of the
     /// negative binomial distribution representable by a 64-bit
-    /// integer
+    /// integer.
     ///
     /// # Formula
     ///
@@ -133,7 +145,7 @@ impl Min<u64> for NegativeBinomial {
 impl Max<u64> for NegativeBinomial {
     /// Returns the maximum value in the domain of the
     /// negative binomial distribution representable by a 64-bit
-    /// integer
+    /// integer.
     ///
     /// # Formula
     ///
@@ -146,7 +158,7 @@ impl Max<u64> for NegativeBinomial {
 }
 
 impl DiscreteDistribution<f64> for NegativeBinomial {
-    /// Returns the mean of the negative binomial distribution
+    /// Returns the mean of the negative binomial distribution.
     ///
     /// # Formula
     ///
@@ -156,7 +168,7 @@ impl DiscreteDistribution<f64> for NegativeBinomial {
     fn mean(&self) -> Option<f64> {
         Some(self.r * (1.0 - self.p) / self.p)
     }
-    /// Returns the variance of the negative binomial distribution
+    /// Returns the variance of the negative binomial distribution.
     ///
     /// # Formula
     ///
@@ -166,7 +178,7 @@ impl DiscreteDistribution<f64> for NegativeBinomial {
     fn variance(&self) -> Option<f64> {
         Some(self.r * (1.0 - self.p) / (self.p * self.p))
     }
-    /// Returns the skewness of the negative binomial distribution
+    /// Returns the skewness of the negative binomial distribution.
     ///
     /// # Formula
     ///
@@ -179,7 +191,7 @@ impl DiscreteDistribution<f64> for NegativeBinomial {
 }
 
 impl Mode<Option<f64>> for NegativeBinomial {
-    /// Returns the mode for the negative binomial distribution
+    /// Returns the mode for the negative binomial distribution.
     ///
     /// # Formula
     ///
@@ -201,30 +213,50 @@ impl Mode<Option<f64>> for NegativeBinomial {
 
 impl Discrete<u64, f64> for NegativeBinomial {
     /// Calculates the probability mass function for the negative binomial
-    /// distribution at `x`
+    /// distribution at `x`.
     ///
     /// # Formula
     ///
+    /// When `r` is an integer, the formula is:
+    ///
+    /// ```ignore
+    /// (x + r - 1 choose x) * (1 - p)^x * p^r
+    /// ```
+    ///
+    /// The general formula for real `r` is:
+    ///
     /// ```ignore
-    /// (x + r - 1 choose k) * (1 - p)^x * p^r
+    /// Γ(r + x)/(Γ(r) * Γ(x + 1)) * (1 - p)^x * p^r
     /// ```
+    ///
+    /// where Γ(x) is the Gamma function.
     fn pmf(&self, x: u64) -> f64 {
         self.ln_pmf(x).exp()
     }
 
     /// Calculates the log probability mass function for the negative binomial
-    /// distribution at `x`
+    /// distribution at `x`.
     ///
     /// # Formula
     ///
+    /// When `r` is an integer, the formula is:
+    ///
     /// ```ignore
-    /// ln(x + r - 1 choose k) * (1 - p)^x * p^r))
+    /// ln((x + r - 1 choose x) * (1 - p)^x * p^r)
     /// ```
+    ///
+    /// The general formula for real `r` is:
+    ///
+    /// ```ignore
+    /// ln(Γ(r + x)/(Γ(r) * Γ(x + 1)) * (1 - p)^x * p^r)
+    /// ```
+    ///
+    /// where Γ(x) is the Gamma function.
     fn ln_pmf(&self, x: u64) -> f64 {
         let k = x as f64;
         gamma::ln_gamma(self.r + k) - gamma::ln_gamma(self.r) - gamma::ln_gamma(k + 1.0)
             + (self.r * self.p.ln())
-            + (k * (1.0 - self.p).ln())
+            + (k * (-self.p).ln_1p())
     }
 }
 
@@ -234,6 +266,7 @@ mod tests {
     use std::fmt::Debug;
     use crate::statistics::*;
     use crate::distribution::{DiscreteCDF, Discrete, NegativeBinomial};
+    use crate::distribution::internal::test;
     use crate::consts::ACC;
 
     fn try_create(r: f64, p: f64) -> NegativeBinomial {
@@ -424,13 +457,9 @@ mod tests {
         test_case(3.0, 0.5, 1.0, cdf(100));
     }
 
-    // TODO: figure out the best way to re-implement this test. We currently
-    // do not have a good way to characterize a discrete distribution with a
-    // CDF that is continuous
-    //
-    // #[test]
-    // fn test_discrete() {
-    //     test::check_discrete_distribution(&try_create(5.0, 0.3), 35);
-    //     test::check_discrete_distribution(&try_create(10.0, 0.7), 21);
-    // }
+    #[test]
+    fn test_discrete() {
+        test::check_discrete_distribution(&try_create(5.0, 0.3), 35);
+        test::check_discrete_distribution(&try_create(10.0, 0.7), 21);
+    }
 }