Merge pull request #127 from better/mcmc-not-ci

Change the `ci` flag to `mcmc`
better · Mar 31, 2020 · 683e08c · 683e08c
2 parents 0eef63a + 84ec1a2
commit 683e08c
Show file tree

Hide file tree

Showing 4 changed files with 28 additions and 23 deletions.
diff --git a/convoys/plotting.py b/convoys/plotting.py
@@ -7,10 +7,10 @@
 
 _models = {
     'kaplan-meier': lambda ci: convoys.multi.KaplanMeier(),
-    'exponential': lambda ci: convoys.multi.Exponential(ci=ci),
-    'weibull': lambda ci: convoys.multi.Weibull(ci=ci),
-    'gamma': lambda ci: convoys.multi.Gamma(ci=ci),
-    'generalized-gamma': lambda ci: convoys.multi.GeneralizedGamma(ci=ci),
+    'exponential': lambda ci: convoys.multi.Exponential(mcmc=ci),
+    'weibull': lambda ci: convoys.multi.Weibull(mcmc=ci),
+    'gamma': lambda ci: convoys.multi.Gamma(mcmc=ci),
+    'generalized-gamma': lambda ci: convoys.multi.GeneralizedGamma(mcmc=ci),
 }
 
 

diff --git a/convoys/regression.py b/convoys/regression.py
@@ -75,7 +75,7 @@ class RegressionModel(object):
 class GeneralizedGamma(RegressionModel):
     ''' Generalization of Gamma, Weibull, and Exponential
 
-    :param ci: boolean, defaults to False. Whether to use MCMC to
+    :param mcmc: boolean, defaults to False. Whether to use MCMC to
         sample from the posterior so that a confidence interval can be
         estimated later (see :meth:`predict`).
     :param hierarchical: boolean denoting whether we have a (Normal) prior
@@ -86,6 +86,7 @@ class GeneralizedGamma(RegressionModel):
         linear model is fit, where the beta params will be completely
         additive. This creates a much more interpretable model, with some
         minor loss of accuracy.
+    :param ci: boolean, deprecated alias for `mcmc`.
 
     This mostly follows the `Wikipedia article
     <https://en.wikipedia.org/wiki/Generalized_gamma_distribution>`_, although
@@ -161,17 +162,21 @@ class GeneralizedGamma(RegressionModel):
     <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html#scipy.optimize.minimize>`_
     with the SLSQP method.
 
-    If `ci == True`, then `emcee <http://dfm.io/emcee/current/>`_ is used
+    If `mcmc == True`, then `emcee <http://dfm.io/emcee/current/>`_ is used
     to sample from the full posterior in order to generate uncertainty
     estimates for all parameters.
     '''
-    def __init__(self, ci=False, fix_k=None, fix_p=None, hierarchical=True,
-                 flavor='logistic'):
-        self._ci = ci
+    def __init__(self, mcmc=False, fix_k=None, fix_p=None, hierarchical=True,
+                 flavor='logistic', ci=None):
+        self._mcmc = mcmc
         self._fix_k = fix_k
         self._fix_p = fix_p
         self._hierarchical = hierarchical
         self._flavor = flavor
+        if ci is not None:
+            warnings.warn('The `ci` argument is deprecated in 0.2.1 in favor '
+                          'of `mcmc`.', DeprecationWarning)
+            self._mcmc = ci
 
     def fit(self, X, B, T, W=None):
         '''Fits the model.
@@ -241,7 +246,7 @@ def callback(LL, value_history=[]):
                           'Norm of gradient is %f' % gradient_norm)
 
         # Let's sample from the posterior to compute uncertainties
-        if self._ci:
+        if self._mcmc:
             dim, = res.x.shape
             n_walkers = 5*dim
             sampler = emcee.EnsembleSampler(
@@ -294,10 +299,10 @@ def _predict(self, params, x, t):
     def predict_posteriori(self, x, t):
         ''' Returns the trace samples generated via the MCMC steps.
 
-        Requires the model to be fit with `ci = True`.'''
+        Requires the model to be fit with `mcmc == True`.'''
         x = numpy.array(x)
         t = numpy.array(t)
-        assert self._ci
+        assert self._mcmc
         params = self.params['samples']
         t = numpy.expand_dims(t, -1)
         return self._predict(params, x, t)
@@ -319,7 +324,7 @@ def predict_ci(self, x, t, ci=0.8):
 
     def predict(self, x, t):
         '''Returns the value of the cumulative distribution function
-        for a fitted model.
+        for a fitted model (using the maximum a posteriori estimate).
 
         :param x: feature vector (or matrix)
         :param t: time
@@ -334,7 +339,7 @@ def rvs(self, x, n_curves=1, n_samples=1, T=None):
 
         T is optional and means we already observed non-conversion until T
         '''
-        assert self._ci  # Need to be fit with MCMC
+        assert self._mcmc  # Need to be fit with MCMC
         if T is None:
             T = numpy.zeros((n_curves, n_samples))
         else:

diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 '''
 
 setup(name='convoys',
-      version='0.2.0',
+      version='0.2.1',
       description='Fit machine learning models to predict conversion using Weibull and Gamma distributions',
       long_description=long_description,
       url='https://better.engineering/convoys',

diff --git a/test_convoys.py b/test_convoys.py
@@ -64,7 +64,7 @@ def test_output_shapes(c=0.3, lambd=0.1, n=1000, k=5):
     B, T = generate_censored_data(N, E, C)
 
     # Fit model with MCMC (needed for confidence intervals)
-    model = convoys.regression.Exponential(ci=True)
+    model = convoys.regression.Exponential(mcmc=True)
     model.fit(X, B, T)
 
     # Generate output without ci
@@ -91,7 +91,7 @@ def test_output_shapes(c=0.3, lambd=0.1, n=1000, k=5):
     assert model.cdf(X[0], 0, ci=0.8).shape == (3,)
 
     # Fit model without MCMC (should be the same)
-    model = convoys.regression.Exponential(ci=False)
+    model = convoys.regression.Exponential(mcmc=False)
     model.fit(X, B, T)
     assert model.predict(X[0], 0).shape == ()
     assert model.predict([X[0], X[1]], [0, 1]).shape == (2,)
@@ -104,7 +104,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
     N = scipy.stats.uniform.rvs(scale=5./lambd, size=(n,))  # time now
     E = scipy.stats.expon.rvs(scale=1./lambd, size=(n,))  # time of event
     B, T = generate_censored_data(N, E, C)
-    model = convoys.regression.Exponential(ci=True)
+    model = convoys.regression.Exponential(mcmc=True)
     model.fit(X, B, T)
     assert 0.80*c < model.predict([1], float('inf')) < 1.30*c
     for t in [1, 3, 10]:
@@ -126,7 +126,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
         assert 0.70*d < (convert_times < t).mean() < 1.30*d
 
     # Fit a linear model
-    model = convoys.regression.Exponential(ci=False, flavor='linear')
+    model = convoys.regression.Exponential(mcmc=False, flavor='linear')
     model.fit(X, B, T)
     model_c = model.params['map']['b'] + model.params['map']['beta'][0]
     assert 0.9*c < model_c < 1.1*c
@@ -162,7 +162,7 @@ def test_weibull_regression_model(cs=[0.3, 0.5, 0.7],
         assert 0.80 * c < model.predict(x, float('inf')) < 1.30 * c
 
     # Fit a linear model
-    model = convoys.regression.Weibull(ci=False, flavor='linear')
+    model = convoys.regression.Weibull(mcmc=False, flavor='linear')
     model.fit(X, B, T)
     model_cs = model.params['map']['b'] + model.params['map']['beta']
     for model_c, c in zip(model_cs, cs):
@@ -184,7 +184,7 @@ def test_gamma_regression_model(c=0.3, lambd=0.1, k=3.0, n=10000):
     assert 0.80*k < numpy.mean(model.params['map']['k']) < 1.30*k
 
     # Fit a linear model
-    model = convoys.regression.Gamma(ci=False, flavor='linear')
+    model = convoys.regression.Gamma(mcmc=False, flavor='linear')
     model.fit(X, B, T)
     model_c = model.params['map']['b'] + model.params['map']['beta'][0]
     assert 0.9*c < model_c < 1.1*c
@@ -201,7 +201,7 @@ def test_linear_model(n=10000, m=5, k=3.0, lambd=0.1):
     E = numpy.array([sample_weibull(k, lambd) for r in range(n)])
     B, T = generate_censored_data(N, E, C)
 
-    model = convoys.regression.Weibull(ci=False, flavor='linear')
+    model = convoys.regression.Weibull(mcmc=False, flavor='linear')
     model.fit(X, B, T)
 
     # Check the fitted parameters
@@ -365,7 +365,7 @@ def _test_plot_cohorts(model='weibull', extra_model=None):
 def test_plot_cohorts_model():
     df = _generate_dataframe()
     unit, groups, (G, B, T) = convoys.utils.get_arrays(df)
-    model = convoys.multi.Exponential(ci=None)
+    model = convoys.multi.Exponential(mcmc=None)
     model.fit(G, B, T)
     matplotlib.pyplot.clf()
     convoys.plotting.plot_cohorts(G, B, T, model=model, groups=groups)