Merge eed6d45 into 46c8656
erikbern committed Mar 30, 2020
2 parents 46c8656 + eed6d45 commit 2408fab
Showing 7 changed files with 157 additions and 96 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -4,7 +4,7 @@ python:
- "3.6"
- "3.7"
script:
- pip install -U sphinx
- pip install -U sphinx deprecated
- sphinx-build -M html docs/ docs/_build/ -W # Try building it before installing the rest, should work
- pip install -U coveralls flaky pytest pytest-cov
- pip install .
33 changes: 29 additions & 4 deletions convoys/multi.py
@@ -1,3 +1,4 @@
from deprecated.sphinx import deprecated
import numpy
from convoys import regression
from convoys import single
@@ -34,12 +35,24 @@ def _get_x(self, group):
x[group] = 1
return x

def cdf(self, group, *args, **kwargs):
return self.base_model.cdf(self._get_x(group), *args, **kwargs)
def predict(self, group, t):
return self.base_model.predict(self._get_x(group), t)

def predict_ci(self, group, t, ci):
return self.base_model.predict_ci(self._get_x(group), t, ci)

def rvs(self, group, *args, **kwargs):
return self.base_model.rvs(self._get_x(group), *args, **kwargs)

@deprecated(version='0.1.8',
reason='Use :meth:`predict` or :meth:`predict_ci` instead.')
def cdf(self, group, t, ci=None):
'''Returns the predicted values.'''
if ci is not None:
return self.predict_ci(group, t, ci)
else:
return self.predict(group, t)


class SingleToMulti(MultiModel):
def __init__(self, *args, **kwargs):
@@ -60,8 +73,20 @@ def fit(self, G, B, T):
self._group2model[g] = self.base_model_init()
self._group2model[g].fit([b for b, t in BT], [t for b, t in BT])

def cdf(self, group, t, *args, **kwargs):
return self._group2model[group].cdf(t, *args, **kwargs)
def predict(self, group, t):
return self._group2model[group].predict(t)

def predict_ci(self, group, t, ci):
return self._group2model[group].predict_ci(t, ci)

@deprecated(version='0.1.8',
reason='Use :meth:`predict` or :meth:`predict_ci` instead')
def cdf(self, group, t, ci=None):
'''Returns the predicted values.'''
if ci is not None:
return self.predict_ci(group, t, ci)
else:
return self.predict(group, t)


class Exponential(RegressionToMulti):
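In short, the multi-model wrappers gain predict and predict_ci, and cdf survives only as a deprecated shim. A minimal migration sketch for calling code (the Weibull choice and the synthetic data below are illustrative assumptions, not part of this commit):

import numpy as np
from convoys.multi import Weibull  # any RegressionToMulti subclass behaves the same way

# Illustrative data: integer group per observation, conversion flag, observation time
n = 500
G = np.random.randint(0, 2, size=n)
B = np.random.rand(n) < 0.3
T = np.random.exponential(scale=5., size=n)

model = Weibull(ci=True)   # ci=True runs MCMC so confidence intervals are available
model.fit(G, B, T)

t = np.linspace(0.1, 20, 50)
y = model.predict(0, t)                            # was: model.cdf(0, t)
y, y_lo, y_hi = model.predict_ci(0, t, ci=0.8).T   # was: model.cdf(0, t, ci=0.8).T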
4 changes: 2 additions & 2 deletions convoys/plotting.py
@@ -81,14 +81,14 @@ def plot_cohorts(G, B, T, t_max=None, model='kaplan-meier',
label = label_fmt % dict(group=group, n=n, k=k)

if ci is not None:
p_y, p_y_lo, p_y_hi = m.cdf(j, t, ci=ci).T
p_y, p_y_lo, p_y_hi = m.predict_ci(j, t, ci=ci).T
merged_plot_ci_kwargs = {'alpha': 0.2}
merged_plot_ci_kwargs.update(plot_ci_kwargs)
p = ax.fill_between(t, 100. * p_y_lo, 100. * p_y_hi,
**merged_plot_ci_kwargs)
color = p.get_facecolor()[0] # reuse color for the line
else:
p_y = m.cdf(j, t).T
p_y = m.predict(j, t).T
color = None

merged_plot_kwargs = {'color': color, 'linewidth': 1.5,
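The plotting helper keeps its public interface; only the internal calls change, routing to predict_ci when ci is passed and to predict otherwise. A self-contained usage sketch (synthetic data, purely illustrative):

import numpy as np
from matplotlib import pyplot
from convoys.plotting import plot_cohorts

# Illustrative cohort data: group label, conversion flag, time observed
n = 500
G = np.where(np.random.rand(n) < 0.5, 'variant', 'control')
B = np.random.rand(n) < 0.4
T = np.random.exponential(scale=5., size=n)

plot_cohorts(G, B, T, model='kaplan-meier', ci=0.95)  # shaded band is drawn from predict_ci
pyplot.legend()
pyplot.savefig('cohorts.png')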
95 changes: 54 additions & 41 deletions convoys/regression.py
@@ -1,6 +1,7 @@
from convoys import autograd_scipy_monkeypatch # NOQA
import autograd
from autograd_gamma import gammainc
from deprecated.sphinx import deprecated
import emcee
import numpy
from scipy.special import gammaincinv
@@ -76,7 +77,7 @@ class GeneralizedGamma(RegressionModel):
:param ci: boolean, defaults to False. Whether to use MCMC to
sample from the posterior so that a confidence interval can be
estimated later (see :meth:`cdf`).
estimated later (see :meth:`predict`).
:param hierarchical: boolean denoting whether we have a (Normal) prior
on the alpha and beta parameters to regularize. The variance of
the normal distribution is in itself assumed to be an inverse
@@ -278,25 +279,7 @@ def callback(LL, value_history=[]):
'beta': data[6+n_features:6+2*n_features].T,
} for k, data in result.items()}

def cdf_posteriori(self, x, t, ci=None):
'''Returns the value of the cumulative distribution function
for a fitted model.
:param x: feature vector (or matrix)
:param t: time
:param ci: if this is provided, and the model was fit with
`ci = True`, then the return value will be the trace
samples generated via the MCMC steps. If this is not
provided, then the max a posteriori prediction will be used.
'''
x = numpy.array(x)
t = numpy.array(t)
if ci is None:
params = self.params['map']
else:
assert self._ci
params = self.params['samples']
t = numpy.expand_dims(t, -1)
def _predict(self, params, x, t):
lambd = exp(dot(x, params['alpha'].T) + params['a'])
if self._flavor == 'logistic':
c = expit(dot(x, params['beta'].T) + params['b'])
@@ -308,34 +291,49 @@ def cdf_posteriori(self, x, t, ci=None):

return M

def cdf(self, x, t, ci=None):
def predict_posteriori(self, x, t):
''' Returns the trace samples generated via the MCMC steps.
Requires the model to be fit with `ci = True`.'''
x = numpy.array(x)
t = numpy.array(t)
assert self._ci
params = self.params['samples']
t = numpy.expand_dims(t, -1)
return self._predict(params, x, t)

def predict_ci(self, x, t, ci=0.8):
'''Works like :meth:`predict` but produces a confidence interval.
Requires the model to be fit with `ci = True`. The return value
will contain one more dimension than for :meth:`predict`, and
the last dimension will have size 3, containing the mean, the
lower bound of the confidence interval, and the upper bound of
the confidence interval.
'''
M = self.predict_posteriori(x, t)
y = numpy.mean(M, axis=-1)
y_lo = numpy.percentile(M, (1-ci)*50, axis=-1)
y_hi = numpy.percentile(M, (1+ci)*50, axis=-1)
return numpy.stack((y, y_lo, y_hi), axis=-1)

def predict(self, x, t):
'''Returns the value of the cumulative distribution function
for a fitted model. TODO: this should probably be renamed
"predict" in the future to follow the scikit-learn convention.
for a fitted model.
:param x: feature vector (or matrix)
:param t: time
:param ci: if this is provided, and the model was fit with
`ci = True`, then the return value will contain one more
dimension, and the last dimension will have size 3,
containing the mean, the lower bound of the confidence
interval, and the upper bound of the confidence interval.
If this is not provided, then the max a posteriori
prediction will be used.
'''
M = self.cdf_posteriori(x, t, ci)
if not ci:
return M
else:
# Replace the last axis with a 3-element vector
y = numpy.mean(M, axis=-1)
y_lo = numpy.percentile(M, (1-ci)*50, axis=-1)
y_hi = numpy.percentile(M, (1+ci)*50, axis=-1)
return numpy.stack((y, y_lo, y_hi), axis=-1)
params = self.params['map']
x = numpy.array(x)
t = numpy.array(t)
return self._predict(params, x, t)

def rvs(self, x, n_curves=1, n_samples=1, T=None):
# Samples values from this distribution
# T is optional and means we already observed non-conversion until T
''' Samples values from this distribution.
T is optional and means we have already observed non-conversion up until time T.
'''
assert self._ci # Need to be fit with MCMC
if T is None:
T = numpy.zeros((n_curves, n_samples))
@@ -364,6 +362,21 @@ def rvs(self, x, n_curves=1, n_samples=1, T=None):

return B, C

@deprecated(version='0.1.8',
reason='Use :meth:`predict` or :meth:`predict_ci` instead.')
def cdf(self, x, t, ci=False):
'''Returns the predicted values.'''
if ci:
return self.predict_ci(x, t)
else:
return self.predict(x, t)

@deprecated(version='0.1.8',
reason='Use :meth:`predict_posteriori` instead.')
def cdf_posteriori(self, x, t):
'''Returns the a posteriori distribution of the predicted values.'''
return self.predict_posteriori(x, t)


class Exponential(GeneralizedGamma):
''' Specialization of :class:`.GeneralizedGamma` where :math:`k=1, p=1`.
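At the regression level, the old cdf(x, t, ci=...) is split into three explicit methods: predict (MAP point estimate), predict_ci (mean plus lower and upper bounds in a trailing dimension of size 3), and predict_posteriori (the raw MCMC trace). A sketch of the new call pattern (the Weibull choice and the synthetic data are illustrative assumptions, not part of this commit):

import numpy as np
from convoys.regression import Weibull

# Illustrative data: one binary feature, conversion flag, observation time
n = 500
X = np.random.randint(0, 2, size=(n, 1))
B = np.random.rand(n) < 0.3
T = np.random.exponential(scale=5., size=n)

model = Weibull(ci=True)   # MCMC sampling is required for the posterior-based methods
model.fit(X, B, T)

t = np.linspace(0.1, 20, 50)
x = [1]
y_map = model.predict(x, t)              # MAP estimate, shape (50,)
y_ci = model.predict_ci(x, t, ci=0.8)    # shape (50, 3): mean, lower bound, upper bound
trace = model.predict_posteriori(x, t)   # shape (50, number of MCMC samples)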
54 changes: 36 additions & 18 deletions convoys/single.py
@@ -1,3 +1,4 @@
from deprecated.sphinx import deprecated
import numpy
from scipy.special import expit, logit
import scipy.stats
@@ -51,29 +52,46 @@ def fit(self, B, T):
eps = 1e-9
self._ss_clipped = numpy.clip(self._ss, eps, 1.0-eps)

def _get_value_at(self, j, ci):
if ci:
z_lo, z_hi = scipy.stats.norm.ppf([(1-ci)/2, (1+ci)/2])
return (
1 - self._ss[j],
1 - numpy.exp(-numpy.exp(
numpy.log(-numpy.log(self._ss_clipped[j]))
+ z_hi * self._vs[j]**0.5)),
1 - numpy.exp(-numpy.exp(
numpy.log(-numpy.log(self._ss_clipped[j]))
+ z_lo * self._vs[j]**0.5))
)
else:
return 1 - self._ss[j]
def predict(self, t):
'''Returns the predicted values.'''
t = numpy.array(t)
res = numpy.zeros(t.shape)
for indexes, value in numpy.ndenumerate(t):
j = numpy.searchsorted(self._ts, value, side='right') - 1
if j >= len(self._ts) - 1:
# Make the plotting stop at the last value of t
res[indexes] = float('nan')
else:
res[indexes] = 1 - self._ss[j]
return res

def cdf(self, t, ci=None):
def predict_ci(self, t, ci=0.8):
'''Returns the predicted values with a confidence interval.'''
t = numpy.array(t)
res = numpy.zeros(t.shape + (3,) if ci else t.shape)
res = numpy.zeros(t.shape + (3,))
for indexes, value in numpy.ndenumerate(t):
j = numpy.searchsorted(self._ts, value, side='right') - 1
if j >= len(self._ts) - 1:
# Make the plotting stop at the last value of t
res[indexes] = [float('nan')]*3 if ci else float('nan')
res[indexes] = [float('nan')]*3
else:
res[indexes] = self._get_value_at(j, ci)
z_lo, z_hi = scipy.stats.norm.ppf([(1-ci)/2, (1+ci)/2])
res[indexes] = (
1 - self._ss[j],
1 - numpy.exp(-numpy.exp(
numpy.log(-numpy.log(self._ss_clipped[j]))
+ z_hi * self._vs[j]**0.5)),
1 - numpy.exp(-numpy.exp(
numpy.log(-numpy.log(self._ss_clipped[j]))
+ z_lo * self._vs[j]**0.5))
)
return res

@deprecated(version='0.1.8',
reason='Use :meth:`predict` or :meth:`predict_ci` instead.')
def cdf(self, t, ci=None):
'''Returns the predicted values.'''
if ci is not None:
return self.predict_ci(t)
else:
return self.predict(t)
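
The Kaplan-Meier model follows the same split: predict(t) returns point estimates, predict_ci(t, ci) returns an (estimate, lower, upper) triple per time point, and cdf remains as a deprecated wrapper that now warns via the deprecated package. A small sketch (synthetic data, illustrative only):

import numpy as np
from convoys.single import KaplanMeier

# Illustrative data: conversion flag and time of conversion or censoring
B = np.array([True, False, True, True, False, True])
T = np.array([1.0, 4.0, 2.0, 3.0, 5.0, 2.5])

km = KaplanMeier()
km.fit(B, T)

t = np.linspace(0.5, 4, 8)
print(km.predict(t))              # point estimates, shape (8,)
print(km.predict_ci(t, ci=0.95))  # shape (8, 3): estimate, lower bound, upper bound
print(km.cdf(t))                  # still works, but emits a DeprecationWarning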
3 changes: 2 additions & 1 deletion setup.py
@@ -10,7 +10,7 @@
'''

setup(name='convoys',
version='0.1.7',
version='0.1.8',
description='Fit machine learning models to predict conversion using Weibull and Gamma distributions',
long_description=long_description,
url='https://better.engineering/convoys',
@@ -21,6 +21,7 @@
install_requires=[
'autograd',
'autograd-gamma>=0.2.0',
'deprecated',
'emcee>=3.0.0',
'matplotlib>=2.0.0',
'pandas>=0.24.0',
