Merge eed6d45 into 46c8656
erikbern committed Mar 30, 2020
2 parents 46c8656 + eed6d45 commit 2408fab
Showing 7 changed files with 157 additions and 96 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -4,7 +4,7 @@ python:
- "3.6"
- "3.7"
script:
- pip install -U sphinx
- pip install -U sphinx deprecated
- sphinx-build -M html docs/ docs/_build/ -W # Try building it before installing the rest, should work
- pip install -U coveralls flaky pytest pytest-cov
- pip install .
33 changes: 29 additions & 4 deletions convoys/multi.py
@@ -1,3 +1,4 @@
from deprecated.sphinx import deprecated
import numpy
from convoys import regression
from convoys import single
@@ -34,12 +35,24 @@ def _get_x(self, group):
x[group] = 1
return x

def cdf(self, group, *args, **kwargs):
return self.base_model.cdf(self._get_x(group), *args, **kwargs)
def predict(self, group, t):
return self.base_model.predict(self._get_x(group), t)

def predict_ci(self, group, t, ci):
return self.base_model.predict_ci(self._get_x(group), t, ci)

def rvs(self, group, *args, **kwargs):
return self.base_model.rvs(self._get_x(group), *args, **kwargs)

@deprecated(version='0.1.8',
reason='Use :meth:`predict` or :meth:`predict_ci` instead.')
def cdf(self, group, t, ci=None):
'''Returns the predicted values.'''
if ci is not None:
return self.predict_ci(group, t, ci)
else:
return self.predict(group, t)


class SingleToMulti(MultiModel):
def __init__(self, *args, **kwargs):
@@ -60,8 +73,20 @@ def fit(self, G, B, T):
self._group2model[g] = self.base_model_init()
self._group2model[g].fit([b for b, t in BT], [t for b, t in BT])

def cdf(self, group, t, *args, **kwargs):
return self._group2model[group].cdf(t, *args, **kwargs)
def predict(self, group, t):
return self._group2model[group].predict(t)

def predict_ci(self, group, t, ci):
return self._group2model[group].predict_ci(t, ci)

@deprecated(version='0.1.8',
reason='Use :meth:`predict` or :meth:`predict_ci` instead')
def cdf(self, group, t, ci=None):
'''Returns the predicted values.'''
if ci is not None:
return self.predict_ci(group, t, ci)
else:
return self.predict(group, t)


class Exponential(RegressionToMulti):
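In short, the multi-model wrappers gain predict and predict_ci, and cdf survives only as a deprecated shim. A minimal migration sketch for calling code (the Weibull choice and the synthetic data below are illustrative assumptions, not part of this commit):

import numpy as np
from convoys.multi import Weibull  # any RegressionToMulti subclass behaves the same way

# Illustrative data: integer group per observation, conversion flag, observation time
n = 500
G = np.random.randint(0, 2, size=n)
B = np.random.rand(n) < 0.3
T = np.random.exponential(scale=5., size=n)

model = Weibull(ci=True)   # ci=True runs MCMC so confidence intervals are available
model.fit(G, B, T)

t = np.linspace(0.1, 20, 50)
y = model.predict(0, t)                            # was: model.cdf(0, t)
y, y_lo, y_hi = model.predict_ci(0, t, ci=0.8).T   # was: model.cdf(0, t, ci=0.8).T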
4 changes: 2 additions & 2 deletions convoys/plotting.py
@@ -81,14 +81,14 @@ def plot_cohorts(G, B, T, t_max=None, model='kaplan-meier',
label = label_fmt % dict(group=group, n=n, k=k)

if ci is not None:
p_y, p_y_lo, p_y_hi = m.cdf(j, t, ci=ci).T
p_y, p_y_lo, p_y_hi = m.predict_ci(j, t, ci=ci).T
merged_plot_ci_kwargs = {'alpha': 0.2}
merged_plot_ci_kwargs.update(plot_ci_kwargs)
p = ax.fill_between(t, 100. * p_y_lo, 100. * p_y_hi,
**merged_plot_ci_kwargs)
color = p.get_facecolor()[0] # reuse color for the line
else:
p_y = m.cdf(j, t).T
p_y = m.predict(j, t).T
color = None

merged_plot_kwargs = {'color': color, 'linewidth': 1.5,
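The plotting helper keeps its public interface; only the internal calls change, routing to predict_ci when ci is passed and to predict otherwise. A self-contained usage sketch (synthetic data, purely illustrative):

import numpy as np
from matplotlib import pyplot
from convoys.plotting import plot_cohorts

# Illustrative cohort data: group label, conversion flag, time observed
n = 500
G = np.where(np.random.rand(n) < 0.5, 'variant', 'control')
B = np.random.rand(n) < 0.4
T = np.random.exponential(scale=5., size=n)

plot_cohorts(G, B, T, model='kaplan-meier', ci=0.95)  # shaded band is drawn from predict_ci
pyplot.legend()
pyplot.savefig('cohorts.png')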
95 changes: 54 additions & 41 deletions convoys/regression.py
@@ -1,6 +1,7 @@
from convoys import autograd_scipy_monkeypatch # NOQA
import autograd
from autograd_gamma import gammainc
from deprecated.sphinx import deprecated
import emcee
import numpy
from scipy.special import gammaincinv
@@ -76,7 +77,7 @@ class GeneralizedGamma(RegressionModel):
:param ci: boolean, defaults to False. Whether to use MCMC to
sample from the posterior so that a confidence interval can be
estimated later (see :meth:`cdf`).
estimated later (see :meth:`predict`).
:param hierarchical: boolean denoting whether we have a (Normal) prior
on the alpha and beta parameters to regularize. The variance of
the normal distribution is in itself assumed to be an inverse
@@ -278,25 +279,7 @@ def callback(LL, value_history=[]):
'beta': data[6+n_features:6+2*n_features].T,
} for k, data in result.items()}

def cdf_posteriori(self, x, t, ci=None):
'''Returns the value of the cumulative distribution function
for a fitted model.
:param x: feature vector (or matrix)
:param t: time
:param ci: if this is provided, and the model was fit with
`ci = True`, then the return value will be the trace
samples generated via the MCMC steps. If this is not
provided, then the max a posteriori prediction will be used.
'''
x = numpy.array(x)
t = numpy.array(t)
if ci is None:
params = self.params['map']
else:
assert self._ci
params = self.params['samples']
t = numpy.expand_dims(t, -1)
def _predict(self, params, x, t):
lambd = exp(dot(x, params['alpha'].T) + params['a'])
if self._flavor == 'logistic':
c = expit(dot(x, params['beta'].T) + params['b'])
@@ -308,34 +291,49 @@ def cdf_posteriori(self, x, t, ci=None):

return M

def cdf(self, x, t, ci=None):
def predict_posteriori(self, x, t):
''' Returns the trace samples generated via the MCMC steps.
Requires the model to be fit with `ci = True`.'''
x = numpy.array(x)
t = numpy.array(t)
assert self._ci
params = self.params['samples']
t = numpy.expand_dims(t, -1)
return self._predict(params, x, t)

def predict_ci(self, x, t, ci=0.8):
'''Works like :meth:`predict` but produces a confidence interval.
Requires the model to be fit with `ci = True`. The return value
will contain one more dimension than for :meth:`predict`, and
the last dimension will have size 3, containing the mean, the
lower bound of the confidence interval, and the upper bound of
the confidence interval.
'''
M = self.predict_posteriori(x, t)
y = numpy.mean(M, axis=-1)
y_lo = numpy.percentile(M, (1-ci)*50, axis=-1)
y_hi = numpy.percentile(M, (1+ci)*50, axis=-1)
return numpy.stack((y, y_lo, y_hi), axis=-1)

def predict(self, x, t):
'''Returns the value of the cumulative distribution function
for a fitted model. TODO: this should probably be renamed
"predict" in the future to follow the scikit-learn convention.
for a fitted model.
:param x: feature vector (or matrix)
:param t: time
:param ci: if this is provided, and the model was fit with
`ci = True`, then the return value will contain one more
dimension, and the last dimension will have size 3,
containing the mean, the lower bound of the confidence
interval, and the upper bound of the confidence interval.
If this is not provided, then the max a posteriori
prediction will be used.
'''
M = self.cdf_posteriori(x, t, ci)
if not ci:
return M
else:
# Replace the last axis with a 3-element vector
y = numpy.mean(M, axis=-1)
y_lo = numpy.percentile(M, (1-ci)*50, axis=-1)
y_hi = numpy.percentile(M, (1+ci)*50, axis=-1)
return numpy.stack((y, y_lo, y_hi), axis=-1)
params = self.params['map']
x = numpy.array(x)
t = numpy.array(t)
return self._predict(params, x, t)

def rvs(self, x, n_curves=1, n_samples=1, T=None):
# Samples values from this distribution
# T is optional and means we already observed non-conversion until T
''' Samples values from this distribution.
T is optional and means we have already observed non-conversion up until time T.
'''
assert self._ci # Need to be fit with MCMC
if T is None:
T = numpy.zeros((n_curves, n_samples))
@@ -364,6 +362,21 @@ def rvs(self, x, n_curves=1, n_samples=1, T=None):

return B, C

@deprecated(version='0.1.8',
reason='Use :meth:`predict` or :meth:`predict_ci` instead.')
def cdf(self, x, t, ci=False):
'''Returns the predicted values.'''
if ci:
return self.predict_ci(x, t)
else:
return self.predict(x, t)

@deprecated(version='0.1.8',
reason='Use :meth:`predict_posteriori` instead.')
def cdf_posteriori(self, x, t):
'''Returns the a posteriori distribution of the predicted values.'''
return self.predict_posteriori(x, t)


class Exponential(GeneralizedGamma):
''' Specialization of :class:`.GeneralizedGamma` where :math:`k=1, p=1`.
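At the regression level, the old cdf(x, t, ci=...) is split into three explicit methods: predict (MAP point estimate), predict_ci (mean plus lower and upper bounds in a trailing dimension of size 3), and predict_posteriori (the raw MCMC trace). A sketch of the new call pattern (the Weibull choice and the synthetic data are illustrative assumptions, not part of this commit):

import numpy as np
from convoys.regression import Weibull

# Illustrative data: one binary feature, conversion flag, observation time
n = 500
X = np.random.randint(0, 2, size=(n, 1))
B = np.random.rand(n) < 0.3
T = np.random.exponential(scale=5., size=n)

model = Weibull(ci=True)   # MCMC sampling is required for the posterior-based methods
model.fit(X, B, T)

t = np.linspace(0.1, 20, 50)
x = [1]
y_map = model.predict(x, t)              # MAP estimate, shape (50,)
y_ci = model.predict_ci(x, t, ci=0.8)    # shape (50, 3): mean, lower bound, upper bound
trace = model.predict_posteriori(x, t)   # shape (50, number of MCMC samples)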
54 changes: 36 additions & 18 deletions convoys/single.py
@@ -1,3 +1,4 @@
from deprecated.sphinx import deprecated
import numpy
from scipy.special import expit, logit
import scipy.stats
@@ -51,29 +52,46 @@ def fit(self, B, T):
eps = 1e-9
self._ss_clipped = numpy.clip(self._ss, eps, 1.0-eps)

def _get_value_at(self, j, ci):
if ci:
z_lo, z_hi = scipy.stats.norm.ppf([(1-ci)/2, (1+ci)/2])
return (
1 - self._ss[j],
1 - numpy.exp(-numpy.exp(
numpy.log(-numpy.log(self._ss_clipped[j]))
+ z_hi * self._vs[j]**0.5)),
1 - numpy.exp(-numpy.exp(
numpy.log(-numpy.log(self._ss_clipped[j]))
+ z_lo * self._vs[j]**0.5))
)
else:
return 1 - self._ss[j]
def predict(self, t):
'''Returns the predicted values.'''
t = numpy.array(t)
res = numpy.zeros(t.shape)
for indexes, value in numpy.ndenumerate(t):
j = numpy.searchsorted(self._ts, value, side='right') - 1
if j >= len(self._ts) - 1:
# Make the plotting stop at the last value of t
res[indexes] = float('nan')
else:
res[indexes] = 1 - self._ss[j]
return res

def cdf(self, t, ci=None):
def predict_ci(self, t, ci=0.8):
'''Returns the predicted values with a confidence interval.'''
t = numpy.array(t)
res = numpy.zeros(t.shape + (3,) if ci else t.shape)
res = numpy.zeros(t.shape + (3,))
for indexes, value in numpy.ndenumerate(t):
j = numpy.searchsorted(self._ts, value, side='right') - 1
if j >= len(self._ts) - 1:
# Make the plotting stop at the last value of t
res[indexes] = [float('nan')]*3 if ci else float('nan')
res[indexes] = [float('nan')]*3
else:
res[indexes] = self._get_value_at(j, ci)
z_lo, z_hi = scipy.stats.norm.ppf([(1-ci)/2, (1+ci)/2])
res[indexes] = (
1 - self._ss[j],
1 - numpy.exp(-numpy.exp(
numpy.log(-numpy.log(self._ss_clipped[j]))
+ z_hi * self._vs[j]**0.5)),
1 - numpy.exp(-numpy.exp(
numpy.log(-numpy.log(self._ss_clipped[j]))
+ z_lo * self._vs[j]**0.5))
)
return res

@deprecated(version='0.1.8',
reason='Use :meth:`predict` or :meth:`predict_ci` instead.')
def cdf(self, t, ci=None):
'''Returns the predicted values.'''
if ci is not None:
return self.predict_ci(t)
else:
return self.predict(t)
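
The Kaplan-Meier model follows the same split: predict(t) returns point estimates, predict_ci(t, ci) returns an (estimate, lower, upper) triple per time point, and cdf remains as a deprecated wrapper that now warns via the deprecated package. A small sketch (synthetic data, illustrative only):

import numpy as np
from convoys.single import KaplanMeier

# Illustrative data: conversion flag and time of conversion or censoring
B = np.array([True, False, True, True, False, True])
T = np.array([1.0, 4.0, 2.0, 3.0, 5.0, 2.5])

km = KaplanMeier()
km.fit(B, T)

t = np.linspace(0.5, 4, 8)
print(km.predict(t))              # point estimates, shape (8,)
print(km.predict_ci(t, ci=0.95))  # shape (8, 3): estimate, lower bound, upper bound
print(km.cdf(t))                  # still works, but emits a DeprecationWarning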
3 changes: 2 additions & 1 deletion setup.py
@@ -10,7 +10,7 @@
'''

setup(name='convoys',
version='0.1.7',
version='0.1.8',
description='Fit machine learning models to predict conversion using Weibull and Gamma distributions',
long_description=long_description,
url='https://better.engineering/convoys',
@@ -21,6 +21,7 @@
install_requires=[
'autograd',
'autograd-gamma>=0.2.0',
'deprecated',
'emcee>=3.0.0',
'matplotlib>=2.0.0',
'pandas>=0.24.0',
