Skip to content

Commit

Permalink
Merge pull request #127 from better/mcmc-not-ci
Browse files (browse the repository at this point in the history)
Change the `ci` flag to `mcmc`
  • Loading branch information
Erik Bernhardsson committed Mar 31, 2020
2 parents 0eef63a + 84ec1a2 commit 683e08c
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 23 deletions.
8 changes: 4 additions & 4 deletions convoys/plotting.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,10 +7,10 @@

_models = {
'kaplan-meier': lambda ci: convoys.multi.KaplanMeier(),
'exponential': lambda ci: convoys.multi.Exponential(ci=ci),
'weibull': lambda ci: convoys.multi.Weibull(ci=ci),
'gamma': lambda ci: convoys.multi.Gamma(ci=ci),
'generalized-gamma': lambda ci: convoys.multi.GeneralizedGamma(ci=ci),
'exponential': lambda ci: convoys.multi.Exponential(mcmc=ci),
'weibull': lambda ci: convoys.multi.Weibull(mcmc=ci),
'gamma': lambda ci: convoys.multi.Gamma(mcmc=ci),
'generalized-gamma': lambda ci: convoys.multi.GeneralizedGamma(mcmc=ci),
}


Expand Down
25 changes: 15 additions & 10 deletions convoys/regression.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,7 +75,7 @@ class RegressionModel(object):
class GeneralizedGamma(RegressionModel):
''' Generalization of Gamma, Weibull, and Exponential
:param ci: boolean, defaults to False. Whether to use MCMC to
:param mcmc: boolean, defaults to False. Whether to use MCMC to
sample from the posterior so that a confidence interval can be
estimated later (see :meth:`predict`).
:param hierarchical: boolean denoting whether we have a (Normal) prior
Expand All @@ -86,6 +86,7 @@ class GeneralizedGamma(RegressionModel):
linear model is fit, where the beta params will be completely
additive. This creates a much more interpretable model, with some
minor loss of accuracy.
:param ci: boolean, deprecated alias for `mcmc`.
This mostly follows the `Wikipedia article
<https://en.wikipedia.org/wiki/Generalized_gamma_distribution>`_, although
Expand Down Expand Up @@ -161,17 +162,21 @@ class GeneralizedGamma(RegressionModel):
<https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html#scipy.optimize.minimize>`_
with the SLSQP method.
If `ci == True`, then `emcee <http://dfm.io/emcee/current/>`_ is used
If `mcmc == True`, then `emcee <http://dfm.io/emcee/current/>`_ is used
to sample from the full posterior in order to generate uncertainty
estimates for all parameters.
'''
def __init__(self, ci=False, fix_k=None, fix_p=None, hierarchical=True,
flavor='logistic'):
self._ci = ci
def __init__(self, mcmc=False, fix_k=None, fix_p=None, hierarchical=True,
flavor='logistic', ci=None):
self._mcmc = mcmc
self._fix_k = fix_k
self._fix_p = fix_p
self._hierarchical = hierarchical
self._flavor = flavor
if ci is not None:
warnings.warn('The `ci` argument is deprecated in 0.2.1 in favor '
' of `mcmc`.', DeprecationWarning)
self._mcmc = ci

def fit(self, X, B, T, W=None):
'''Fits the model.
Expand Down Expand Up @@ -241,7 +246,7 @@ def callback(LL, value_history=[]):
'Norm of gradient is %f' % gradient_norm)

# Let's sample from the posterior to compute uncertainties
if self._ci:
if self._mcmc:
dim, = res.x.shape
n_walkers = 5*dim
sampler = emcee.EnsembleSampler(
Expand Down Expand Up @@ -294,10 +299,10 @@ def _predict(self, params, x, t):
def predict_posteriori(self, x, t):
''' Returns the trace samples generated via the MCMC steps.
Requires the model to be fit with `ci = True`.'''
Requires the model to be fit with `mcmc == True`.'''
x = numpy.array(x)
t = numpy.array(t)
assert self._ci
assert self._mcmc
params = self.params['samples']
t = numpy.expand_dims(t, -1)
return self._predict(params, x, t)
Expand All @@ -319,7 +324,7 @@ def predict_ci(self, x, t, ci=0.8):

def predict(self, x, t):
'''Returns the value of the cumulative distribution function
for a fitted model.
for a fitted model (using the maximum a posteriori estimate).
:param x: feature vector (or matrix)
:param t: time
Expand All @@ -334,7 +339,7 @@ def rvs(self, x, n_curves=1, n_samples=1, T=None):
T is optional and means we already observed non-conversion until T
'''
assert self._ci # Need to be fit with MCMC
assert self._mcmc # Need to be fit with MCMC
if T is None:
T = numpy.zeros((n_curves, n_samples))
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,7 +10,7 @@
'''

setup(name='convoys',
version='0.2.0',
version='0.2.1',
description='Fit machine learning models to predict conversion using Weibull and Gamma distributions',
long_description=long_description,
url='https://better.engineering/convoys',
Expand Down
16 changes: 8 additions & 8 deletions test_convoys.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_output_shapes(c=0.3, lambd=0.1, n=1000, k=5):
B, T = generate_censored_data(N, E, C)

# Fit model with ci
model = convoys.regression.Exponential(ci=True)
model = convoys.regression.Exponential(mcmc=True)
model.fit(X, B, T)

# Generate output without ci
Expand All @@ -91,7 +91,7 @@ def test_output_shapes(c=0.3, lambd=0.1, n=1000, k=5):
assert model.cdf(X[0], 0, ci=0.8).shape == (3,)

# Fit model without ci (should be the same)
model = convoys.regression.Exponential(ci=False)
model = convoys.regression.Exponential(mcmc=False)
model.fit(X, B, T)
assert model.predict(X[0], 0).shape == ()
assert model.predict([X[0], X[1]], [0, 1]).shape == (2,)
Expand All @@ -104,7 +104,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
N = scipy.stats.uniform.rvs(scale=5./lambd, size=(n,)) # time now
E = scipy.stats.expon.rvs(scale=1./lambd, size=(n,)) # time of event
B, T = generate_censored_data(N, E, C)
model = convoys.regression.Exponential(ci=True)
model = convoys.regression.Exponential(mcmc=True)
model.fit(X, B, T)
assert 0.80*c < model.predict([1], float('inf')) < 1.30*c
for t in [1, 3, 10]:
Expand All @@ -126,7 +126,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
assert 0.70*d < (convert_times < t).mean() < 1.30*d

# Fit a linear model
model = convoys.regression.Exponential(ci=False, flavor='linear')
model = convoys.regression.Exponential(mcmc=False, flavor='linear')
model.fit(X, B, T)
model_c = model.params['map']['b'] + model.params['map']['beta'][0]
assert 0.9*c < model_c < 1.1*c
Expand Down Expand Up @@ -162,7 +162,7 @@ def test_weibull_regression_model(cs=[0.3, 0.5, 0.7],
assert 0.80 * c < model.predict(x, float('inf')) < 1.30 * c

# Fit a linear model
model = convoys.regression.Weibull(ci=False, flavor='linear')
model = convoys.regression.Weibull(mcmc=False, flavor='linear')
model.fit(X, B, T)
model_cs = model.params['map']['b'] + model.params['map']['beta']
for model_c, c in zip(model_cs, cs):
Expand All @@ -184,7 +184,7 @@ def test_gamma_regression_model(c=0.3, lambd=0.1, k=3.0, n=10000):
assert 0.80*k < numpy.mean(model.params['map']['k']) < 1.30*k

# Fit a linear model
model = convoys.regression.Gamma(ci=False, flavor='linear')
model = convoys.regression.Gamma(mcmc=False, flavor='linear')
model.fit(X, B, T)
model_c = model.params['map']['b'] + model.params['map']['beta'][0]
assert 0.9*c < model_c < 1.1*c
Expand All @@ -201,7 +201,7 @@ def test_linear_model(n=10000, m=5, k=3.0, lambd=0.1):
E = numpy.array([sample_weibull(k, lambd) for r in range(n)])
B, T = generate_censored_data(N, E, C)

model = convoys.regression.Weibull(ci=False, flavor='linear')
model = convoys.regression.Weibull(mcmc=False, flavor='linear')
model.fit(X, B, T)

# Check the fitted parameters
Expand Down Expand Up @@ -365,7 +365,7 @@ def _test_plot_cohorts(model='weibull', extra_model=None):
def test_plot_cohorts_model():
df = _generate_dataframe()
unit, groups, (G, B, T) = convoys.utils.get_arrays(df)
model = convoys.multi.Exponential(ci=None)
model = convoys.multi.Exponential(mcmc=None)
model.fit(G, B, T)
matplotlib.pyplot.clf()
convoys.plotting.plot_cohorts(G, B, T, model=model, groups=groups)
Expand Down

0 comments on commit 683e08c

Please sign in to comment.