diff --git a/convoys/regression.py b/convoys/regression.py index 23fa432..bb53e20 100644 --- a/convoys/regression.py +++ b/convoys/regression.py @@ -172,15 +172,32 @@ def fit(self, X, B, T, W=None, fix_k=None, fix_p=None): x0[1] = -1 if fix_p is None else log(fix_p) args = (X, B, T, W, fix_k, fix_p) + # Callback for progress to stdout + n_iterations = [0] # Dumb hack to make the scope right + sys.stdout.write('\n') + def callback(x): + n_iterations[0] += 1 + sys.stdout.write('Finding MAP: %13d\r' % + n_iterations[0]) + sys.stdout.flush() + # Find the maximum a posteriori of the distribution res = scipy.optimize.minimize( lambda x: -generalized_gamma_LL(x, *args), x0, jac=autograd.grad(lambda x: -generalized_gamma_LL(x, *args)), method='SLSQP', + callback=callback, ) + sys.stdout.write('\n') result = {'map': res.x} + # TODO: should not use fixed k/p as search parameters + if fix_k: + result['map'][0] = log(fix_k) + if fix_p: + result['map'][1] = log(fix_p) + # Let's sample from the posterior to compute uncertainties if self._ci: dim, = res.x.shape @@ -194,17 +211,20 @@ def fit(self, X, B, T, W=None, fix_k=None, fix_p=None): mcmc_initial_noise = 1e-3 p0 = [result['map'] + mcmc_initial_noise * numpy.random.randn(dim) for i in range(n_walkers)] - n_burnin = 20 + n_burnin = 200 n_steps = numpy.ceil(1000. / n_walkers) n_iterations = n_burnin + n_steps - sys.stdout.write('\n') for i, _ in enumerate(sampler.sample(p0, iterations=n_iterations)): - sys.stdout.write('MCMC (%d walkers): %6d/%-6d (%6.2f%%)\r' % ( + sys.stdout.write('MCMC (%3d walkers): %6d/%-6d (%6.2f%%)\r' % ( n_walkers, i+1, n_iterations, 100.*(i+1)/n_iterations)) sys.stdout.flush() sys.stdout.write('\n') result['samples'] = sampler.chain[:, n_burnin:, :] \ .reshape((-1, dim)).T + if fix_k: + result['samples'][0, :] = log(fix_k) + if fix_p: + result['samples'][1, :] = log(fix_p) self.params = {k: { 'k': exp(data[0]), diff --git a/examples/dob_violations.py b/examples/dob_violations.py index 68a9e39..277009a 100644 --- a/examples/dob_violations.py +++ b/examples/dob_violations.py @@ -37,9 +37,9 @@ def run(): converted='disposition_date', unit='years', group_min_size=500) convoys.plotting.plot_cohorts(G, B, T, model='kaplan-meier', - groups=groups, t_max=30) + groups=groups, t_max=30, ci=0.95) convoys.plotting.plot_cohorts(G, B, T, model='weibull', - groups=groups, t_max=30, ci=0.95, + groups=groups, t_max=30, plot_kwargs={'linestyle': '--'}) pyplot.legend() pyplot.xlabel(unit) diff --git a/test_convoys.py b/test_convoys.py index 4ba4f9f..c383390 100644 --- a/test_convoys.py +++ b/test_convoys.py @@ -117,17 +117,14 @@ def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], for r in range(n)]) B, T = generate_censored_data(N, E, C) - model = convoys.regression.Weibull(ci=True) + model = convoys.regression.Weibull() model.fit(X, B, T) # Validate shape of results x = numpy.ones((len(cs),)) assert model.cdf(x, float('inf')).shape == () - assert model.cdf(x, float('inf'), ci=0.95).shape == (3,) assert model.cdf(x, 1).shape == () - assert model.cdf(x, 1, ci=True).shape == (3,) assert model.cdf(x, [1, 2, 3, 4]).shape == (4,) - assert model.cdf(x, [1, 2, 3, 4], ci=True).shape == (4, 3) # Check results for r, c in enumerate(cs):