Merge 656f6ea into b5bbdaa

better · Aug 22, 2018 · c0b1736 · c0b1736
2 parents b5bbdaa + 656f6ea
commit c0b1736
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 9 deletions.
diff --git a/convoys/regression.py b/convoys/regression.py
@@ -172,15 +172,32 @@ def fit(self, X, B, T, W=None, fix_k=None, fix_p=None):
         x0[1] = -1 if fix_p is None else log(fix_p)
         args = (X, B, T, W, fix_k, fix_p)
 
+        # Callback for progress to stdout
+        n_iterations = [0]  # Dumb hack to make the scope right
+        sys.stdout.write('\n')
+        def callback(x):
+            n_iterations[0] += 1
+            sys.stdout.write('Finding MAP: %13d\r' %
+                             n_iterations[0])
+            sys.stdout.flush()
+
         # Find the maximum a posteriori of the distribution
         res = scipy.optimize.minimize(
             lambda x: -generalized_gamma_LL(x, *args),
             x0,
             jac=autograd.grad(lambda x: -generalized_gamma_LL(x, *args)),
             method='SLSQP',
+            callback=callback,
         )
+        sys.stdout.write('\n')
         result = {'map': res.x}
 
+        # TODO: should not use fixed k/p as search parameters
+        if fix_k:
+            result['map'][0] = log(fix_k)
+        if fix_p:
+            result['map'][1] = log(fix_p)
+
         # Let's sample from the posterior to compute uncertainties
         if self._ci:
             dim, = res.x.shape
@@ -194,17 +211,20 @@ def fit(self, X, B, T, W=None, fix_k=None, fix_p=None):
             mcmc_initial_noise = 1e-3
             p0 = [result['map'] + mcmc_initial_noise * numpy.random.randn(dim)
                   for i in range(n_walkers)]
-            n_burnin = 20
+            n_burnin = 200
             n_steps = numpy.ceil(1000. / n_walkers)
             n_iterations = n_burnin + n_steps
-            sys.stdout.write('\n')
             for i, _ in enumerate(sampler.sample(p0, iterations=n_iterations)):
-                sys.stdout.write('MCMC (%d walkers): %6d/%-6d (%6.2f%%)\r' % (
+                sys.stdout.write('MCMC (%3d walkers): %6d/%-6d (%6.2f%%)\r' % (
                         n_walkers, i+1, n_iterations, 100.*(i+1)/n_iterations))
                 sys.stdout.flush()
             sys.stdout.write('\n')
             result['samples'] = sampler.chain[:, n_burnin:, :] \
                                        .reshape((-1, dim)).T
+            if fix_k:
+                result['samples'][0, :] = log(fix_k)
+            if fix_p:
+                result['samples'][1, :] = log(fix_p)
 
         self.params = {k: {
             'k': exp(data[0]),

diff --git a/examples/dob_violations.py b/examples/dob_violations.py
@@ -37,9 +37,9 @@ def run():
         converted='disposition_date',
         unit='years', group_min_size=500)
     convoys.plotting.plot_cohorts(G, B, T, model='kaplan-meier',
-                                  groups=groups, t_max=30)
+                                  groups=groups, t_max=30, ci=0.95)
     convoys.plotting.plot_cohorts(G, B, T, model='weibull',
-                                  groups=groups, t_max=30, ci=0.95,
+                                  groups=groups, t_max=30,
                                   plot_kwargs={'linestyle': '--'})
     pyplot.legend()
     pyplot.xlabel(unit)

diff --git a/test_convoys.py b/test_convoys.py
@@ -117,17 +117,14 @@ def test_weibull_regression_model(cs=[0.3, 0.5, 0.7],
                      for r in range(n)])
     B, T = generate_censored_data(N, E, C)
 
-    model = convoys.regression.Weibull(ci=True)
+    model = convoys.regression.Weibull()
     model.fit(X, B, T)
 
     # Validate shape of results
     x = numpy.ones((len(cs),))
     assert model.cdf(x, float('inf')).shape == ()
-    assert model.cdf(x, float('inf'), ci=0.95).shape == (3,)
     assert model.cdf(x, 1).shape == ()
-    assert model.cdf(x, 1, ci=True).shape == (3,)
     assert model.cdf(x, [1, 2, 3, 4]).shape == (4,)
-    assert model.cdf(x, [1, 2, 3, 4], ci=True).shape == (4, 3)
 
     # Check results
     for r, c in enumerate(cs):