From a032076f55485d0395f295f495b3a8d4814e2ff4 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 20:11:17 -0500
Subject: [PATCH 1/6] remove basic & simple binomial

---
 convoys/__init__.py | 55 ---------------------------------------------
 1 file changed, 55 deletions(-)

diff --git a/convoys/__init__.py b/convoys/__init__.py
index 6d27556..60d45f9 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -72,59 +72,6 @@ def predict_time(self, ci=None):
         pass


-class SimpleBinomial(Model):
-    def fit(self, C, N, B):
-        self.params['a'] = len(B)
-        self.params['b'] = sum(B)
-
-    def predict(self, ts, ci=None):
-        a, b = self.params['a'], self.params['b']
-        def rep(x):
-            return numpy.ones(len(ts)) * x
-        if ci is not None:
-            return ts, rep(b/a), rep(scipy.stats.beta.ppf((1 - ci)/2, b, a-b)), rep(scipy.stats.beta.ppf((1 + ci)/2, b, a-b))
-        else:
-            return ts, rep(b/a)
-
-    def predict_final(self, ci=None):
-        a, b = self.params['a'], self.params['b']
-        if ci is not None:
-            return b/a, scipy.stats.beta.ppf((1 - ci)/2, b, a-b), scipy.stats.beta.ppf((1 + ci)/2, b, a-b)
-        else:
-            return b/a
-
-    def predict_time(self):
-        return 0.0
-
-
-class Basic(Model):
-    def fit(self, C, N, B, n_limit=30):
-        n, k = len(C), 0
-        self.ts = [0]
-        self.ns = [n]
-        self.ks = [k]
-        events = [(c, 1, 0) for c, n, b in zip(C, N, B) if b] + \
-            [(n, -int(b), -1) for c, n, b in zip(C, N, B)]
-        for t, k_delta, n_delta in sorted(events):
-            k += k_delta
-            n += n_delta
-            self.ts.append(t)
-            self.ks.append(k)
-            self.ns.append(n)
-            if n < n_limit:
-                break
-
-    def predict(self, ts, ci=None):
-        js = [bisect.bisect_left(self.ts, t) for t in ts]
-        ks = numpy.array([self.ks[j] for j in js if j < len(self.ks)])
-        ns = numpy.array([self.ns[j] for j in js if j < len(self.ns)])
-        ts = numpy.array([ts[j] for j in js if j < len(self.ns)])
-        if ci is not None:
-            return ts, ks / ns, scipy.stats.beta.ppf((1 - ci)/2, ks, ns-ks), scipy.stats.beta.ppf((1 + ci)/2, ks, ns-ks)
-        else:
-            return ts, ks / ns
-
-
 class KaplanMeier(Model):
     def fit(self, C, N, B):
         T = [c if b else n for c, n, b in zip(C, N, B)]
@@ -364,9 +311,7 @@ def split_by_group(data, group_min_size, max_groups):


 _models = {
-    'basic': Basic,
     'kaplan-meier': KaplanMeier,
-    'simple-binomial': SimpleBinomial,
     'exponential': Exponential,
     'weibull': Weibull,
     'gamma': Gamma,
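Context for this removal: SimpleBinomial ignored time entirely and just estimated a single conversion rate with a Beta quantile interval, which is the whole calculation below. A minimal standalone sketch of what it computed (the numbers are made up; note the removed code parametrizes the Beta with k successes and n-k failures directly):

```python
import numpy
import scipy.stats

# n users, k conversions; the removed SimpleBinomial stored these as
# params['a'] and params['b'] and ignored time entirely.
n, k, ci = 1000, 300, 0.95
rate = k / float(n)
lo = scipy.stats.beta.ppf((1 - ci) / 2, k, n - k)
hi = scipy.stats.beta.ppf((1 + ci) / 2, k, n - k)
print('%.3f (%.3f-%.3f)' % (rate, lo, hi))  # ~0.300 (0.272-0.329)
```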
From 742f61c69697824e65e5841fdf781657be517ed5 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 21:05:48 -0500
Subject: [PATCH 2/6] factor out model into a separate file

---
 convoys/__init__.py   | 23 +----------------------
 convoys/model.py      | 24 ++++++++++++++++++++++++
 convoys/regression.py |  2 +-
 3 files changed, 26 insertions(+), 23 deletions(-)
 create mode 100644 convoys/model.py

diff --git a/convoys/__init__.py b/convoys/__init__.py
index 60d45f9..4baa1bd 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -12,6 +12,7 @@
 from autograd.scipy.special import expit, gamma, gammainc, gammaincc, gammaln
 from autograd.numpy import exp, log, sum
 from matplotlib import pyplot
+from convoys.model import Model

 LOG_EPS = 1e-12  # Used for log likelihood

@@ -50,28 +51,6 @@ def get_arrays(data, t_converter):
     return numpy.array(C), numpy.array(N), numpy.array(B)


-@six.add_metaclass(abc.ABCMeta)
-class Model():
-    def __init__(self, params={}):
-        self.params = params
-
-    @abc.abstractmethod
-    def fit(self, C, N, B):
-        pass
-
-    @abc.abstractmethod
-    def predict(self, ts, ci=None):
-        pass
-
-    @abc.abstractmethod
-    def predict_final(self, ci=None):
-        pass
-
-    @abc.abstractmethod
-    def predict_time(self, ci=None):
-        pass
-
-
 class KaplanMeier(Model):
     def fit(self, C, N, B):
         T = [c if b else n for c, n, b in zip(C, N, B)]
diff --git a/convoys/model.py b/convoys/model.py
new file mode 100644
index 0000000..ce96a12
--- /dev/null
+++ b/convoys/model.py
@@ -0,0 +1,24 @@
+import abc
+import six
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Model():
+    def __init__(self, params={}):
+        self.params = params
+
+    @abc.abstractmethod
+    def fit(self, C, N, B):
+        pass
+
+    @abc.abstractmethod
+    def predict(self, ts, ci=None):
+        pass
+
+    @abc.abstractmethod
+    def predict_final(self, ci=None):
+        pass
+
+    @abc.abstractmethod
+    def predict_time(self, ci=None):
+        pass
diff --git a/convoys/regression.py b/convoys/regression.py
index a8e321b..a5a2e4f 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -3,7 +3,7 @@
 import scipy.stats
 import tensorflow as tf

-from convoys import Model
+from convoys.model import Model


 class Regression(Model):  # This will replace the model in __init__.py soon.
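The class being factored out here uses six.add_metaclass so the ABC machinery works on both Python 2 and 3: a subclass missing any abstract method fails at instantiation time rather than at call time. A tiny illustration of that pattern (the subclass name is hypothetical):

```python
import abc
import six

@six.add_metaclass(abc.ABCMeta)
class Model():
    @abc.abstractmethod
    def fit(self, C, N, B):
        pass

class Incomplete(Model):  # hypothetical subclass that forgets fit()
    pass

try:
    Incomplete()
except TypeError as e:
    print(e)  # Can't instantiate abstract class Incomplete with abstract methods fit
```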
From 11e08c0a14d8c5f7109c76435abe5d90b9d39de4 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 21:38:33 -0500
Subject: [PATCH 3/6] switch to use the new regression models for everything

Still fits them per-group in the plotting functions, which is a bit dumb,
but will fix that later.
---
 convoys/__init__.py   | 213 +++++-------------------------------------
 convoys/model.py      |  11 +--
 convoys/regression.py |  20 ++--
 test_convoys.py       |  46 +--------
 4 files changed, 42 insertions(+), 248 deletions(-)

diff --git a/convoys/__init__.py b/convoys/__init__.py
index 4baa1bd..b1cb182 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -13,15 +13,7 @@
 from autograd.numpy import exp, log, sum
 from matplotlib import pyplot
 from convoys.model import Model
-
-LOG_EPS = 1e-12  # Used for log likelihood
-
-
-def log1pexp(x):
-    # returns log(1 + exp(x))
-    p, q = min(x, 0), max(x, 0)
-    return q + log(exp(p-q) + 1)
-
+from convoys.regression import ExponentialRegression, WeibullRegression, GammaRegression


 def get_timescale(t):
@@ -42,41 +34,39 @@ def get_timedelta_converter(t_factor):
     return lambda td: t_factor * td.total_seconds()


 def get_arrays(data, t_converter):
-    C = [t_converter(converted_at - created_at) if converted_at is not None else 1.0
-         for created_at, converted_at, now in data]
-    N = [t_converter(now - created_at)
-         for created_at, converted_at, now in data]
+    X = numpy.ones((len(data), 1))
     B = [bool(converted_at is not None) for created_at, converted_at, now in data]
-    return numpy.array(C), numpy.array(N), numpy.array(B)
+    T = [t_converter(converted_at - created_at) if converted_at is not None else t_converter(now - created_at)
+         for created_at, converted_at, now in data]
+    return X, numpy.array(B), numpy.array(T)


 class KaplanMeier(Model):
-    def fit(self, C, N, B):
-        T = [c if b else n for c, n, b in zip(C, N, B)]
+    def fit(self, X, B, T):
         kmf = lifelines.KaplanMeierFitter()
         kmf.fit(T, event_observed=B)
         self.ts = kmf.survival_function_.index.values
         self.ps = 1.0 - kmf.survival_function_['KM_estimate'].values
         self.ps_hi = 1.0 - kmf.confidence_interval_['KM_estimate_lower_0.95'].values
         self.ps_lo = 1.0 - kmf.confidence_interval_['KM_estimate_upper_0.95'].values

-    def predict(self, ts, ci=None):
+    def predict(self, x, ts, ci=None):
         js = [bisect.bisect_left(self.ts, t) for t in ts]
         def array_lookup(a):
             return numpy.array([a[j] for j in js if j < len(self.ts)])
         if ci is not None:
             return (array_lookup(self.ts), array_lookup(self.ps), array_lookup(self.ps_lo), array_lookup(self.ps_hi))
         else:
             return (array_lookup(self.ts), array_lookup(self.ps))

-    def predict_final(self, ci=None):
+    def predict_final(self, x, ci=None):
         if ci is not None:
             return (self.ps[-1], self.ps_lo[-1], self.ps_hi[-1])
         else:
             return self.ps[-1]

-    def predict_time(self, ci=None):
+    def predict_time(self, x, ci=None):
         # TODO: should not use median here, but mean is no good
         def median(ps):
             i = bisect.bisect_left(ps, 0.5)
@@ -87,167 +77,12 @@ def median(ps):
         return median(self.ps)


-def fit_beta(c, fc):
-    # Approximate a Beta distribution for c by fitting two things
-    # 1. second derivative wrt c should match the second derivative of a beta
-    #    LL = (a-1)log(c) + (b-1)log(1-c)
-    #    dLL = (a-1)/c - (b-1)/(1-c)
-    #    ddLL = -(a-1)/c^2 - (b-1)/(1-c)^2 = -h
-    #    a(-1/c^2) + b(-1/(1-c)^2) = -h - 1/c^2 - 1/(1-c)^2
-    # 2. mode should match c, i.e. (a-1)/(a+b-2) = c <=> a-1 = c(a+b-2)
-    #    a(1-c) - bc = 1-2c
-    h = grad(grad(fc))(c)
-    M = numpy.array([[-1/c**2, -1/(1-c)**2],
-                     [(1-c), -c]])
-    q = numpy.array([-h - 1/c**2 - 1/(1-c)**2, 1 - 2*c])
-    try:
-        a, b = numpy.linalg.solve(M, q)
-    except numpy.linalg.linalg.LinAlgError:
-        a, b = 1, 1
-    return a, b
-
-
-class Exponential(Model):
-    def fit(self, C, N, B):
-        def transform(x):
-            p, q = x
-            return (expit(p), log1pexp(q))
-        def f(x):
-            c, lambd = x
-            LL_observed = log(c) + log(lambd) - lambd*C
-            LL_censored = log((1 - c) + c * exp(-lambd*N))
-            neg_LL = -sum(B * LL_observed + (1 - B) * LL_censored)
-            return neg_LL
-
-        g = lambda x: f(transform(x))
-        res = scipy.optimize.minimize(
-            fun=g,
-            jac=jacobian(g),
-            hess=hessian(g),
-            x0=(0, 0),
-            method='trust-ncg')
-        c, lambd = transform(res.x)
-        fc = lambda c: f((c, lambd))
-        a, b = fit_beta(c, fc)
-        self.params = dict(a=a, b=b, lambd=lambd)
-
-    def predict(self, ts, ci=None):
-        a, b, lambd = self.params['a'], self.params['b'], self.params['lambd']
-        y = 1 - exp(-ts * lambd)
-        if ci is not None:
-            return ts, a / (a + b) * y, scipy.stats.beta.ppf((1 - ci)/2, a, b) * y, scipy.stats.beta.ppf((1 + ci)/2, a, b) * y
-        else:
-            return ts, y
-
-    def predict_final(self, ci=None):
-        a, b = self.params['a'], self.params['b']
-        if not ci:
-            return a / (a + b)
-        else:
-            return (a / (a + b),
-                    scipy.stats.beta.ppf((1 - ci)/2, a, b),
-                    scipy.stats.beta.ppf((1 + ci)/2, a, b))
-
-    def predict_time(self):
-        return 1.0 / self.params['lambd']
-
-
-class Weibull(Model):
-    def fit(self, C, N, B):
-        def transform(x):
-            p, q, r = x
-            return (expit(p), log1pexp(q), log1pexp(r))
-        def f(x):
-            c, lambd, k = x
-            # PDF of Weibull: k * lambda * (x * lambda)^(k-1) * exp(-(t * lambda)^k)
-            LL_observed = log(c) + log(k) + log(lambd) + (k-1)*(log(C) + log(lambd)) - (C*lambd)**k
-            # CDF of Weibull: 1 - exp(-(t * lambda)^k)
-            LL_censored = log((1-c) + c * exp(-(N*lambd)**k))
-            neg_LL = -sum(B * LL_observed + (1 - B) * LL_censored)
-            return neg_LL
-
-        g = lambda x: f(transform(x))
-        res = scipy.optimize.minimize(
-            fun=g,
-            jac=jacobian(g),
-            hess=hessian(g),
-            x0=(0, 0, 0),
-            method='trust-ncg')
-        c, lambd, k = transform(res.x)
-        fc = lambda c: f((c, lambd, k))
-        a, b = fit_beta(c, fc)
-        self.params = dict(a=a, b=b, lambd=lambd, k=k)
-
-    def predict(self, ts, ci=None):
-        a, b, lambd, k = self.params['a'], self.params['b'], self.params['lambd'], self.params['k']
-        y = 1 - exp(-(ts*lambd)**k)
-        if ci is not None:
-            return ts, a / (a + b) * y, scipy.stats.beta.ppf((1 - ci)/2, a, b) * y, scipy.stats.beta.ppf((1 + ci)/2, a, b) * y
-        else:
-            return ts, y
-
-    def predict_final(self, ci=None):
-        a, b = self.params['a'], self.params['b']
-        if ci is not None:
-            return a / (a + b), scipy.stats.beta.ppf((1 - ci)/2, a, b), scipy.stats.beta.ppf((1 + ci)/2, a, b)
-        else:
-            return a / (a + b)
-
-    def predict_time(self):
-        return gamma(1 + 1./self.params['k']) / self.params['lambd']
-
-
-class Gamma(Model):
-    def fit(self, C, N, B):
-        # TODO(erikbern): should compute Jacobian of this one
-        def transform(x):
-            p, q, r = x
-            return (expit(p), log1pexp(q), log1pexp(r))
-        def f(x):
-            c, lambd, k = x
-            neg_LL = 0
-            # PDF of gamma: 1.0 / gamma(k) * lambda ^ k * t^(k-1) * exp(-t * lambda)
-            LL_observed = log(c) - gammaln(k) + k*log(lambd) + (k-1)*log(C + LOG_EPS) - lambd*C
-            # CDF of gamma: gammainc(k, lambda * t)
-            LL_censored = log((1-c) + c * gammaincc(k, lambd*N) + LOG_EPS)
-            neg_LL = -sum(B * LL_observed + (1 - B) * LL_censored)
-            return neg_LL
-
-        g = lambda x: f(transform(x))
-        res = scipy.optimize.minimize(
-            fun=g,
-            x0=(0, 0, 0),
-            method='Nelder-Mead')
-        c, lambd, k = transform(res.x)
-        fc = lambda c: f((c, lambd, k))
-        a, b = fit_beta(c, fc)
-        self.params = dict(a=a, b=b, lambd=lambd, k=k)
-
-    def predict(self, ts, ci=None):
-        a, b, lambd, k = self.params['a'], self.params['b'], self.params['lambd'], self.params['k']
-        y = gammainc(k, lambd*ts)
-        if ci is not None:
-            return ts, a / (a + b) * y, scipy.stats.beta.ppf((1 - ci)/2, a, b) * y, scipy.stats.beta.ppf((1 + ci)/2, a, b) * y
-        else:
-            return ts, y
-
-    def predict_final(self, ci=None):
-        a, b = self.params['a'], self.params['b']
-        if ci is not None:
-            return a / (a + b), scipy.stats.beta.ppf((1 - ci)/2, a, b), scipy.stats.beta.ppf((1 + ci)/2, a, b)
-        else:
-            return a / (a + b)
-
-    def predict_time(self):
-        return self.params['k'] / self.params['lambd']
-
-
-def sample_event(model, t, hi=1e3):
+def sample_event(model, x, t, hi=1e3):
     # We are now at time t. Generate a random event whether the user is going to convert or not
     # TODO: this is a hacky thing until we have a "invert CDF" method on each model
     def pred(t):
         ts = numpy.array([t])
-        return model.predict(ts)[1][-1]
+        return model.predict(x, ts)[1][-1]
     y = pred(t)
     r = y + random.random() * (1 - y)
     if pred(hi) < r:
@@ -291,9 +126,9 @@ def split_by_group(data, group_min_size, max_groups):


 _models = {
     'kaplan-meier': KaplanMeier,
-    'exponential': Exponential,
-    'weibull': Weibull,
-    'gamma': Gamma,
+    'exponential': ExponentialRegression,
+    'weibull': WeibullRegression,
+    'gamma': GammaRegression,
 }
@@ -317,24 +152,24 @@ def plot_cohorts(data, t_max=None, title=None, group_min_size=0, max_groups=100, model='kaplan-meier', projection=None):
     colors = seaborn.color_palette('hls', len(groups))
     y_max = 0
     for group, color in zip(sorted(groups), colors):
-        C, N, B = get_arrays(js[group], t_converter)
+        X, B, T = get_arrays(js[group], t_converter)
         t = numpy.linspace(0, t_max, 1000)

         m = _models[model]()
-        m.fit(C, N, B)
+        m.fit(X, B, T)

         label = '%s (n=%.0f, k=%.0f)' % (group, len(B), sum(B))

         if projection:
             p = _models[projection]()
-            p.fit(C, N, B)
-            p_t, p_y, p_y_lo, p_y_hi = p.predict(t, ci=0.95)
-            p_y_final, p_y_lo_final, p_y_hi_final = p.predict_final(ci=0.95)
+            p.fit(X, B, T)
+            p_t, p_y, p_y_lo, p_y_hi = p.predict([1], t, ci=0.95)
+            p_y_final, p_y_lo_final, p_y_hi_final = p.predict_final([1], ci=0.95)
             label += ' projected: %.2f%% (%.2f%% - %.2f%%)' % (100.*p_y_final, 100.*p_y_lo_final, 100.*p_y_hi_final)
             pyplot.plot(p_t, 100. * p_y, color=color, linestyle=':', alpha=0.7)
             pyplot.fill_between(p_t, 100. * p_y_lo, 100. * p_y_hi, color=color, alpha=0.2)

-        m_t, m_y = m.predict(t)
+        m_t, m_y = m.predict([1], t)
         pyplot.plot(m_t, 100. * m_y, color=color, label=label)
         y_max = max(y_max, 110. * max(m_y))
@@ -370,17 +205,17 @@ def plot_timeseries(data, window, model='kaplan-meier', group_min_size=0, max_gr
             data = js[group][i1:i2]
             t1 += stride

-            C, N, B = get_arrays(data, t_converter)
+            X, B, T = get_arrays(data, t_converter)

             if sum(B) < window_min_size:
                 continue

             p = _models[model]()
-            p.fit(C, N, B)
+            p.fit(X, B, T)

             if time:
-                y, y_lo, y_hi = p.predict_time(ci=0.95)
+                y, y_lo, y_hi = p.predict_time([1], ci=0.95)
             else:
-                y, y_lo, y_hi = p.predict_final(ci=0.95)
+                y, y_lo, y_hi = p.predict_final([1], ci=0.95)
             print('%30s %40s %.4f %.4f %.4f' % (group, t1, y, y_lo, y_hi))
             ts.append(t2)
             ys.append(y)
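The data-layout change above is the heart of this patch: instead of (C, N, B), i.e. conversion time, observation age, and converted flag, models now take a feature matrix X, a boolean converted vector B, and a single time vector T that holds the time to conversion for converters and the censoring time for everyone else. A small sketch of what the new get_arrays produces (the dates and the day-based t_converter are made up):

```python
import datetime
import numpy

# Two made-up rows in the (created_at, converted_at, now) shape that the
# plotting code feeds to get_arrays; converted_at is None if not converted.
now = datetime.datetime(2018, 3, 1)
data = [
    (datetime.datetime(2018, 2, 1), datetime.datetime(2018, 2, 5), now),  # converted after 4 days
    (datetime.datetime(2018, 2, 10), None, now),                          # censored at 19 days
]

def t_converter(td):  # timedelta -> days
    return td.total_seconds() / (24 * 60 * 60)

X = numpy.ones((len(data), 1))  # intercept-only features, as get_arrays builds them
B = numpy.array([converted_at is not None for _, converted_at, _ in data])
T = numpy.array([t_converter((converted_at or now) - created_at)
                 for created_at, converted_at, now in data])
print(X.tolist(), B.tolist(), T.tolist())  # [[1.0], [1.0]] [True, False] [4.0, 19.0]
```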
diff --git a/convoys/model.py b/convoys/model.py
index ce96a12..0f7eb3f 100644
--- a/convoys/model.py
+++ b/convoys/model.py
@@ -4,21 +4,18 @@

 @six.add_metaclass(abc.ABCMeta)
 class Model():
-    def __init__(self, params={}):
-        self.params = params
-
     @abc.abstractmethod
-    def fit(self, C, N, B):
+    def fit(self, X, B, T):
         pass

     @abc.abstractmethod
-    def predict(self, ts, ci=None):
+    def predict(self, x, ts, ci=None):
         pass

     @abc.abstractmethod
-    def predict_final(self, ci=None):
+    def predict_final(self, x, ci=None):
         pass

     @abc.abstractmethod
-    def predict_time(self, ci=None):
+    def predict_time(self, x, ci=None):
         pass
diff --git a/convoys/regression.py b/convoys/regression.py
index a5a2e4f..0d71271 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -14,9 +14,11 @@ def __init__(self, log_pdf, cdf, extra_params, L2_reg=1.0):
         self._extra_params = extra_params
         self._sess = tf.Session()

-        # TODO: this assumes scalar inputs... should figure out a way to support arbitrary shapes
-        T_input = tf.placeholder(tf.float32, [])
-        self._cdf_f = lambda t: self._sess.run(cdf(T_input), feed_dict={T_input: t})
+        # TODO: this seems a bit dumb... is there no better way to support arbitrary number of dims?
+        T_scalar_input = tf.placeholder(tf.float32, [])
+        self._cdf_scalar_f = lambda t: self._sess.run(cdf(T_scalar_input), feed_dict={T_scalar_input: t})
+        T_vector_input = tf.placeholder(tf.float32, [None])
+        self._cdf_vector_f = lambda t: self._sess.run(cdf(T_vector_input), feed_dict={T_vector_input: t})

     def __del__(self):
         self._sess.close()
@@ -69,14 +71,18 @@ def fit(self, X, B, T):
         )
         print(self.params)

-    def predict(self, t, x, ci=None):
-        z = self._cdf_f(t)
+    def predict(self, x, t, ci=None):
+        t = numpy.array(t)
+        if len(t.shape) == 0:
+            z = self._cdf_scalar_f(t)
+        elif len(t.shape) == 1:
+            z = self._cdf_vector_f(t)
         if ci:
             c, c_lo, c_hi = self.predict_final(x, ci)
-            return (c*z, c_lo*z, c_hi*z)
+            return (t, c*z, c_lo*z, c_hi*z)
         else:
             c = self.predict_final(x)
-            return c*z
+            return (t, c*z)

     def predict_final(self, x, ci=None):
         # TODO: should take advantage of tensorflow here!!!
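On the TODO above: one possible alternative, offered as an untested sketch rather than what the patch does, is a single placeholder built with shape=None, which TF 1.x treats as "any shape", so one graph serves scalars and vectors alike. (As written, the patch's predict would also leave z unset for inputs of rank 2 or higher.)

```python
import tensorflow as tf  # TF 1.x, matching this module

T_input = tf.placeholder(tf.float32, shape=None)  # accepts a tensor of any rank
cdf_op = 1 - tf.exp(-T_input)  # stand-in for the model's cdf(); elementwise
with tf.Session() as sess:
    print(sess.run(cdf_op, feed_dict={T_input: 1.0}))         # scalar in, scalar out
    print(sess.run(cdf_op, feed_dict={T_input: [1.0, 2.0]}))  # vector in, vector out
```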
diff --git a/test_convoys.py b/test_convoys.py
index ecb3c39..fb82216 100644
--- a/test_convoys.py
+++ b/test_convoys.py
@@ -9,56 +9,12 @@
 import convoys.regression


-def test_exponential_model(c=0.3, lambd=0.1, n=10000):
-    # With a really long observation window, the rate should converge to the measured
-    C = numpy.array([random.random() < c and scipy.stats.expon.rvs(scale=1.0/lambd) or 0.0 for x in range(n)])
-    N = numpy.array([100 for converted_at in C])
-    B = numpy.array([bool(converted_at > 0) for converted_at in C])
-    c = numpy.mean(B)
-    model = convoys.Exponential()
-    model.fit(C, N, B)
-    assert 0.95*c < model.predict_final() < 1.05*c
-    assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
-
-    # Check the confidence intervals
-    y, y_lo, y_hi = model.predict_final(ci=0.95)
-    c_lo = scipy.stats.beta.ppf(0.025, n*c, n*(1-c))
-    c_hi = scipy.stats.beta.ppf(0.975, n*c, n*(1-c))
-    assert 0.95*c < y < 1.05 * c
-    assert 0.95*c_lo < y_lo < 1.05 * c_lo
-    assert 0.95*c_hi < y_hi < 1.05 * c_hi
-
-
-def test_gamma_model(c=0.3, lambd=0.1, k=3.0, n=100000):
-    C = numpy.array([random.random() < c and scipy.stats.gamma.rvs(a=k, scale=1.0/lambd) or 0.0 for x in range(n)])
-    N = numpy.array([1000 for converted_at in C])
-    B = numpy.array([bool(converted_at > 0) for converted_at in C])
-    c = numpy.mean(B)
-    model = convoys.Gamma()
-    model.fit(C, N, B)
-    assert 0.95*c < model.predict_final() < 1.05*c
-    assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
-    assert 0.95*k < model.params['k'] < 1.05*k
-
-
 def sample_weibull(k, lambd):
     # scipy.stats is garbage for this
     # exp(-(x * lambda)^k) = y
     return (-numpy.log(random.random())) ** (1.0/k) / lambd


-def test_weibull_model(c=0.3, lambd=0.1, k=0.5, n=100000):
-    B = numpy.array([bool(random.random() < c) for x in range(n)])
-    C = numpy.array([b and sample_weibull(k, lambd) or 1.0 for b in B])
-    N = numpy.array([1000 for b in B])
-    c = numpy.mean(B)
-    model = convoys.Weibull()
-    model.fit(C, N, B)
-    assert 0.95*c < model.predict_final() < 1.05*c
-    assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
-    assert 0.95*k < model.params['k'] < 1.05*k
-
-
 def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
     # With a really long observation window, the rate should converge to the measured
     X = numpy.ones((n, 1))
@@ -71,7 +27,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
     assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
     t = 10
     d = 1 - numpy.exp(-lambd*t)
-    assert 0.95*c*d < model.predict(t, [1]) < 1.05*c*d
+    assert 0.95*c*d < model.predict([1], t)[1] < 1.05*c*d

     # Check the confidence intervals
     y, y_lo, y_hi = model.predict_final([1], ci=0.95)
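sample_weibull, kept above as a test helper, is inverse-transform sampling: the Weibull survival function is S(t) = exp(-(t*lambd)^k), so solving S(t) = u for u drawn uniformly from (0, 1) gives t = (-log u)^(1/k) / lambd. A quick illustrative check of that claim against scipy's parametrization (not part of the patch):

```python
import numpy
import scipy.stats

k, lambd = 0.5, 0.1
u = numpy.random.uniform(size=100000)
t = (-numpy.log(u)) ** (1.0 / k) / lambd  # vectorized sample_weibull

# Same distribution as scipy's weibull_min with c=k, scale=1/lambd
stat, p = scipy.stats.kstest(t, scipy.stats.weibull_min(c=k, scale=1.0 / lambd).cdf)
print(p)  # comfortably above any reasonable rejection threshold
```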
From b92e6977ce2d30db9692c5a1f6513c1e9ea7ac68 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 21:39:40 -0500
Subject: [PATCH 4/6] drop autograd

---
 convoys/__init__.py | 4 ----
 requirements.txt    | 1 -
 2 files changed, 5 deletions(-)

diff --git a/convoys/__init__.py b/convoys/__init__.py
index b1cb182..c46fc1f 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -6,11 +6,7 @@
 import numpy
 import random
 import seaborn
-import scipy.optimize
 import six
-from autograd import jacobian, hessian, grad
-from autograd.scipy.special import expit, gamma, gammainc, gammaincc, gammaln
-from autograd.numpy import exp, log, sum
 from matplotlib import pyplot
 from convoys.model import Model
 from convoys.regression import ExponentialRegression, WeibullRegression, GammaRegression
diff --git a/requirements.txt b/requirements.txt
index 4ba7edb..4d5f04f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-autograd==1.2
 lifelines==0.11.2
 matplotlib>=2.0.0
 numpy
From bffef76ed7affbd1bcb855c4f7396e5f2d8822ae Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 21:44:49 -0500
Subject: [PATCH 5/6] remove print

---
 convoys/regression.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/convoys/regression.py b/convoys/regression.py
index 0d71271..2ac994e 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -69,7 +69,6 @@ def fit(self, X, B, T):
             ),
             **self._extra_params(self._sess)
         )
-        print(self.params)

     def predict(self, x, t, ci=None):
         t = numpy.array(t)

From c152e19181968c8a40baf9fd70c1759ff2c68821 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 22:08:53 -0500
Subject: [PATCH 6/6] more robust test & learning rate scheme

---
 convoys/regression.py | 13 ++++++++-----
 test_convoys.py       | 11 +++++------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/convoys/regression.py b/convoys/regression.py
index 2ac994e..437d8db 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -41,25 +41,28 @@ def fit(self, X, B, T):
         LL = tf.reduce_sum(B_input * LL_observed + (1 - B_input) * LL_censored, 0)
         LL_penalized = LL - self._L2_reg * tf.reduce_sum(beta * beta, 0)

-        step_var = tf.Variable(0, trainable=False)
-        learning_rate = tf.train.exponential_decay(0.03, step_var, 1, 0.999)
-        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(-LL_penalized, global_step=step_var)
+        learning_rate_input = tf.placeholder(tf.float32, [])
+        optimizer = tf.train.AdamOptimizer(learning_rate_input).minimize(-LL_penalized)

         # TODO(erikbern): this is going to add more and more variables every time we run this
         self._sess.run(tf.global_variables_initializer())

         best_cost, best_step, step = float('-inf'), 0, 0
+        learning_rate = 0.1
         while True:
-            feed_dict = {X_input: X, B_input: B, T_input: T}
+            feed_dict = {X_input: X, B_input: B, T_input: T, learning_rate_input: learning_rate}
             self._sess.run(optimizer, feed_dict=feed_dict)
             cost = self._sess.run(LL_penalized, feed_dict=feed_dict)
             if cost > best_cost:
                 best_cost, best_step = cost, step
             if step - best_step > 100:
+                learning_rate /= 10
+                best_cost = float('-inf')
+            if learning_rate < 1e-6:
                 break
             step += 1
             if step % 100 == 0:
-                print(step, cost, self._sess.run(learning_rate))
+                print('step %6d (lr %6.6f): %9.2f' % (step, learning_rate, cost))

         self.params = dict(
             beta=self._sess.run(beta),
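The new scheme replaces TensorFlow's exponential_decay with plain Python bookkeeping: hold the learning rate fixed, and once 100 consecutive steps fail to improve the best penalized log-likelihood, divide it by 10 and reset the bar; training stops when the rate falls below 1e-6, i.e. after six cuts from 0.1. The same loop in isolation, with a noisy stand-in objective so the sketch runs on its own:

```python
import random

# Flat, noisy stand-in for sess.run(LL_penalized, ...); real code would also
# run one optimizer step per iteration at the current learning_rate.
def evaluate():
    return random.gauss(0.0, 1.0)

best_cost, best_step, step = float('-inf'), 0, 0
learning_rate = 0.1
while True:
    cost = evaluate()
    if cost > best_cost:
        best_cost, best_step = cost, step
    if step - best_step > 100:
        learning_rate /= 10
        best_cost = float('-inf')  # next step always "improves", so best_step resets too
    if learning_rate < 1e-6:
        break
    step += 1
print('stopped after %d steps' % step)
```

Note the deliberate trick in the diff: after a decay, best_cost is reset to -inf, so the very next step becomes a new best and restarts the 100-step plateau counter at the lower rate.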
diff --git a/test_convoys.py b/test_convoys.py
index fb82216..558314e 100644
--- a/test_convoys.py
+++ b/test_convoys.py
@@ -15,7 +15,7 @@ def sample_weibull(k, lambd):
     return (-numpy.log(random.random())) ** (1.0/k) / lambd


-def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
+def test_exponential_regression_model(c=0.3, lambd=0.1, n=100000):
     # With a really long observation window, the rate should converge to the measured
     X = numpy.ones((n, 1))
     B = numpy.array([bool(random.random() < c) for x in range(n)])
@@ -38,7 +38,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
     assert 0.95*c_hi < y_hi < 1.05 * c_hi


-def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=10000):
+def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=100000):
     X = numpy.array([[1] + [r % len(cs) == j for j in range(len(cs))] for r in range(n)])
     B = numpy.array([bool(random.random() < cs[r % len(cs)]) for r in range(n)])
     T = numpy.array([b and sample_weibull(k, lambd) or 1000 for b in B])
@@ -49,7 +49,7 @@ def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=10000)
     assert 0.95 * c < model.predict_final(x) < 1.05 * c


-def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=10000):
+def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=100000):
     X = numpy.ones((n, 1))
     B = numpy.array([bool(random.random() < c) for r in range(n)])
     c = numpy.mean(B)
@@ -61,11 +61,10 @@ def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=10000):
     c_lo = scipy.stats.beta.ppf(0.025, n*c, n*(1-c))
     c_hi = scipy.stats.beta.ppf(0.975, n*c, n*(1-c))
     assert 0.95*c < y < 1.05 * c
-    assert 0.95*c_lo < y_lo < 1.05 * c_lo
-    assert 0.95*c_hi < y_hi < 1.05 * c_hi
+    assert 0.95*(c_hi-c_lo) < (y_hi-y_lo) < 1.05 * (c_hi-c_lo)


-def test_gamma_regression_model(c=0.3, lambd=0.1, k=3.0, n=10000):
+def test_gamma_regression_model(c=0.3, lambd=0.1, k=3.0, n=100000):
     X = numpy.ones((n, 1))
     B = numpy.array([bool(random.random() < c) for r in range(n)])
     T = numpy.array([b and scipy.stats.gamma.rvs(a=k, scale=1.0/lambd) or 1000 for b in B])
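The interval assertion above now checks the width y_hi - y_lo rather than each endpoint separately, presumably because the L2 penalty can shift the fitted interval's center slightly while its width stays close to the exact Binomial reference. For completeness, a sketch of exercising one of these regression models end to end; the zero-argument constructor mirrors the `_models[model]()` usage in patch 3, and the data generation mirrors the gamma test above (expected output is illustrative):

```python
import numpy
import random
import scipy.stats
import convoys.regression

n, c, lambd, k = 100000, 0.3, 0.1, 3.0
X = numpy.ones((n, 1))                       # intercept-only features
B = numpy.array([bool(random.random() < c) for _ in range(n)])
T = numpy.array([scipy.stats.gamma.rvs(a=k, scale=1.0/lambd) if b else 1000 for b in B])

model = convoys.regression.GammaRegression()
model.fit(X, B, T)
print(model.predict_final([1], ci=0.95))     # (estimate, lower, upper), near numpy.mean(B)
```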