From 2eaf8fce6760e58a42759389b2079bfcdf3e1a21 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Tue, 10 Apr 2018 12:25:17 -0400
Subject: [PATCH 1/2] Refactor Exponential, Weibull and Gamma to be special
 cases of GeneralizedGamma

---
 convoys/__init__.py   |   3 +-
 convoys/multi.py      |   4 ++
 convoys/regression.py | 124 ++++++++++++------------------------------
 convoys/tf_utils.py   |  17 ++++++
 4 files changed, 58 insertions(+), 90 deletions(-)

diff --git a/convoys/__init__.py b/convoys/__init__.py
index f4a907b..434e481 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -3,7 +3,7 @@
 import random
 import seaborn
 from matplotlib import pyplot
-from convoys.multi import Exponential, Weibull, Gamma, KaplanMeier, Nonparametric
+from convoys.multi import *
 
 
 def get_timescale(t):
@@ -59,6 +59,7 @@ def get_groups(data, group_min_size, max_groups):
     'exponential': Exponential,
     'weibull': Weibull,
     'gamma': Gamma,
+    'generalized-gamma': GeneralizedGamma,
 }
 
 
diff --git a/convoys/multi.py b/convoys/multi.py
index e0829d7..95f7737 100644
--- a/convoys/multi.py
+++ b/convoys/multi.py
@@ -56,6 +56,10 @@ class Gamma(RegressionToMulti):
     base_model_cls = regression.Gamma
 
 
+class GeneralizedGamma(RegressionToMulti):
+    base_model_cls = regression.GeneralizedGamma
+
+
 class KaplanMeier(SingleToMulti):
     base_model_cls = single.KaplanMeier
 
diff --git a/convoys/regression.py b/convoys/regression.py
index 128bc81..7dd8e46 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -38,57 +38,36 @@ class RegressionModel:
     pass
 
 
-class Exponential(RegressionModel):
-    def fit(self, X, B, T):
-        n, k = X.shape
+class GeneralizedGamma(RegressionModel):
+    # https://en.wikipedia.org/wiki/Generalized_gamma_distribution
+    # Note however that lambda is a^-1 in WP's notation
+    # Note also that k = d/p so d = k*p
+    def fit(self, X, B, T, k=None, p=None):
+        n_features = X.shape[1]
         X_batch, B_batch, T_batch = tf_utils.get_batch_placeholders((X, B, T))
 
         X_batch = tf.nn.dropout(X_batch, keep_prob=0.5)
-        a = LinearCombination(X_batch, k)
-        b = LinearCombination(X_batch, k)
+        a = LinearCombination(X_batch, n_features)
+        b = LinearCombination(X_batch, n_features)
         lambd = tf.exp(a.y)
         c = tf.sigmoid(b.y)
 
-        log_pdf = tf.log(lambd) - T_batch*lambd
-        cdf = 1 - tf.exp(-(T_batch * lambd))
-
-        LL_observed = tf.log(c) + log_pdf
-        LL_censored = tf.log((1-c) + c * (1 - cdf))
-
-        LL = tf.reduce_sum(B_batch * LL_observed + (1 - B_batch) * LL_censored, 0)
-
-        with tf.Session() as sess:
-            feed_dict = {X_batch: X, B_batch: B, T_batch: T}
-            tf_utils.optimize(sess, LL, feed_dict)
-            self.params = {
-                'a': a.params(sess, LL, feed_dict),
-                'b': b.params(sess, LL, feed_dict),
-            }
-
-    def cdf(self, x, t, ci=None, n=1000):
-        t = numpy.array(t)
-        a = LinearCombination.sample(self.params['a'], x, ci, n)
-        b = LinearCombination.sample(self.params['b'], x, ci, n)
-        return tf_utils.predict(expit(b) * (1 - numpy.exp(numpy.multiply.outer(-t, numpy.exp(a)))), ci)
-
-
-class Weibull(RegressionModel):
-    def fit(self, X, B, T):
-        n, k = X.shape
-        X_batch, B_batch, T_batch = tf_utils.get_batch_placeholders((X, B, T))
+        if k is None:
+            k = tf.Variable(1.0, name='k', trainable=False)
+            should_update_k = True
+        else:
+            k = tf.constant(k, tf.float32)
+            should_update_k = False
 
-        log_k_var = tf.Variable(tf.zeros([]), name='log_k')
-        X_batch = tf.nn.dropout(X_batch, keep_prob=0.5)
-        a = LinearCombination(X_batch, k)
-        b = LinearCombination(X_batch, k)
-        k = tf.exp(log_k_var)
-        lambd = tf.exp(a.y)
-        c = tf.sigmoid(b.y)
+        if p is None:
+            log_p_var = tf.Variable(tf.zeros([]), name='log_p')
+            p = tf.exp(log_p_var)
+        else:
+            p = tf.constant(p, tf.float32)
 
-        # PDF of Weibull: k * lambda * (x * lambda)^(k-1) * exp(-(t * lambda)^k)
-        log_pdf = tf.log(k) + tf.log(lambd) + (k-1)*(tf.log(T_batch) + tf.log(lambd)) - (T_batch*lambd)**k
-        # CDF of Weibull: 1 - exp(-(t * lambda)^k)
-        cdf = 1 - tf.exp(-(T_batch * lambd)**k)
+        # PDF: p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(x*lambda)^p)
+        log_pdf = tf.log(p) + (k*p) * tf.log(lambd) - tf.lgamma(k) + (k*p-1) * tf.log(T_batch) - (T_batch*lambd)**p
+        cdf = tf.igamma(k, (T_batch*lambd)**p)
 
         LL_observed = tf.log(c) + log_pdf
         LL_censored = tf.log((1-c) + c * (1 - cdf))
@@ -97,66 +76,33 @@ def fit(self, X, B, T):
 
         with tf.Session() as sess:
             feed_dict = {X_batch: X, B_batch: B, T_batch: T}
-            tf_utils.optimize(sess, LL, feed_dict)
+            tf_utils.optimize(sess, LL, feed_dict,
+                              update_callback=(tf_utils.get_tweaker(sess, LL, k, feed_dict)
+                                               if should_update_k else None))
             self.params = {
                 'a': a.params(sess, LL, feed_dict),
                 'b': b.params(sess, LL, feed_dict),
                 'k': sess.run(k),
+                'p': sess.run(p),
             }
 
     def cdf(self, x, t, ci=None, n=1000):
         t = numpy.array(t)
         a = LinearCombination.sample(self.params['a'], x, ci, n)
         b = LinearCombination.sample(self.params['b'], x, ci, n)
-        return tf_utils.predict(expit(b) * (1 - numpy.exp(-numpy.multiply.outer(t, numpy.exp(a))**self.params['k'])), ci)
+        return tf_utils.predict(expit(b) * gammainc(self.params['k'], numpy.multiply.outer(t, numpy.exp(a))**self.params['p']), ci)
 
 
-class Gamma(RegressionModel):
+class Exponential(GeneralizedGamma):
     def fit(self, X, B, T):
-        n, k = X.shape
-        X_batch, B_batch, T_batch = tf_utils.get_batch_placeholders((X, B, T))
-
-        X_batch = tf.nn.dropout(X_batch, keep_prob=0.5)
-        a = LinearCombination(X_batch, k)
-        b = LinearCombination(X_batch, k)
-        k = tf.Variable(2.0, name='k', trainable=False)
-        lambd = tf.exp(a.y)
-        c = tf.sigmoid(b.y)
-
-        # PDF of gamma: 1.0 / gamma(k) * lambda ^ k * t^(k-1) * exp(-t * lambda)
-        log_pdf = -tf.lgamma(k) + k*tf.log(lambd) + (k-1)*tf.log(T_batch) - lambd*T_batch
-        # CDF of gamma: gammainc(k, lambda * t)
-        cdf = tf.igamma(k, lambd * T_batch)
-
-        LL_observed = tf.log(c) + log_pdf
-        LL_censored = tf.log((1-c) + c * (1 - cdf))
-
-        LL = tf.reduce_sum(B_batch * LL_observed + (1 - B_batch) * LL_censored, 0)
-        feed_dict = {X_batch: X, B_batch: B, T_batch: T}
+        super(Exponential, self).fit(X, B, T, k=1, p=1)
 
-        new_k = tf.placeholder(tf.float32, shape=[])
-        assign_k = tf.assign(k, new_k)
-        def update_k(sess):
-            # tf.igamma doesn't compute the gradient wrt a properly
-            # So let's just try small perturbations
-            k_value = sess.run(k)
-            res = {}
-            for k_mult in [0.97, 1.0, 1.03]:
-                sess.run(assign_k, feed_dict={new_k: k_value * k_mult})
-                res[k_value * k_mult] = sess.run(LL, feed_dict=feed_dict)
-            sess.run(assign_k, feed_dict={new_k: max(res.keys(), key=res.get)})
 
+class Weibull(GeneralizedGamma):
+    def fit(self, X, B, T):
+        super(Weibull, self).fit(X, B, T, k=1)
 
-        with tf.Session() as sess:
-            tf_utils.optimize(sess, LL, feed_dict, update_callback=update_k)
-            self.params = {
-                'a': a.params(sess, LL, feed_dict),
-                'b': b.params(sess, LL, feed_dict),
-                'k': sess.run(k),
-            }
 
-    def cdf(self, x, t, ci=None, n=1000):
-        t = numpy.array(t)
-        a = LinearCombination.sample(self.params['a'], x, ci, n)
-        b = LinearCombination.sample(self.params['b'], x, ci, n)
-        return tf_utils.predict(expit(b) * gammainc(self.params['k'], numpy.multiply.outer(t, numpy.exp(a))), ci)
+class Gamma(GeneralizedGamma):
+    def fit(self, X, B, T):
+        super(Gamma, self).fit(X, B, T, p=1)
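A quick numerical cross-check of the parameterization above (a sketch, not part of the patch: it assumes numpy and scipy, and the constants are arbitrary test values). In the notation of the comments in fit(), scipy.stats.gengamma takes a = k, c = p and scale = 1/lambda, so the exponential, Weibull and gamma special cases fall out by pinning k and/or p, exactly as the subclasses at the end of this file do:

import numpy
from scipy import special, stats

lambd, k, p = 0.7, 2.0, 1.5       # arbitrary test values
t = numpy.linspace(0.1, 5.0, 50)

# Same expression as the PDF comment in GeneralizedGamma.fit():
# p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(t*lambda)^p)
pdf = (p * lambd**(k*p) / special.gamma(k)
       * t**(k*p - 1) * numpy.exp(-(t*lambd)**p))
assert numpy.allclose(pdf, stats.gengamma.pdf(t, k, p, scale=1/lambd))

# The special cases pinned by the subclasses:
assert numpy.allclose(stats.gengamma.pdf(t, 1, 1, scale=1/lambd),
                      stats.expon.pdf(t, scale=1/lambd))           # k=1, p=1
assert numpy.allclose(stats.gengamma.pdf(t, 1, p, scale=1/lambd),
                      stats.weibull_min.pdf(t, p, scale=1/lambd))  # k=1
assert numpy.allclose(stats.gengamma.pdf(t, k, 1, scale=1/lambd),
                      stats.gamma.pdf(t, k, scale=1/lambd))        # p=1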
diff --git a/convoys/tf_utils.py b/convoys/tf_utils.py
index 7122dbe..e49e37a 100644
--- a/convoys/tf_utils.py
+++ b/convoys/tf_utils.py
@@ -48,6 +48,23 @@ def optimize(sess, target, placeholders, batch_size=1024, update_callback=None):
             update_callback(sess)
 
 
+def get_tweaker(sess, target, z, feed_dict):
+    new_z = tf.placeholder(tf.float32, shape=z.shape)
+    assign_z = tf.assign(z, new_z)
+
+    def tweak_z(sess):
+        # tf.igamma doesn't compute the gradient wrt a properly
+        # So let's just try small perturbations
+        z_value = sess.run(z)
+        res = {}
+        for z_mult in [0.97, 1.0, 1.03]:
+            sess.run(assign_z, feed_dict={new_z: z_value * z_mult})
+            res[z_value * z_mult] = sess.run(target, feed_dict=feed_dict)
+        sess.run(assign_z, feed_dict={new_z: max(res.keys(), key=res.get)})
+
+    return tweak_z
+
+
 def predict(func_values, ci):
     if ci is None:
         return func_values

From 36616859dc4ce3016a3b1ae192fb2c4cf51cc1e4 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Tue, 10 Apr 2018 12:30:57 -0400
Subject: [PATCH 2/2] hound

---
 convoys/__init__.py   |  3 ++-
 convoys/regression.py | 18 +++++++++++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/convoys/__init__.py b/convoys/__init__.py
index 434e481..07d5908 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -3,7 +3,8 @@
 import random
 import seaborn
 from matplotlib import pyplot
-from convoys.multi import *
+from convoys.multi import Exponential, Weibull, Gamma, GeneralizedGamma, \
+    KaplanMeier, Nonparametric
 
 
 def get_timescale(t):
diff --git a/convoys/regression.py b/convoys/regression.py
index 7dd8e46..45e02fe 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -66,7 +66,10 @@ def fit(self, X, B, T, k=None, p=None):
             p = tf.constant(p, tf.float32)
 
         # PDF: p*lambda^(k*p) / gamma(k) * t^(k*p-1) * exp(-(x*lambda)^p)
-        log_pdf = tf.log(p) + (k*p) * tf.log(lambd) - tf.lgamma(k) + (k*p-1) * tf.log(T_batch) - (T_batch*lambd)**p
+        log_pdf = \
+            tf.log(p) + (k*p) * tf.log(lambd) \
+            - tf.lgamma(k) + (k*p-1) * tf.log(T_batch) \
+            - (T_batch*lambd)**p
         cdf = tf.igamma(k, (T_batch*lambd)**p)
 
         LL_observed = tf.log(c) + log_pdf
@@ -76,9 +79,10 @@ def fit(self, X, B, T, k=None, p=None):
 
         with tf.Session() as sess:
             feed_dict = {X_batch: X, B_batch: B, T_batch: T}
-            tf_utils.optimize(sess, LL, feed_dict,
-                              update_callback=(tf_utils.get_tweaker(sess, LL, k, feed_dict)
-                                               if should_update_k else None))
+            tf_utils.optimize(
+                sess, LL, feed_dict,
+                update_callback=(tf_utils.get_tweaker(sess, LL, k, feed_dict)
+                                 if should_update_k else None))
             self.params = {
                 'a': a.params(sess, LL, feed_dict),
                 'b': b.params(sess, LL, feed_dict),
@@ -90,7 +94,11 @@ def cdf(self, x, t, ci=None, n=1000):
         t = numpy.array(t)
         a = LinearCombination.sample(self.params['a'], x, ci, n)
         b = LinearCombination.sample(self.params['b'], x, ci, n)
-        return tf_utils.predict(expit(b) * gammainc(self.params['k'], numpy.multiply.outer(t, numpy.exp(a))**self.params['p']), ci)
+        return tf_utils.predict(
+            expit(b) * gammainc(
+                self.params['k'],
+                numpy.multiply.outer(t, numpy.exp(a))**self.params['p']),
+            ci)
 
 
 class Exponential(GeneralizedGamma):
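The get_tweaker helper factored out into tf_utils.py works around tf.igamma's unreliable gradient with respect to k by doing a derivative-free search: evaluate the log-likelihood at small multiplicative perturbations of the variable and keep whichever scores best. A minimal sketch of the same idea in plain Python (the 0.97/1.0/1.03 multipliers mirror the patch; the objective and names here are illustrative only):

import numpy

def tweak(value, objective, mults=(0.97, 1.0, 1.03)):
    # Try small multiplicative perturbations of `value` and keep the one
    # that maximizes `objective`; the 1.0 multiplier means "stay put".
    candidates = [value * m for m in mults]
    scores = [objective(v) for v in candidates]
    return candidates[int(numpy.argmax(scores))]

# Toy concave objective with its maximum at 2.0; repeatedly tweaking
# (interleaved with gradient steps on the other parameters in the real
# code) walks the value toward the optimum and then stays put.
def objective(k):
    return -(k - 2.0) ** 2

k = 1.0
for _ in range(200):
    k = tweak(k, objective)
print(k)  # settles near 2.0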