Merge eb25d19 into 62c0e24

better · Feb 7, 2018 · 383975d · 383975d
2 parents 62c0e24 + eb25d19
commit 383975d
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 0 deletions.
diff --git a/convoys/regression.py b/convoys/regression.py
@@ -0,0 +1,40 @@
+import numpy
+import pymc3
+import random
+from scipy.special import expit
+from pymc3.math import dot, sigmoid, log, exp
+
+from convoys import Model
+
+class WeibullRegression(Model):
+    def fit(self, X, B, T):
+        n, k = X.shape
+        with pymc3.Model() as m:
+            beta_sd = pymc3.Exponential('beta_sd', 1.0)  # Weak prior for the regression coefficients
+            beta = pymc3.Normal('beta', mu=0, sd=beta_sd, shape=(k,))  # Regression coefficients
+            c = sigmoid(dot(X, beta))  # Conversion rates for each example
+            k = pymc3.Lognormal('k', mu=0, sd=1.0)  # Weak prior around k=1
+            lambd = pymc3.Exponential('lambd', 0.1)  # Weak prior
+
+            # PDF of Weibull: k * lambda * (x * lambda)^(k-1) * exp(-(t * lambda)^k)
+            LL_observed = log(c) + log(k) + log(lambd) + (k-1)*(log(T) + log(lambd)) - (T*lambd)**k
+            # CDF of Weibull: 1 - exp(-(t * lambda)^k)
+            LL_censored = log((1-c) + c * exp(-(T*lambd)**k))
+
+            # We need to implement the likelihood using pymc3.Potential (custom likelihood)
+            # https://github.com/pymc-devs/pymc3/issues/826
+            logp = B * LL_observed + (1 - B) * LL_censored
+            logpvar = pymc3.Potential('logpvar', logp.sum())
+
+            self.trace = pymc3.sample(n_simulations=500, tune=500, discard_tuned_samples=True, njobs=1)
+            print('done')
+        print('done 2')
+
+    def predict(self):
+        pass  # TODO: implement
+
+    def predict_final(self, x):
+        return numpy.mean(expit(numpy.dot(self.trace['beta'], x)))
+
+    def predict_time(self):
+        pass  # TODO: implement
diff --git a/requirements.txt b/requirements.txt
@@ -2,6 +2,7 @@ autograd==1.2
 lifelines==0.11.2
 matplotlib>=2.0.0
 numpy
+pymc3==3.3
 scipy
 seaborn==0.8.1
 six==1.11.0
diff --git a/test_convoys.py b/test_convoys.py
@@ -1,10 +1,12 @@
 import datetime
 import matplotlib
 import numpy
+import pytest
 import random
 import scipy.stats
 matplotlib.use('Agg')  # Needed for matplotlib to run in Travis
 import convoys
+import convoys.regression
 
 
 def test_exponential_model(c=0.3, lambd=0.1, n=100000):
@@ -55,6 +57,20 @@ def sample_weibull():
     assert 0.95*k < model.params['k'] < 1.05*k
 
 
+@pytest.mark.skip
+def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=10000):
+    def sample_weibull():
+        return (-numpy.log(random.random())) ** (1.0/k) / lambd
+    X = numpy.array([[1] + [r % len(cs) == j for j in range(len(cs))] for r in range(n)])
+    B = numpy.array([bool(random.random() < cs[r % len(cs)]) for r in range(n)])
+    T = numpy.array([b and sample_weibull() or 1000 for b in B])
+    model = convoys.regression.WeibullRegression()
+    model.fit(X, B, T)
+    for r, c in enumerate(cs):
+        x = [1] + [int(r == j) for j in range(len(cs))]
+        assert 0.95 * c < model.predict_final(x) < 1.05 * c
+
+
 def _get_data(c=0.3, k=10, lambd=0.1, n=1000):
     data = []
     now = datetime.datetime(2000, 7, 1)