diff --git a/convoys/regression.py b/convoys/regression.py index 772341c..e84ea5a 100644 --- a/convoys/regression.py +++ b/convoys/regression.py @@ -1,5 +1,5 @@ import numpy # TODO: remove -from scipy.special import expit, gammainc # TODO: remove +from scipy.special import expit, gamma, gammainc # TODO: remove import scipy.stats import tensorflow as tf import sys @@ -91,14 +91,10 @@ def _predict(func_values, ci): return numpy.mean(func_values, axis=axis), numpy.percentile(func_values, (1-ci)*50, axis=axis), numpy.percentile(func_values, (1+ci)*50, axis=axis) - class Regression(Model): def __init__(self, L2_reg=1.0): self._L2_reg = L2_reg - def predict_time(self): - pass # TODO: implement - class ExponentialRegression(Regression): def fit(self, X, B, T): @@ -130,13 +126,17 @@ def fit(self, X, B, T): def predict(self, x, t, ci=None, n=1000): t = _fix_t(t) - kappa = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) - lambd = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) - return _predict(expit(kappa) * (1 - numpy.exp(-t * numpy.exp(lambd))), ci) + x_prod_alpha = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) + x_prod_beta = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) + return _predict(expit(x_prod_beta) * (1 - numpy.exp(-t * numpy.exp(x_prod_alpha))), ci) def predict_final(self, x, ci=None, n=1000): - kappa = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) - return _predict(expit(kappa), ci) + x_prod_beta = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) + return _predict(expit(x_prod_beta), ci) + + def predict_time(self, x, ci=None, n=1000): + x_prod_alpha = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) + return _predict(1./numpy.exp(x_prod_alpha), ci) class WeibullRegression(Regression): @@ -173,13 +173,17 @@ def fit(self, X, B, T): def predict(self, x, t, ci=None, n=1000): t = _fix_t(t) - kappa = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) - lambd = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) - return _predict(expit(kappa) * (1 - numpy.exp(-(t * numpy.exp(lambd))**self.params['k'])), ci) + x_prod_alpha = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) + x_prod_beta = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) + return _predict(expit(x_prod_beta) * (1 - numpy.exp(-(t * numpy.exp(x_prod_alpha))**self.params['k'])), ci) def predict_final(self, x, ci=None, n=1000): - kappa = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) - return _predict(expit(kappa), ci) + x_prod_beta = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) + return _predict(expit(x_prod_beta), ci) + + def predict_time(self, x, ci=None, n=1000): + x_prod_alpha = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) + return _predict(1./numpy.exp(x_prod_alpha) * gamma(1 + 1./self.params['k']), ci) class GammaRegression(Regression): @@ -216,10 +220,14 @@ def fit(self, X, B, T): def predict(self, x, t, ci=None, n=1000): t = _fix_t(t) - kappa = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) - lambd = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) - return _predict(expit(kappa) * (1 - gammainc(self.params['k'], t * numpy.exp(lambd))), ci) + x_prod_alpha = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) + x_prod_beta = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) + return _predict(expit(x_prod_beta) * (1 - gammainc(self.params['k'], t * numpy.exp(x_prod_alpha))), ci) def predict_final(self, x, ci=None, n=1000): - kappa = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) - return _predict(expit(kappa), ci) + x_prod_beta = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci) + return _predict(expit(x_prod_beta), ci) + + def predict_time(self, x, ci=None, n=1000): + x_prod_alpha = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci) + return _predict(self.params['k']/numpy.exp(x_prod_alpha), ci) diff --git a/test_convoys.py b/test_convoys.py index 50ba4d8..7bb3de6 100644 --- a/test_convoys.py +++ b/test_convoys.py @@ -3,6 +3,7 @@ import numpy import pytest import random +import scipy.special import scipy.stats matplotlib.use('Agg') # Needed for matplotlib to run in Travis import convoys @@ -29,6 +30,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=100000): model = convoys.regression.ExponentialRegression() model.fit(X, B, T) assert 0.95*c < model.predict_final([1]) < 1.05*c + assert 0.95/lambd < model.predict_time([1]) < 1.05/lambd t = 10 d = 1 - numpy.exp(-lambd*t) assert 0.95*c*d < model.predict([1], t) < 1.05*c*d @@ -53,6 +55,8 @@ def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=100000 for r, c in enumerate(cs): x = [1] + [int(r == j) for j in range(len(cs))] assert 0.95 * c < model.predict_final(x) < 1.05 * c + expected_time = 1./lambd * scipy.special.gamma(1 + 1/k) + assert 0.95*expected_time < model.predict_time(x) < 1.05*expected_time def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=100000): @@ -84,6 +88,7 @@ def test_gamma_regression_model(c=0.3, lambd=0.1, k=3.0, n=100000): assert 0.95*c < model.predict_final([1]) < 1.05*c assert 0.90*k < model.params['k'] < 1.10*k assert 0.90*lambd < numpy.exp(model.params['alpha']) < 1.10*lambd + assert 0.95*k/lambd < model.predict_time([1]) < 1.05*k/lambd def test_plot_cohorts(cs=[0.3, 0.5, 0.7], k=2.0, lambd=0.1, n=100000):