From 651a5ef54275b239f0d98bd29f0f6303a04fbfe3 Mon Sep 17 00:00:00 2001 From: Erik Bernhardsson Date: Sun, 1 Sep 2019 22:42:47 -0400 Subject: [PATCH 1/2] Fix super confusing shape handling in .cdf --- convoys/regression.py | 3 ++- test_convoys.py | 47 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/convoys/regression.py b/convoys/regression.py index c1c6878..a44da6f 100644 --- a/convoys/regression.py +++ b/convoys/regression.py @@ -297,6 +297,7 @@ def cdf(self, x, t, ci=None): else: assert self._ci params = self.params['samples'] + t = numpy.expand_dims(t, -1) lambd = exp(dot(x, params['alpha'].T) + params['a']) if self._flavor == 'logistic': c = expit(dot(x, params['beta'].T) + params['b']) @@ -304,7 +305,7 @@ def cdf(self, x, t, ci=None): c = dot(x, params['beta'].T) + params['b'] M = c * gammainc( params['k'], - numpy.multiply.outer(t, lambd)**params['p']) + (t*lambd)**params['p']) if not ci: return M diff --git a/test_convoys.py b/test_convoys.py index 8c7fbe8..8207097 100644 --- a/test_convoys.py +++ b/test_convoys.py @@ -56,6 +56,40 @@ def test_kaplan_meier_model(): assert m.cdf(0, 9) == 0.75 +def test_output_shapes(c=0.3, lambd=0.1, n=1000, k=5): + X = numpy.random.randn(n, k) + C = scipy.stats.bernoulli.rvs(c, size=(n,)) + N = scipy.stats.uniform.rvs(scale=5./lambd, size=(n,)) + E = scipy.stats.expon.rvs(scale=1./lambd, size=(n,)) + B, T = generate_censored_data(N, E, C) + + # Fit model with ci + model = convoys.regression.Exponential(ci=True) + model.fit(X, B, T) + + # Generate output without ci + assert model.cdf(X[0], 0).shape == () + assert model.cdf([X[0], X[1]], 0).shape == (2,) + assert model.cdf([X[0]], [0, 1, 2, 3]).shape == (4,) + assert model.cdf([X[0], X[1], X[2]], [0, 1, 2]).shape == (3,) + assert model.cdf([[X[0], X[1]]], [[0], [1], [2]]).shape == (3, 2) + assert model.cdf([[X[0]], [X[1]]], [[0, 1, 2]]).shape == (2, 3) + + # Generate output with ci (same as above plus (3,)) + assert model.cdf(X[0], 0, ci=0.8).shape == (3,) + assert model.cdf([X[0], X[1]], 0, ci=0.8).shape == (2, 3) + assert model.cdf([X[0]], [0, 1, 2, 3], ci=0.8).shape == (4, 3) + assert model.cdf([X[0], X[1], X[2]], [0, 1, 2], ci=0.8).shape == (3, 3) + assert model.cdf([[X[0], X[1]]], [[0], [1], [2]], ci=0.8).shape == (3, 2, 3) + assert model.cdf([[X[0]], [X[1]]], [[0, 1, 2]], ci=0.8).shape == (2, 3, 3) + + # Fit model without ci (should be the same) + model = convoys.regression.Exponential(ci=False) + model.fit(X, B, T) + assert model.cdf(X[0], 0).shape == () + assert model.cdf([X[0], X[1]], [0, 1]).shape == (2,) + + @flaky.flaky def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000): X = numpy.ones((n, 1)) @@ -65,11 +99,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000): B, T = generate_censored_data(N, E, C) model = convoys.regression.Exponential(ci=True) model.fit(X, B, T) - assert model.cdf([1], float('inf')).shape == () assert 0.80*c < model.cdf([1], float('inf')) < 1.30*c - assert model.cdf([1], 0).shape == () - assert model.cdf([[1], [2]], 0).shape == (2,) - assert model.cdf([1], [0, 1, 2, 3]).shape == (4,) for t in [1, 3, 10]: d = 1 - numpy.exp(-lambd*t) assert 0.80*c*d < model.cdf([1], t) < 1.30*c*d @@ -88,17 +118,14 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000): d = 1 - numpy.exp(-lambd*t) assert 0.70*d < (convert_times < t).mean() < 1.30*d - # Fit model without ci - model = convoys.regression.Exponential(ci=False) - model.fit(X, B, T) - assert model.cdf([1], 0).shape == () - assert model.cdf([1], [0, 1, 2, 3]).shape == (4,) - # Fit a linear model model = convoys.regression.Exponential(ci=False, flavor='linear') model.fit(X, B, T) model_c = model.params['map']['b'] + model.params['map']['beta'][0] assert 0.9*c < model_c < 1.1*c + for t in [1, 3, 10]: + d = 1 - numpy.exp(-lambd*t) + assert 0.80*c*d < model.cdf([1], t) < 1.30*c*d @flaky.flaky From 3c8842a1edf57a9e7de3465272a84b0f40b332c0 Mon Sep 17 00:00:00 2001 From: Erik Bernhardsson Date: Sun, 1 Sep 2019 22:44:33 -0400 Subject: [PATCH 2/2] pre-emptive hound --- test_convoys.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test_convoys.py b/test_convoys.py index 8207097..e0dd410 100644 --- a/test_convoys.py +++ b/test_convoys.py @@ -79,9 +79,12 @@ def test_output_shapes(c=0.3, lambd=0.1, n=1000, k=5): assert model.cdf(X[0], 0, ci=0.8).shape == (3,) assert model.cdf([X[0], X[1]], 0, ci=0.8).shape == (2, 3) assert model.cdf([X[0]], [0, 1, 2, 3], ci=0.8).shape == (4, 3) - assert model.cdf([X[0], X[1], X[2]], [0, 1, 2], ci=0.8).shape == (3, 3) - assert model.cdf([[X[0], X[1]]], [[0], [1], [2]], ci=0.8).shape == (3, 2, 3) - assert model.cdf([[X[0]], [X[1]]], [[0, 1, 2]], ci=0.8).shape == (2, 3, 3) + assert model.cdf([X[0], X[1], X[2]], [0, 1, 2], ci=0.8) \ + .shape == (3, 3) + assert model.cdf([[X[0], X[1]]], [[0], [1], [2]], ci=0.8) \ + .shape == (3, 2, 3) + assert model.cdf([[X[0]], [X[1]]], [[0, 1, 2]], ci=0.8) \ + .shape == (2, 3, 3) # Fit model without ci (should be the same) model = convoys.regression.Exponential(ci=False)