Skip to content

Commit

Permalink
Merge 7fbfa38 into eb5926c
Browse files Browse the repository at this point in the history
  • Loading branch information
erikbern committed Mar 17, 2018
2 parents eb5926c + 7fbfa38 commit e0f32f6
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 66 deletions.
39 changes: 1 addition & 38 deletions convoys/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import abc
import bisect
import datetime
import lifelines
import math
import numpy
import random
import seaborn
import six
from matplotlib import pyplot
from convoys.model import Model
from convoys.regression import ExponentialRegression, WeibullRegression, GammaRegression
from convoys.single import KaplanMeier


def get_timescale(t):
Expand Down Expand Up @@ -38,41 +36,6 @@ def get_arrays(data, t_converter):
return X, numpy.array(B), numpy.array(T)


class KaplanMeier(Model):
    """Non-parametric conversion curve built on lifelines' Kaplan-Meier fitter.

    After ``fit`` the instance holds the fitted time grid in ``ts`` and the
    cumulative conversion estimates in ``ps`` (one minus the survival
    estimate), together with the ``ps_lo``/``ps_hi`` bounds derived from the
    fitter's confidence-interval columns labelled ``0.95``.
    """

    def fit(self, X, B, T):
        """Fit the estimator.

        ``T`` is handed to the fitter as its first argument and ``B`` as
        ``event_observed``. ``X`` is accepted but not used.
        """
        fitter = lifelines.KaplanMeierFitter()
        fitter.fit(T, event_observed=B)
        survival = fitter.survival_function_
        self.ts = survival.index.values
        self.ps = 1.0 - survival['KM_estimate'].values
        # Turning survival into conversion (1 - s) swaps the bounds: the
        # lower survival bound becomes the upper conversion bound.
        band = fitter.confidence_interval_
        self.ps_hi = 1.0 - band['KM_estimate_lower_0.95'].values
        self.ps_lo = 1.0 - band['KM_estimate_upper_0.95'].values

    def predict(self, x, ts, ci=None):
        """Look up the fitted curve at the requested times.

        Each requested time is mapped to the first fitted time that is not
        smaller than it; requests beyond the last fitted time are dropped.
        Returns ``(times, ps)`` or, when ``ci`` is not None,
        ``(times, ps, ps_lo, ps_hi)`` as numpy arrays. ``x`` is not used.
        """
        n_known = len(self.ts)
        positions = [bisect.bisect_left(self.ts, t) for t in ts]
        kept = [pos for pos in positions if pos < n_known]

        def pick(values):
            return numpy.array([values[pos] for pos in kept])

        curves = [self.ts, self.ps]
        if ci is not None:
            curves += [self.ps_lo, self.ps_hi]
        return tuple(pick(curve) for curve in curves)

    def predict_final(self, x, ci=None):
        """Return the last value of the curve (plus bounds if ``ci`` is set)."""
        if ci is None:
            return self.ps[-1]
        return (self.ps[-1], self.ps_lo[-1], self.ps_hi[-1])

    def predict_time(self, x, ci=None):
        """Return the fitted time at which the curve first reaches 0.5.

        If the curve never reaches 0.5 the last fitted time is returned.
        With ``ci`` set, the same lookup is done for ``ps_lo`` and ``ps_hi``.
        """
        # TODO: should not use median here, but mean is no good
        def half_point(curve):
            idx = bisect.bisect_left(curve, 0.5)
            return self.ts[min(idx, len(curve) - 1)]

        if ci is None:
            return half_point(self.ps)
        return tuple(half_point(c) for c in (self.ps, self.ps_lo, self.ps_hi))


def sample_event(model, x, t, hi=1e3):
# We are now at time t. Randomly decide whether the user is going to convert or not
# TODO: this is a hack until we have an "invert CDF" method on each model
Expand Down
21 changes: 0 additions & 21 deletions convoys/model.py

This file was deleted.

10 changes: 4 additions & 6 deletions convoys/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import tensorflow as tf
import sys

from convoys.model import Model


tf.logging.set_verbosity(3)

Expand Down Expand Up @@ -87,12 +85,12 @@ def _predict(func_values, ci):
return numpy.mean(func_values, axis=axis), numpy.percentile(func_values, (1-ci)*50, axis=axis), numpy.percentile(func_values, (1+ci)*50, axis=axis)


class Regression(Model):
class RegressionModel:
    """Common base for the regression models; only stores ``L2_reg``."""

    def __init__(self, L2_reg=1.0):
        # Kept on a private attribute; presumably an L2 regularization
        # strength read by the subclasses' fit() -- confirm against them.
        self._L2_reg = L2_reg


class ExponentialRegression(Regression):
class ExponentialRegression(RegressionModel):
def fit(self, X, B, T):
n, k = X.shape
X_input, B_input, T_input = _get_constants((X, B, T))
Expand Down Expand Up @@ -134,7 +132,7 @@ def predict_time(self, x, ci=None, n=1000):
return _predict(1./numpy.exp(x_prod_alpha), ci)


class WeibullRegression(Regression):
class WeibullRegression(RegressionModel):
def fit(self, X, B, T):
n, k = X.shape
X_input, B_input, T_input = _get_constants((X, B, T))
Expand Down Expand Up @@ -180,7 +178,7 @@ def predict_time(self, x, ci=None, n=1000):
return _predict(1./numpy.exp(x_prod_alpha) * gamma(1 + 1./self.params['k']), ci)


class GammaRegression(Regression):
class GammaRegression(RegressionModel):
def fit(self, X, B, T):
n, k = X.shape
X_input, B_input, T_input = _get_constants((X, B, T))
Expand Down
37 changes: 37 additions & 0 deletions convoys/single.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import bisect
import lifelines
import numpy

class KaplanMeier:
    """Non-parametric conversion curve built on lifelines' Kaplan-Meier fitter.

    After ``fit`` the instance holds the fitted time grid in ``ts`` and the
    cumulative conversion estimates in ``ps`` (one minus the survival
    estimate), together with the ``ps_lo``/``ps_hi`` bounds derived from the
    fitter's confidence-interval columns labelled ``0.95``.
    """

    def fit(self, X, B, T):
        """Fit the estimator.

        ``T`` is handed to the fitter as its first argument and ``B`` as
        ``event_observed``. ``X`` is accepted but not used.
        """
        fitter = lifelines.KaplanMeierFitter()
        fitter.fit(T, event_observed=B)
        survival = fitter.survival_function_
        self.ts = survival.index.values
        self.ps = 1.0 - survival['KM_estimate'].values
        # Turning survival into conversion (1 - s) swaps the bounds: the
        # lower survival bound becomes the upper conversion bound.
        band = fitter.confidence_interval_
        self.ps_hi = 1.0 - band['KM_estimate_lower_0.95'].values
        self.ps_lo = 1.0 - band['KM_estimate_upper_0.95'].values

    def predict(self, x, ts, ci=None):
        """Look up the fitted curve at the requested times.

        Each requested time is mapped to the first fitted time that is not
        smaller than it; requests beyond the last fitted time are dropped.
        Returns ``(times, ps)`` or, when ``ci`` is not None,
        ``(times, ps, ps_lo, ps_hi)`` as numpy arrays. ``x`` is not used.
        """
        n_known = len(self.ts)
        positions = [bisect.bisect_left(self.ts, t) for t in ts]
        kept = [pos for pos in positions if pos < n_known]

        def pick(values):
            return numpy.array([values[pos] for pos in kept])

        curves = [self.ts, self.ps]
        if ci is not None:
            curves += [self.ps_lo, self.ps_hi]
        return tuple(pick(curve) for curve in curves)

    def predict_final(self, x, ci=None):
        """Return the last value of the curve (plus bounds if ``ci`` is set)."""
        if ci is None:
            return self.ps[-1]
        return (self.ps[-1], self.ps_lo[-1], self.ps_hi[-1])

    def predict_time(self, x, ci=None):
        """Return the fitted time at which the curve first reaches 0.5.

        If the curve never reaches 0.5 the last fitted time is returned.
        With ``ci`` set, the same lookup is done for ``ps_lo`` and ``ps_hi``.
        """
        # TODO: should not use median here, but mean is no good
        def half_point(curve):
            idx = bisect.bisect_left(curve, 0.5)
            return self.ts[min(idx, len(curve) - 1)]

        if ci is None:
            return half_point(self.ps)
        return tuple(half_point(c) for c in (self.ps, self.ps_lo, self.ps_hi))
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@ matplotlib>=2.0.0
numpy
scipy
seaborn==0.8.1
six==1.11.0
tensorflow==1.6.0rc1
3 changes: 3 additions & 0 deletions test_convoys.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,6 @@ def test_plot_cohorts(cs=[0.3, 0.5, 0.7], k=2.0, lambd=0.1, n=100000):
assert group == 'Group 0'
assert 0.95*c < y < 1.05 * c
assert 0.70*(c_hi-c_lo) < (y_hi-y_lo) < 1.30*(c_hi-c_lo)

# Also plot with default arguments (TODO: add assertions)
convoys.plot_cohorts(data)

0 comments on commit e0f32f6

Please sign in to comment.