From a032076f55485d0395f295f495b3a8d4814e2ff4 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 20:11:17 -0500
Subject: [PATCH 1/6] remove basic & simple binomial

---
 convoys/__init__.py | 55 ---------------------------------------------
 1 file changed, 55 deletions(-)

diff --git a/convoys/__init__.py b/convoys/__init__.py
index 6d27556..60d45f9 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -72,59 +72,6 @@ def predict_time(self, ci=None):
         pass


-class SimpleBinomial(Model):
-    def fit(self, C, N, B):
-        self.params['a'] = len(B)
-        self.params['b'] = sum(B)
-
-    def predict(self, ts, ci=None):
-        a, b = self.params['a'], self.params['b']
-        def rep(x):
-            return numpy.ones(len(ts)) * x
-        if ci is not None:
-            return ts, rep(b/a), rep(scipy.stats.beta.ppf((1 - ci)/2, b, a-b)), rep(scipy.stats.beta.ppf((1 + ci)/2, b, a-b))
-        else:
-            return ts, rep(b/a)
-
-    def predict_final(self, ci=None):
-        a, b = self.params['a'], self.params['b']
-        if ci is not None:
-            return b/a, scipy.stats.beta.ppf((1 - ci)/2, b, a-b), scipy.stats.beta.ppf((1 + ci)/2, b, a-b)
-        else:
-            return b/a
-
-    def predict_time(self):
-        return 0.0
-
-
-class Basic(Model):
-    def fit(self, C, N, B, n_limit=30):
-        n, k = len(C), 0
-        self.ts = [0]
-        self.ns = [n]
-        self.ks = [k]
-        events = [(c, 1, 0) for c, n, b in zip(C, N, B) if b] + \
-            [(n, -int(b), -1) for c, n, b in zip(C, N, B)]
-        for t, k_delta, n_delta in sorted(events):
-            k += k_delta
-            n += n_delta
-            self.ts.append(t)
-            self.ks.append(k)
-            self.ns.append(n)
-            if n < n_limit:
-                break
-
-    def predict(self, ts, ci=None):
-        js = [bisect.bisect_left(self.ts, t) for t in ts]
-        ks = numpy.array([self.ks[j] for j in js if j < len(self.ks)])
-        ns = numpy.array([self.ns[j] for j in js if j < len(self.ns)])
-        ts = numpy.array([ts[j] for j in js if j < len(self.ns)])
-        if ci is not None:
-            return ts, ks / ns, scipy.stats.beta.ppf((1 - ci)/2, ks, ns-ks), scipy.stats.beta.ppf((1 + ci)/2, ks, ns-ks)
-        else:
-            return ts, ks / ns
-
-
 class KaplanMeier(Model):
     def fit(self, C, N, B):
         T = [c if b else n for c, n, b in zip(C, N, B)]
@@ -364,9 +311,7 @@ def split_by_group(data, group_min_size, max_groups):


 _models = {
-    'basic': Basic,
     'kaplan-meier': KaplanMeier,
-    'simple-binomial': SimpleBinomial,
     'exponential': Exponential,
     'weibull': Weibull,
     'gamma': Gamma,
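Context for this removal: SimpleBinomial ignored time entirely and just estimated a single conversion rate with a Beta quantile interval, which is the whole calculation below. A minimal standalone sketch of what it computed (the numbers are made up; note the removed code parametrizes the Beta with k successes and n-k failures directly):

```python
import numpy
import scipy.stats

# n users, k conversions; the removed SimpleBinomial stored these as
# params['a'] and params['b'] and ignored time entirely.
n, k, ci = 1000, 300, 0.95
rate = k / float(n)
lo = scipy.stats.beta.ppf((1 - ci) / 2, k, n - k)
hi = scipy.stats.beta.ppf((1 + ci) / 2, k, n - k)
print('%.3f (%.3f-%.3f)' % (rate, lo, hi))  # ~0.300 (0.272-0.329)
```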
From 742f61c69697824e65e5841fdf781657be517ed5 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 21:05:48 -0500
Subject: [PATCH 2/6] factor out model into a separate file

---
 convoys/__init__.py   | 23 +----------------------
 convoys/model.py      | 24 ++++++++++++++++++++++++
 convoys/regression.py |  2 +-
 3 files changed, 26 insertions(+), 23 deletions(-)
 create mode 100644 convoys/model.py

diff --git a/convoys/__init__.py b/convoys/__init__.py
index 60d45f9..4baa1bd 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -12,6 +12,7 @@
 from autograd.scipy.special import expit, gamma, gammainc, gammaincc, gammaln
 from autograd.numpy import exp, log, sum
 from matplotlib import pyplot
+from convoys.model import Model

 LOG_EPS = 1e-12  # Used for log likelihood

@@ -50,28 +51,6 @@ def get_arrays(data, t_converter):
     return numpy.array(C), numpy.array(N), numpy.array(B)


-@six.add_metaclass(abc.ABCMeta)
-class Model():
-    def __init__(self, params={}):
-        self.params = params
-
-    @abc.abstractmethod
-    def fit(self, C, N, B):
-        pass
-
-    @abc.abstractmethod
-    def predict(self, ts, ci=None):
-        pass
-
-    @abc.abstractmethod
-    def predict_final(self, ci=None):
-        pass
-
-    @abc.abstractmethod
-    def predict_time(self, ci=None):
-        pass
-
-
 class KaplanMeier(Model):
     def fit(self, C, N, B):
         T = [c if b else n for c, n, b in zip(C, N, B)]
diff --git a/convoys/model.py b/convoys/model.py
new file mode 100644
index 0000000..ce96a12
--- /dev/null
+++ b/convoys/model.py
@@ -0,0 +1,24 @@
+import abc
+import six
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Model():
+    def __init__(self, params={}):
+        self.params = params
+
+    @abc.abstractmethod
+    def fit(self, C, N, B):
+        pass
+
+    @abc.abstractmethod
+    def predict(self, ts, ci=None):
+        pass
+
+    @abc.abstractmethod
+    def predict_final(self, ci=None):
+        pass
+
+    @abc.abstractmethod
+    def predict_time(self, ci=None):
+        pass
diff --git a/convoys/regression.py b/convoys/regression.py
index a8e321b..a5a2e4f 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -3,7 +3,7 @@
 import scipy.stats
 import tensorflow as tf

-from convoys import Model
+from convoys.model import Model


 class Regression(Model):  # This will replace the model in __init__.py soon.
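The class being factored out here uses six.add_metaclass so the ABC machinery works on both Python 2 and 3: a subclass missing any abstract method fails at instantiation time rather than at call time. A tiny illustration of that pattern (the subclass name is hypothetical):

```python
import abc
import six

@six.add_metaclass(abc.ABCMeta)
class Model():
    @abc.abstractmethod
    def fit(self, C, N, B):
        pass

class Incomplete(Model):  # hypothetical subclass that forgets fit()
    pass

try:
    Incomplete()
except TypeError as e:
    print(e)  # Can't instantiate abstract class Incomplete with abstract methods fit
```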
From 11e08c0a14d8c5f7109c76435abe5d90b9d39de4 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 21:38:33 -0500
Subject: [PATCH 3/6] switch to use the new regression models for everything

Still fits them per-group in the plotting functions, which is a bit dumb,
but will fix that later.
---
 convoys/__init__.py   | 213 +++++-------------------------------------
 convoys/model.py      |  11 +--
 convoys/regression.py |  20 ++--
 test_convoys.py       |  46 +--------
 4 files changed, 42 insertions(+), 248 deletions(-)

diff --git a/convoys/__init__.py b/convoys/__init__.py
index 4baa1bd..b1cb182 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -13,15 +13,7 @@
 from autograd.numpy import exp, log, sum
 from matplotlib import pyplot
 from convoys.model import Model
-
-LOG_EPS = 1e-12  # Used for log likelihood
-
-
-def log1pexp(x):
-    # returns log(1 + exp(x))
-    p, q = min(x, 0), max(x, 0)
-    return q + log(exp(p-q) + 1)
-
+from convoys.regression import ExponentialRegression, WeibullRegression, GammaRegression


 def get_timescale(t):
@@ -42,41 +34,39 @@ def get_timedelta_converter(t_factor):
     return lambda td: t_factor * td.total_seconds()


 def get_arrays(data, t_converter):
-    C = [t_converter(converted_at - created_at) if converted_at is not None else 1.0
-         for created_at, converted_at, now in data]
-    N = [t_converter(now - created_at)
-         for created_at, converted_at, now in data]
+    X = numpy.ones((len(data), 1))
     B = [bool(converted_at is not None) for created_at, converted_at, now in data]
-    return numpy.array(C), numpy.array(N), numpy.array(B)
+    T = [t_converter(converted_at - created_at) if converted_at is not None else t_converter(now - created_at)
+         for created_at, converted_at, now in data]
+    return X, numpy.array(B), numpy.array(T)


 class KaplanMeier(Model):
-    def fit(self, C, N, B):
-        T = [c if b else n for c, n, b in zip(C, N, B)]
+    def fit(self, X, B, T):
         kmf = lifelines.KaplanMeierFitter()
         kmf.fit(T, event_observed=B)
         self.ts = kmf.survival_function_.index.values
         self.ps = 1.0 - kmf.survival_function_['KM_estimate'].values
         self.ps_hi = 1.0 - kmf.confidence_interval_['KM_estimate_lower_0.95'].values
         self.ps_lo = 1.0 - kmf.confidence_interval_['KM_estimate_upper_0.95'].values

-    def predict(self, ts, ci=None):
+    def predict(self, x, ts, ci=None):
         js = [bisect.bisect_left(self.ts, t) for t in ts]
         def array_lookup(a):
             return numpy.array([a[j] for j in js if j < len(self.ts)])
         if ci is not None:
             return (array_lookup(self.ts), array_lookup(self.ps), array_lookup(self.ps_lo), array_lookup(self.ps_hi))
         else:
             return (array_lookup(self.ts), array_lookup(self.ps))

-    def predict_final(self, ci=None):
+    def predict_final(self, x, ci=None):
         if ci is not None:
             return (self.ps[-1], self.ps_lo[-1], self.ps_hi[-1])
         else:
             return self.ps[-1]

-    def predict_time(self, ci=None):
+    def predict_time(self, x, ci=None):
         # TODO: should not use median here, but mean is no good
         def median(ps):
             i = bisect.bisect_left(ps, 0.5)
@@ -87,167 +77,12 @@ def median(ps):
         return median(self.ps)


-def fit_beta(c, fc):
-    # Approximate a Beta distribution for c by fitting two things
-    # 1. second derivative wrt c should match the second derivative of a beta
-    #    LL = (a-1)log(c) + (b-1)log(1-c)
-    #    dLL = (a-1)/c - (b-1)/(1-c)
-    #    ddLL = -(a-1)/c^2 - (b-1)/(1-c)^2 = -h
-    #    a(-1/c^2) + b(-1/(1-c)^2) = -h - 1/c^2 - 1/(1-c)^2
-    # 2. mode should match c, i.e. (a-1)/(a+b-2) = c <=> a-1 = c(a+b-2)
-    #    a(1-c) - bc = 1-2c
-    h = grad(grad(fc))(c)
-    M = numpy.array([[-1/c**2, -1/(1-c)**2],
-                     [(1-c), -c]])
-    q = numpy.array([-h - 1/c**2 - 1/(1-c)**2, 1 - 2*c])
-    try:
-        a, b = numpy.linalg.solve(M, q)
-    except numpy.linalg.linalg.LinAlgError:
-        a, b = 1, 1
-    return a, b
-
-
-class Exponential(Model):
-    def fit(self, C, N, B):
-        def transform(x):
-            p, q = x
-            return (expit(p), log1pexp(q))
-        def f(x):
-            c, lambd = x
-            LL_observed = log(c) + log(lambd) - lambd*C
-            LL_censored = log((1 - c) + c * exp(-lambd*N))
-            neg_LL = -sum(B * LL_observed + (1 - B) * LL_censored)
-            return neg_LL
-
-        g = lambda x: f(transform(x))
-        res = scipy.optimize.minimize(
-            fun=g,
-            jac=jacobian(g),
-            hess=hessian(g),
-            x0=(0, 0),
-            method='trust-ncg')
-        c, lambd = transform(res.x)
-        fc = lambda c: f((c, lambd))
-        a, b = fit_beta(c, fc)
-        self.params = dict(a=a, b=b, lambd=lambd)
-
-    def predict(self, ts, ci=None):
-        a, b, lambd = self.params['a'], self.params['b'], self.params['lambd']
-        y = 1 - exp(-ts * lambd)
-        if ci is not None:
-            return ts, a / (a + b) * y, scipy.stats.beta.ppf((1 - ci)/2, a, b) * y, scipy.stats.beta.ppf((1 + ci)/2, a, b) * y
-        else:
-            return ts, y
-
-    def predict_final(self, ci=None):
-        a, b = self.params['a'], self.params['b']
-        if not ci:
-            return a / (a + b)
-        else:
-            return (a / (a + b),
-                    scipy.stats.beta.ppf((1 - ci)/2, a, b),
-                    scipy.stats.beta.ppf((1 + ci)/2, a, b))
-
-    def predict_time(self):
-        return 1.0 / self.params['lambd']
-
-
-class Weibull(Model):
-    def fit(self, C, N, B):
-        def transform(x):
-            p, q, r = x
-            return (expit(p), log1pexp(q), log1pexp(r))
-        def f(x):
-            c, lambd, k = x
-            # PDF of Weibull: k * lambda * (x * lambda)^(k-1) * exp(-(t * lambda)^k)
-            LL_observed = log(c) + log(k) + log(lambd) + (k-1)*(log(C) + log(lambd)) - (C*lambd)**k
-            # CDF of Weibull: 1 - exp(-(t * lambda)^k)
-            LL_censored = log((1-c) + c * exp(-(N*lambd)**k))
-            neg_LL = -sum(B * LL_observed + (1 - B) * LL_censored)
-            return neg_LL
-
-        g = lambda x: f(transform(x))
-        res = scipy.optimize.minimize(
-            fun=g,
-            jac=jacobian(g),
-            hess=hessian(g),
-            x0=(0, 0, 0),
-            method='trust-ncg')
-        c, lambd, k = transform(res.x)
-        fc = lambda c: f((c, lambd, k))
-        a, b = fit_beta(c, fc)
-        self.params = dict(a=a, b=b, lambd=lambd, k=k)
-
-    def predict(self, ts, ci=None):
-        a, b, lambd, k = self.params['a'], self.params['b'], self.params['lambd'], self.params['k']
-        y = 1 - exp(-(ts*lambd)**k)
-        if ci is not None:
-            return ts, a / (a + b) * y, scipy.stats.beta.ppf((1 - ci)/2, a, b) * y, scipy.stats.beta.ppf((1 + ci)/2, a, b) * y
-        else:
-            return ts, y
-
-    def predict_final(self, ci=None):
-        a, b = self.params['a'], self.params['b']
-        if ci is not None:
-            return a / (a + b), scipy.stats.beta.ppf((1 - ci)/2, a, b), scipy.stats.beta.ppf((1 + ci)/2, a, b)
-        else:
-            return a / (a + b)
-
-    def predict_time(self):
-        return gamma(1 + 1./self.params['k']) / self.params['lambd']
-
-
-class Gamma(Model):
-    def fit(self, C, N, B):
-        # TODO(erikbern): should compute Jacobian of this one
-        def transform(x):
-            p, q, r = x
-            return (expit(p), log1pexp(q), log1pexp(r))
-        def f(x):
-            c, lambd, k = x
-            neg_LL = 0
-            # PDF of gamma: 1.0 / gamma(k) * lambda ^ k * t^(k-1) * exp(-t * lambda)
-            LL_observed = log(c) - gammaln(k) + k*log(lambd) + (k-1)*log(C + LOG_EPS) - lambd*C
-            # CDF of gamma: gammainc(k, lambda * t)
-            LL_censored = log((1-c) + c * gammaincc(k, lambd*N) + LOG_EPS)
-            neg_LL = -sum(B * LL_observed + (1 - B) * LL_censored)
-            return neg_LL
-
-        g = lambda x: f(transform(x))
-        res = scipy.optimize.minimize(
-            fun=g,
-            x0=(0, 0, 0),
-            method='Nelder-Mead')
-        c, lambd, k = transform(res.x)
-        fc = lambda c: f((c, lambd, k))
-        a, b = fit_beta(c, fc)
-        self.params = dict(a=a, b=b, lambd=lambd, k=k)
-
-    def predict(self, ts, ci=None):
-        a, b, lambd, k = self.params['a'], self.params['b'], self.params['lambd'], self.params['k']
-        y = gammainc(k, lambd*ts)
-        if ci is not None:
-            return ts, a / (a + b) * y, scipy.stats.beta.ppf((1 - ci)/2, a, b) * y, scipy.stats.beta.ppf((1 + ci)/2, a, b) * y
-        else:
-            return ts, y
-
-    def predict_final(self, ci=None):
-        a, b = self.params['a'], self.params['b']
-        if ci is not None:
-            return a / (a + b), scipy.stats.beta.ppf((1 - ci)/2, a, b), scipy.stats.beta.ppf((1 + ci)/2, a, b)
-        else:
-            return a / (a + b)
-
-    def predict_time(self):
-        return self.params['k'] / self.params['lambd']
-
-
-def sample_event(model, t, hi=1e3):
+def sample_event(model, x, t, hi=1e3):
     # We are now at time t. Generate a random event whether the user is going to convert or not
     # TODO: this is a hacky thing until we have a "invert CDF" method on each model
     def pred(t):
         ts = numpy.array([t])
-        return model.predict(ts)[1][-1]
+        return model.predict(x, ts)[1][-1]
     y = pred(t)
     r = y + random.random() * (1 - y)
     if pred(hi) < r:
@@ -291,9 +126,9 @@ def split_by_group(data, group_min_size, max_groups):


 _models = {
     'kaplan-meier': KaplanMeier,
-    'exponential': Exponential,
-    'weibull': Weibull,
-    'gamma': Gamma,
+    'exponential': ExponentialRegression,
+    'weibull': WeibullRegression,
+    'gamma': GammaRegression,
 }
@@ -317,24 +152,24 @@ def plot_cohorts(data, t_max=None, title=None, group_min_size=0, max_groups=100, model='kaplan-meier', projection=None):
     colors = seaborn.color_palette('hls', len(groups))
     y_max = 0
     for group, color in zip(sorted(groups), colors):
-        C, N, B = get_arrays(js[group], t_converter)
+        X, B, T = get_arrays(js[group], t_converter)
         t = numpy.linspace(0, t_max, 1000)

         m = _models[model]()
-        m.fit(C, N, B)
+        m.fit(X, B, T)

         label = '%s (n=%.0f, k=%.0f)' % (group, len(B), sum(B))

         if projection:
             p = _models[projection]()
-            p.fit(C, N, B)
-            p_t, p_y, p_y_lo, p_y_hi = p.predict(t, ci=0.95)
-            p_y_final, p_y_lo_final, p_y_hi_final = p.predict_final(ci=0.95)
+            p.fit(X, B, T)
+            p_t, p_y, p_y_lo, p_y_hi = p.predict([1], t, ci=0.95)
+            p_y_final, p_y_lo_final, p_y_hi_final = p.predict_final([1], ci=0.95)
             label += ' projected: %.2f%% (%.2f%% - %.2f%%)' % (100.*p_y_final, 100.*p_y_lo_final, 100.*p_y_hi_final)
             pyplot.plot(p_t, 100. * p_y, color=color, linestyle=':', alpha=0.7)
             pyplot.fill_between(p_t, 100. * p_y_lo, 100. * p_y_hi, color=color, alpha=0.2)

-        m_t, m_y = m.predict(t)
+        m_t, m_y = m.predict([1], t)
         pyplot.plot(m_t, 100. * m_y, color=color, label=label)
         y_max = max(y_max, 110. * max(m_y))
@@ -370,17 +205,17 @@ def plot_timeseries(data, window, model='kaplan-meier', group_min_size=0, max_gr
             data = js[group][i1:i2]
             t1 += stride

-            C, N, B = get_arrays(data, t_converter)
+            X, B, T = get_arrays(data, t_converter)

             if sum(B) < window_min_size:
                 continue

             p = _models[model]()
-            p.fit(C, N, B)
+            p.fit(X, B, T)

             if time:
-                y, y_lo, y_hi = p.predict_time(ci=0.95)
+                y, y_lo, y_hi = p.predict_time([1], ci=0.95)
             else:
-                y, y_lo, y_hi = p.predict_final(ci=0.95)
+                y, y_lo, y_hi = p.predict_final([1], ci=0.95)
             print('%30s %40s %.4f %.4f %.4f' % (group, t1, y, y_lo, y_hi))
             ts.append(t2)
             ys.append(y)
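The data-layout change above is the heart of this patch: instead of (C, N, B), i.e. conversion time, observation age, and converted flag, models now take a feature matrix X, a boolean converted vector B, and a single time vector T that holds the time to conversion for converters and the censoring time for everyone else. A small sketch of what the new get_arrays produces (the dates and the day-based t_converter are made up):

```python
import datetime
import numpy

# Two made-up rows in the (created_at, converted_at, now) shape that the
# plotting code feeds to get_arrays; converted_at is None if not converted.
now = datetime.datetime(2018, 3, 1)
data = [
    (datetime.datetime(2018, 2, 1), datetime.datetime(2018, 2, 5), now),  # converted after 4 days
    (datetime.datetime(2018, 2, 10), None, now),                          # censored at 19 days
]

def t_converter(td):  # timedelta -> days
    return td.total_seconds() / (24 * 60 * 60)

X = numpy.ones((len(data), 1))  # intercept-only features, as get_arrays builds them
B = numpy.array([converted_at is not None for _, converted_at, _ in data])
T = numpy.array([t_converter((converted_at or now) - created_at)
                 for created_at, converted_at, now in data])
print(X.tolist(), B.tolist(), T.tolist())  # [[1.0], [1.0]] [True, False] [4.0, 19.0]
```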
diff --git a/convoys/model.py b/convoys/model.py
index ce96a12..0f7eb3f 100644
--- a/convoys/model.py
+++ b/convoys/model.py
@@ -4,21 +4,18 @@

 @six.add_metaclass(abc.ABCMeta)
 class Model():
-    def __init__(self, params={}):
-        self.params = params
-
     @abc.abstractmethod
-    def fit(self, C, N, B):
+    def fit(self, X, B, T):
         pass

     @abc.abstractmethod
-    def predict(self, ts, ci=None):
+    def predict(self, x, ts, ci=None):
         pass

     @abc.abstractmethod
-    def predict_final(self, ci=None):
+    def predict_final(self, x, ci=None):
         pass

     @abc.abstractmethod
-    def predict_time(self, ci=None):
+    def predict_time(self, x, ci=None):
         pass
diff --git a/convoys/regression.py b/convoys/regression.py
index a5a2e4f..0d71271 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -14,9 +14,11 @@ def __init__(self, log_pdf, cdf, extra_params, L2_reg=1.0):
         self._extra_params = extra_params
         self._sess = tf.Session()

-        # TODO: this assumes scalar inputs... should figure out a way to support arbitrary shapes
-        T_input = tf.placeholder(tf.float32, [])
-        self._cdf_f = lambda t: self._sess.run(cdf(T_input), feed_dict={T_input: t})
+        # TODO: this seems a bit dumb... is there no better way to support arbitrary number of dims?
+        T_scalar_input = tf.placeholder(tf.float32, [])
+        self._cdf_scalar_f = lambda t: self._sess.run(cdf(T_scalar_input), feed_dict={T_scalar_input: t})
+        T_vector_input = tf.placeholder(tf.float32, [None])
+        self._cdf_vector_f = lambda t: self._sess.run(cdf(T_vector_input), feed_dict={T_vector_input: t})

     def __del__(self):
         self._sess.close()
@@ -69,14 +71,18 @@ def fit(self, X, B, T):
         )
         print(self.params)

-    def predict(self, t, x, ci=None):
-        z = self._cdf_f(t)
+    def predict(self, x, t, ci=None):
+        t = numpy.array(t)
+        if len(t.shape) == 0:
+            z = self._cdf_scalar_f(t)
+        elif len(t.shape) == 1:
+            z = self._cdf_vector_f(t)
         if ci:
             c, c_lo, c_hi = self.predict_final(x, ci)
-            return (c*z, c_lo*z, c_hi*z)
+            return (t, c*z, c_lo*z, c_hi*z)
         else:
             c = self.predict_final(x)
-            return c*z
+            return (t, c*z)

     def predict_final(self, x, ci=None):
         # TODO: should take advantage of tensorflow here!!!
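On the TODO above: one possible alternative, offered as an untested sketch rather than what the patch does, is a single placeholder built with shape=None, which TF 1.x treats as "any shape", so one graph serves scalars and vectors alike. (As written, the patch's predict would also leave z unset for inputs of rank 2 or higher.)

```python
import tensorflow as tf  # TF 1.x, matching this module

T_input = tf.placeholder(tf.float32, shape=None)  # accepts a tensor of any rank
cdf_op = 1 - tf.exp(-T_input)  # stand-in for the model's cdf(); elementwise
with tf.Session() as sess:
    print(sess.run(cdf_op, feed_dict={T_input: 1.0}))         # scalar in, scalar out
    print(sess.run(cdf_op, feed_dict={T_input: [1.0, 2.0]}))  # vector in, vector out
```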
diff --git a/test_convoys.py b/test_convoys.py
index ecb3c39..fb82216 100644
--- a/test_convoys.py
+++ b/test_convoys.py
@@ -9,56 +9,12 @@
 import convoys.regression


-def test_exponential_model(c=0.3, lambd=0.1, n=10000):
-    # With a really long observation window, the rate should converge to the measured
-    C = numpy.array([random.random() < c and scipy.stats.expon.rvs(scale=1.0/lambd) or 0.0 for x in range(n)])
-    N = numpy.array([100 for converted_at in C])
-    B = numpy.array([bool(converted_at > 0) for converted_at in C])
-    c = numpy.mean(B)
-    model = convoys.Exponential()
-    model.fit(C, N, B)
-    assert 0.95*c < model.predict_final() < 1.05*c
-    assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
-
-    # Check the confidence intervals
-    y, y_lo, y_hi = model.predict_final(ci=0.95)
-    c_lo = scipy.stats.beta.ppf(0.025, n*c, n*(1-c))
-    c_hi = scipy.stats.beta.ppf(0.975, n*c, n*(1-c))
-    assert 0.95*c < y < 1.05 * c
-    assert 0.95*c_lo < y_lo < 1.05 * c_lo
-    assert 0.95*c_hi < y_hi < 1.05 * c_hi
-
-
-def test_gamma_model(c=0.3, lambd=0.1, k=3.0, n=100000):
-    C = numpy.array([random.random() < c and scipy.stats.gamma.rvs(a=k, scale=1.0/lambd) or 0.0 for x in range(n)])
-    N = numpy.array([1000 for converted_at in C])
-    B = numpy.array([bool(converted_at > 0) for converted_at in C])
-    c = numpy.mean(B)
-    model = convoys.Gamma()
-    model.fit(C, N, B)
-    assert 0.95*c < model.predict_final() < 1.05*c
-    assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
-    assert 0.95*k < model.params['k'] < 1.05*k
-
-
 def sample_weibull(k, lambd):
     # scipy.stats is garbage for this
     # exp(-(x * lambda)^k) = y
     return (-numpy.log(random.random())) ** (1.0/k) / lambd


-def test_weibull_model(c=0.3, lambd=0.1, k=0.5, n=100000):
-    B = numpy.array([bool(random.random() < c) for x in range(n)])
-    C = numpy.array([b and sample_weibull(k, lambd) or 1.0 for b in B])
-    N = numpy.array([1000 for b in B])
-    c = numpy.mean(B)
-    model = convoys.Weibull()
-    model.fit(C, N, B)
-    assert 0.95*c < model.predict_final() < 1.05*c
-    assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
-    assert 0.95*k < model.params['k'] < 1.05*k
-
-
 def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
     # With a really long observation window, the rate should converge to the measured
     X = numpy.ones((n, 1))
@@ -71,7 +27,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
     assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
     t = 10
     d = 1 - numpy.exp(-lambd*t)
-    assert 0.95*c*d < model.predict(t, [1]) < 1.05*c*d
+    assert 0.95*c*d < model.predict([1], t)[1] < 1.05*c*d

     # Check the confidence intervals
     y, y_lo, y_hi = model.predict_final([1], ci=0.95)
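sample_weibull, kept above as a test helper, is inverse-transform sampling: the Weibull survival function is S(t) = exp(-(t*lambd)^k), so solving S(t) = u for u drawn uniformly from (0, 1) gives t = (-log u)^(1/k) / lambd. A quick illustrative check of that claim against scipy's parametrization (not part of the patch):

```python
import numpy
import scipy.stats

k, lambd = 0.5, 0.1
u = numpy.random.uniform(size=100000)
t = (-numpy.log(u)) ** (1.0 / k) / lambd  # vectorized sample_weibull

# Same distribution as scipy's weibull_min with c=k, scale=1/lambd
stat, p = scipy.stats.kstest(t, scipy.stats.weibull_min(c=k, scale=1.0 / lambd).cdf)
print(p)  # comfortably above any reasonable rejection threshold
```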
From b92e6977ce2d30db9692c5a1f6513c1e9ea7ac68 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 21:39:40 -0500
Subject: [PATCH 4/6] drop autograd

---
 convoys/__init__.py | 4 ----
 requirements.txt    | 1 -
 2 files changed, 5 deletions(-)

diff --git a/convoys/__init__.py b/convoys/__init__.py
index b1cb182..c46fc1f 100644
--- a/convoys/__init__.py
+++ b/convoys/__init__.py
@@ -6,11 +6,7 @@
 import numpy
 import random
 import seaborn
-import scipy.optimize
 import six
-from autograd import jacobian, hessian, grad
-from autograd.scipy.special import expit, gamma, gammainc, gammaincc, gammaln
-from autograd.numpy import exp, log, sum
 from matplotlib import pyplot
 from convoys.model import Model
 from convoys.regression import ExponentialRegression, WeibullRegression, GammaRegression
diff --git a/requirements.txt b/requirements.txt
index 4ba7edb..4d5f04f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-autograd==1.2
 lifelines==0.11.2
 matplotlib>=2.0.0
 numpy
From bffef76ed7affbd1bcb855c4f7396e5f2d8822ae Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 21:44:49 -0500
Subject: [PATCH 5/6] remove print

---
 convoys/regression.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/convoys/regression.py b/convoys/regression.py
index 0d71271..2ac994e 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -69,7 +69,6 @@ def fit(self, X, B, T):
             ),
             **self._extra_params(self._sess)
         )
-        print(self.params)

     def predict(self, x, t, ci=None):
         t = numpy.array(t)

From c152e19181968c8a40baf9fd70c1759ff2c68821 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Mon, 19 Feb 2018 22:08:53 -0500
Subject: [PATCH 6/6] more robust test & learning rate scheme

---
 convoys/regression.py | 13 ++++++++-----
 test_convoys.py       | 11 +++++------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/convoys/regression.py b/convoys/regression.py
index 2ac994e..437d8db 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -41,25 +41,28 @@ def fit(self, X, B, T):
         LL = tf.reduce_sum(B_input * LL_observed + (1 - B_input) * LL_censored, 0)
         LL_penalized = LL - self._L2_reg * tf.reduce_sum(beta * beta, 0)

-        step_var = tf.Variable(0, trainable=False)
-        learning_rate = tf.train.exponential_decay(0.03, step_var, 1, 0.999)
-        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(-LL_penalized, global_step=step_var)
+        learning_rate_input = tf.placeholder(tf.float32, [])
+        optimizer = tf.train.AdamOptimizer(learning_rate_input).minimize(-LL_penalized)

         # TODO(erikbern): this is going to add more and more variables every time we run this
         self._sess.run(tf.global_variables_initializer())

         best_cost, best_step, step = float('-inf'), 0, 0
+        learning_rate = 0.1
         while True:
-            feed_dict = {X_input: X, B_input: B, T_input: T}
+            feed_dict = {X_input: X, B_input: B, T_input: T, learning_rate_input: learning_rate}
             self._sess.run(optimizer, feed_dict=feed_dict)
             cost = self._sess.run(LL_penalized, feed_dict=feed_dict)
             if cost > best_cost:
                 best_cost, best_step = cost, step
             if step - best_step > 100:
+                learning_rate /= 10
+                best_cost = float('-inf')
+            if learning_rate < 1e-6:
                 break
             step += 1
             if step % 100 == 0:
-                print(step, cost, self._sess.run(learning_rate))
+                print('step %6d (lr %6.6f): %9.2f' % (step, learning_rate, cost))

         self.params = dict(
             beta=self._sess.run(beta),
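The new scheme replaces TensorFlow's exponential_decay with plain Python bookkeeping: hold the learning rate fixed, and once 100 consecutive steps fail to improve the best penalized log-likelihood, divide it by 10 and reset the bar; training stops when the rate falls below 1e-6, i.e. after six cuts from 0.1. The same loop in isolation, with a noisy stand-in objective so the sketch runs on its own:

```python
import random

# Flat, noisy stand-in for sess.run(LL_penalized, ...); real code would also
# run one optimizer step per iteration at the current learning_rate.
def evaluate():
    return random.gauss(0.0, 1.0)

best_cost, best_step, step = float('-inf'), 0, 0
learning_rate = 0.1
while True:
    cost = evaluate()
    if cost > best_cost:
        best_cost, best_step = cost, step
    if step - best_step > 100:
        learning_rate /= 10
        best_cost = float('-inf')  # next step always "improves", so best_step resets too
    if learning_rate < 1e-6:
        break
    step += 1
print('stopped after %d steps' % step)
```

Note the deliberate trick in the diff: after a decay, best_cost is reset to -inf, so the very next step becomes a new best and restarts the 100-step plateau counter at the lower rate.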
diff --git a/test_convoys.py b/test_convoys.py
index fb82216..558314e 100644
--- a/test_convoys.py
+++ b/test_convoys.py
@@ -15,7 +15,7 @@ def sample_weibull(k, lambd):
     return (-numpy.log(random.random())) ** (1.0/k) / lambd


-def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
+def test_exponential_regression_model(c=0.3, lambd=0.1, n=100000):
     # With a really long observation window, the rate should converge to the measured
     X = numpy.ones((n, 1))
     B = numpy.array([bool(random.random() < c) for x in range(n)])
@@ -38,7 +38,7 @@ def test_exponential_regression_model(c=0.3, lambd=0.1, n=10000):
     assert 0.95*c_hi < y_hi < 1.05 * c_hi


-def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=10000):
+def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=100000):
     X = numpy.array([[1] + [r % len(cs) == j for j in range(len(cs))] for r in range(n)])
     B = numpy.array([bool(random.random() < cs[r % len(cs)]) for r in range(n)])
     T = numpy.array([b and sample_weibull(k, lambd) or 1000 for b in B])
@@ -49,7 +49,7 @@ def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=10000)
     assert 0.95 * c < model.predict_final(x) < 1.05 * c


-def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=10000):
+def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=100000):
     X = numpy.ones((n, 1))
     B = numpy.array([bool(random.random() < c) for r in range(n)])
     c = numpy.mean(B)
@@ -61,11 +61,10 @@ def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=10000):
     c_lo = scipy.stats.beta.ppf(0.025, n*c, n*(1-c))
     c_hi = scipy.stats.beta.ppf(0.975, n*c, n*(1-c))
     assert 0.95*c < y < 1.05 * c
-    assert 0.95*c_lo < y_lo < 1.05 * c_lo
-    assert 0.95*c_hi < y_hi < 1.05 * c_hi
+    assert 0.95*(c_hi-c_lo) < (y_hi-y_lo) < 1.05 * (c_hi-c_lo)


-def test_gamma_regression_model(c=0.3, lambd=0.1, k=3.0, n=10000):
+def test_gamma_regression_model(c=0.3, lambd=0.1, k=3.0, n=100000):
     X = numpy.ones((n, 1))
     B = numpy.array([bool(random.random() < c) for r in range(n)])
     T = numpy.array([b and scipy.stats.gamma.rvs(a=k, scale=1.0/lambd) or 1000 for b in B])
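The interval assertion above now checks the width y_hi - y_lo rather than each endpoint separately, presumably because the L2 penalty can shift the fitted interval's center slightly while its width stays close to the exact Binomial reference. For completeness, a sketch of exercising one of these regression models end to end; the zero-argument constructor mirrors the `_models[model]()` usage in patch 3, and the data generation mirrors the gamma test above (expected output is illustrative):

```python
import numpy
import random
import scipy.stats
import convoys.regression

n, c, lambd, k = 100000, 0.3, 0.1, 3.0
X = numpy.ones((n, 1))                       # intercept-only features
B = numpy.array([bool(random.random() < c) for _ in range(n)])
T = numpy.array([scipy.stats.gamma.rvs(a=k, scale=1.0/lambd) if b else 1000 for b in B])

model = convoys.regression.GammaRegression()
model.fit(X, B, T)
print(model.predict_final([1], ci=0.95))     # (estimate, lower, upper), near numpy.mean(B)
```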