Skip to content

Commit

Permalink
some more random stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
Erik Bernhardsson committed Mar 19, 2018
1 parent 075324e commit 762e481
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 16 deletions.
3 changes: 0 additions & 3 deletions convoys/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
from convoys import tf_utils


tf.logging.set_verbosity(3)


class RegressionModel:
def __init__(self, L2_reg=1.0):
self._L2_reg = L2_reg
Expand Down
34 changes: 22 additions & 12 deletions convoys/single.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import bisect
import lifelines
import numpy
from scipy.special import expit
import tensorflow as tf
from convoys import tf_utils

Expand Down Expand Up @@ -46,7 +47,7 @@ def median(ps):


class Nonparametric(SingleModel):
def fit(self, B, T, n=100):
def fit(self, B, T, n=2):
# We're going to fit c and p_0, p_1, ...
# so that the probability of conversion at time i is c * (1 - p_0) * ... p_i
# What's the total likelihood
Expand All @@ -59,12 +60,12 @@ def fit(self, B, T, n=100):
all_ts = list(sorted(t for b, t in zip(B, T) if b))
n = min(n, len(all_ts))
js = [int(round(1.0 * len(all_ts) * (z + 1) / n - 1)) for z in range(n)]
ts = [all_ts[j] for j in js]
self.ts = [all_ts[j] for j in js]
self.get_j = numpy.vectorize(lambda t: min(bisect.bisect_left(self.ts, t), n-1))
count_observed = numpy.zeros((n,), dtype=numpy.float32)
count_unobserved = numpy.zeros((n,), dtype=numpy.float32)
for i, (b, t) in enumerate(zip(B, T)):
j = bisect.bisect_left(ts, t)
j = min(j, n-1)
j = self.get_j(t)
if b:
count_observed[j] += 1
else:
Expand All @@ -88,13 +89,22 @@ def fit(self, B, T, n=100):
self.params = {
'beta': sess.run(beta),
'z': sess.run(z),
'beta_hessian': tf_utils.get_hessian(sess, LL, beta),
'z_hessian': tf_utils.get_hessian(sess, LL, z),
'beta_cov': 1. / tf_utils.get_hessian(sess, LL, beta),
'z_cov': numpy.linalg.inv(tf_utils.get_hessian(sess, LL, z)),
}
print(self.params)

def predict(self, x, t, ci=None, n=1000):
t = _fix_t(t)
x_prod_alpha = _sample_hessian(x, self.params['alpha'], self.params['alpha_hessian'], n, ci)
x_prod_beta = _sample_hessian(x, self.params['beta'], self.params['beta_hessian'], n, ci)
return tf_utils.predict(expit(x_prod_beta) * (1 - numpy.exp(-t * numpy.exp(x_prod_alpha))), ci)
def predict(self, t, ci=None, n=1000):
t = tf_utils.fix_t(t)
if ci:
betas = numpy.random.normal(self.params['beta'], self.params['beta_cov'], n)
zs = numpy.random.multivariate_normal(self.params['z'], self.params['z_cov'], n).T
else:
betas = self.params['beta']
zs = self.params['z']

c = expit(betas)
log_survived_until = numpy.cumsum(numpy.log(expit(-zs)), axis=0) # todo: should use exclusive=True I think?
f = c * (1 - numpy.exp(log_survived_until))
p = tf_utils.predict(f, ci)
j = self.get_j(t)
print(j)
3 changes: 2 additions & 1 deletion convoys/tf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import sys
import tensorflow as tf

tf.logging.set_verbosity(3)


# Evaluate, in session `sess`, the Hessian of `-f` with respect to `param`.
# Note the sign flip: the graph differentiated is the negation of `f`, so a
# caller passing a log-likelihood gets the Hessian of the negative
# log-likelihood.
# `param` is wrapped in a one-element list for tf.hessians, and element [0]
# of the evaluated result is returned, i.e. the entry for that one parameter.
def get_hessian(sess, f, param):
return sess.run(tf.hessians(-f, [param]))[0]
Expand Down Expand Up @@ -72,4 +74,3 @@ def predict(func_values, ci):
else:
axis = len(func_values.shape)-1
return numpy.mean(func_values, axis=axis), numpy.percentile(func_values, (1-ci)*50, axis=axis), numpy.percentile(func_values, (1+ci)*50, axis=axis)

0 comments on commit 762e481

Please sign in to comment.