Commit

Merge 40c84aa into 2dbc6d8
erikbern committed Feb 19, 2018
2 parents 2dbc6d8 + 40c84aa commit 118f0f9
Showing 3 changed files with 120 additions and 39 deletions.
145 changes: 107 additions & 38 deletions convoys/regression.py
@@ -1,62 +1,131 @@
import numpy
import scipy.optimize
from autograd import jacobian, hessian, grad
from autograd.scipy.special import expit, gamma, gammainc, gammaincc, gammaln
from autograd.numpy import exp, log, sum, dot
import numpy # TODO: remove
from scipy.special import expit # TODO: remove
import scipy.stats
import tensorflow as tf

from convoys import Model

class WeibullRegression(Model):
# This will replace the Weibull model in __init__.py soon.
def __init__(self, L2_reg=1.0):
class Regression(Model):
# This will replace the model in __init__.py soon.
def __init__(self, log_pdf, cdf, extra_params, L2_reg=1.0):
self._L2_reg = L2_reg
self._log_pdf = log_pdf
self._cdf = cdf
self._extra_params = extra_params

def fit(self, X, B, T):
# TODO: should do this in constructor, but the shape of X isn't known at that point
n, k = X.shape
X = X.astype(numpy.float32)
def f(x):
lambd, k = exp(x[0]), exp(x[1])
beta = x[2:]
c = expit(dot(X, beta.T)) # Conversion rates for each example

# PDF of Weibull: k * lambda * (t * lambda)^(k-1) * exp(-(t * lambda)^k)
LL_observed = log(c) + log(k) + log(lambd) + (k-1)*(log(T) + log(lambd)) - (T*lambd)**k
# CDF of Weibull: 1 - exp(-(t * lambda)^k)
LL_censored = log((1-c) + c * exp(-(T*lambd)**k))

LL = sum(B * LL_observed + (1 - B) * LL_censored) - self._L2_reg * dot(beta, beta)
return -LL

res = scipy.optimize.minimize(
fun=f,
jac=jacobian(f),
hess=hessian(f),
x0=numpy.zeros(k+2),
method='trust-ncg')
log_lambd, log_k = res.x[0], res.x[1]
beta = res.x[2:]
# Compute hessian of betas
beta_hessian = hessian(f)(res.x)[2:,2:]

X_input = tf.placeholder(tf.float32, [None, k])
B_input = tf.placeholder(tf.float32, [None])
T_input = tf.placeholder(tf.float32, [None])
beta = tf.Variable(tf.zeros([k]), 'beta')

X_prod_beta = tf.squeeze(tf.matmul(X_input, tf.expand_dims(beta, -1)), 1)
c = tf.sigmoid(X_prod_beta) # Conversion rates for each example

# Converted at T: contributes c * pdf(T)
LL_observed = tf.log(c) + self._log_pdf(T_input)
# Censored at T: contributes (1 - c) + c * (1 - cdf(T))
LL_censored = tf.log((1-c) + c * (1 - self._cdf(T_input)))

LL = tf.reduce_sum(B_input * LL_observed + (1 - B_input) * LL_censored, 0)
LL_penalized = LL - self._L2_reg * tf.reduce_sum(beta * beta, 0)

learning_rate = 1e-1
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(-LL_penalized)

self._sess = tf.Session() # TODO: free
init = tf.global_variables_initializer()
self._sess.run(init)

for epoch in range(500):
feed_dict = {X_input: X, B_input: B, T_input: T}
self._sess.run(optimizer, feed_dict=feed_dict)
cost = self._sess.run(LL_penalized, feed_dict=feed_dict)
print(cost)

self.params = dict(
lambd=exp(log_lambd),
k=exp(log_k),
beta=beta,
beta_hessian=beta_hessian
beta=self._sess.run(beta),
beta_hessian=self._sess.run(
-tf.hessians(LL_penalized, [beta])[0],
feed_dict=feed_dict,
),
**self._extra_params(self._sess)
)
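
The objective above is the log-likelihood of a censoring mixture: with conversion probability c = sigmoid(X beta), a converted example (B=1) contributes c * pdf(T), and a censored example (B=0) contributes (1-c) + c * (1 - cdf(T)): either it never converts, or it converts after the observation window ends. A minimal numpy sketch of the same objective for the Weibull case (illustration only, not part of this diff):

import numpy
from scipy.special import expit

def weibull_LL(X, beta, T, B, lambd=0.1, k=0.5):
    c = expit(numpy.dot(X, beta))  # conversion probability per example
    log_pdf = numpy.log(k) + numpy.log(lambd) + (k-1)*(numpy.log(T) + numpy.log(lambd)) - (T*lambd)**k
    cdf = 1 - numpy.exp(-(T*lambd)**k)
    LL_observed = numpy.log(c) + log_pdf           # converted and observed at T
    LL_censored = numpy.log((1-c) + c*(1 - cdf))   # still unconverted at T
    return numpy.sum(B*LL_observed + (1 - B)*LL_censored)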

def predict(self):
pass # TODO: implement

def predict_final(self, x, ci=None):
# TODO: should take advantage of tensorflow here!!!
x = numpy.array(x)
def f(x, d=0):
return expit(dot(x, self.params['beta']) + d)
return expit(numpy.dot(x, self.params['beta']) + d)
if ci:
inv_var = dot(dot(x.T, self.params['beta_hessian']), x)
inv_var = numpy.dot(numpy.dot(x.T, self.params['beta_hessian']), x)
lo, hi = (scipy.stats.norm.ppf(p, scale=inv_var**-0.5) for p in ((1 - ci)/2, (1 + ci)/2))
return f(x), f(x, lo), f(x, hi)
else:
return f(x)
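
predict_final approximates the distribution of x.beta as a normal whose inverse variance is x' H x, with H the negated Hessian of the penalized log-likelihood stored above, then maps the quantiles through the sigmoid. A standalone numpy sketch of that computation (hypothetical helper, illustration only):

import numpy
import scipy.stats
from scipy.special import expit

def predict_final_ci(x, beta, beta_hessian, ci=0.95):
    mid = numpy.dot(x, beta)
    inv_var = numpy.dot(numpy.dot(x.T, beta_hessian), x)
    lo, hi = (scipy.stats.norm.ppf(p, scale=inv_var**-0.5)
              for p in ((1 - ci)/2, (1 + ci)/2))
    return expit(mid), expit(mid + lo), expit(mid + hi)

Treating x' H x as an inverse variance is exact for the one-dimensional x used in the tests; for general x the delta-method variance of x.beta is x' H^-1 x.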

def predict_time(self):
pass # TODO: implement


class ExponentialRegression(Regression):
def __init__(self, L2_reg=1.0):
log_lambd_var = tf.Variable(tf.zeros([]), 'log_lambd')
lambd = tf.exp(log_lambd_var)

# PDF of exponential: lambda * exp(-t * lambda)
log_pdf = lambda T: tf.log(lambd) - T*lambd
# CDF of exponential: 1 - exp(-t * lambda)
cdf = lambda T: 1 - tf.exp(-(T * lambd))

super(ExponentialRegression, self).__init__(
log_pdf=log_pdf,
cdf=cdf,
extra_params=lambda sess: dict(lambd=sess.run(lambd)),
L2_reg=L2_reg)
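
A quick cross-check of this exponential parameterization against scipy, which uses scale = 1/lambda (illustration only, not part of the diff):

import numpy
import scipy.stats

lambd = 0.1
T = numpy.array([1.0, 5.0, 20.0])
log_pdf = numpy.log(lambd) - T*lambd   # log of: lambda * exp(-t * lambda)
cdf = 1 - numpy.exp(-T*lambd)
assert numpy.allclose(log_pdf, scipy.stats.expon.logpdf(T, scale=1/lambd))
assert numpy.allclose(cdf, scipy.stats.expon.cdf(T, scale=1/lambd))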


class WeibullRegression(Regression):
def __init__(self, L2_reg=1.0):
log_lambd_var = tf.Variable(tf.zeros([]), 'log_lambd')
log_k_var = tf.Variable(tf.zeros([]), 'log_k')

lambd = tf.exp(log_lambd_var)
k = tf.exp(log_k_var)

# PDF of Weibull: k * lambda * (t * lambda)^(k-1) * exp(-(t * lambda)^k)
log_pdf = lambda T: tf.log(k) + tf.log(lambd) + (k-1)*(tf.log(T) + tf.log(lambd)) - (T*lambd)**k
# CDF of Weibull: 1 - exp(-(t * lambda)^k)
cdf = lambda T: 1 - tf.exp(-(T * lambd)**k)

super(WeibullRegression, self).__init__(
log_pdf=log_pdf,
cdf=cdf,
extra_params=lambda sess: dict(k=sess.run(k),
lambd=sess.run(lambd)),
L2_reg=L2_reg)
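
The same cross-check for the Weibull parameterization, which corresponds to scipy's weibull_min with c=k and scale = 1/lambda (illustration only):

import numpy
import scipy.stats

lambd, k = 0.1, 0.5
T = numpy.array([1.0, 5.0, 20.0])
log_pdf = numpy.log(k) + numpy.log(lambd) + (k-1)*(numpy.log(T) + numpy.log(lambd)) - (T*lambd)**k
cdf = 1 - numpy.exp(-(T*lambd)**k)
assert numpy.allclose(log_pdf, scipy.stats.weibull_min.logpdf(T, c=k, scale=1/lambd))
assert numpy.allclose(cdf, scipy.stats.weibull_min.cdf(T, c=k, scale=1/lambd))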


class GammaRegression(Regression):
def __init__(self, L2_reg=1.0):
log_lambd_var = tf.Variable(tf.zeros([]), 'log_lambd')
log_k_var = tf.Variable(tf.zeros([]), 'log_k')

lambd = tf.exp(log_lambd_var)
k = tf.exp(log_k_var)

# PDF of gamma: 1.0 / gamma(k) * lambda ^ k * t^(k-1) * exp(-t * lambda)
log_pdf = lambda T: -tf.lgamma(k) + k*tf.log(lambd) + (k-1)*tf.log(T) - lambd*T
# CDF of gamma: gammainc(k, lambda * t)
cdf = lambda T: tf.igamma(k, lambd * T)

super(GammaRegression, self).__init__(
log_pdf=log_pdf,
cdf=cdf,
extra_params=lambda sess: dict(k=sess.run(k),
lambd=sess.run(lambd)),
L2_reg=L2_reg)
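
End to end, the new API can be exercised the same way the unit tests below do: generate synthetic censored data, fit, and read the conversion estimate back out. A usage sketch (illustration only):

import numpy
import random
import scipy.stats
import convoys.regression

n, c, lambd, k = 10000, 0.3, 0.1, 0.5
X = numpy.ones((n, 1))
B = numpy.array([random.random() < c for _ in range(n)])
# Converted users draw a Weibull conversion time; the rest are censored at t=1000
T = numpy.array([scipy.stats.weibull_min.rvs(c=k, scale=1.0/lambd) if b else 1000.0
                 for b in B])

model = convoys.regression.WeibullRegression()
model.fit(X, B, T)
print(model.predict_final([1]))           # point estimate, roughly c
print(model.predict_final([1], ci=0.95))  # (estimate, lower bound, upper bound)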
1 change: 1 addition & 0 deletions requirements.txt
@@ -5,3 +5,4 @@ numpy
scipy
seaborn==0.8.1
six==1.11.0
tensorflow==1.6.0rc1
13 changes: 12 additions & 1 deletion test_convoys.py
@@ -70,7 +70,7 @@ def test_weibull_regression_model(cs=[0.3, 0.5, 0.7], lambd=0.1, k=0.5, n=10000)
assert 0.95 * c < model.predict_final(x) < 1.05 * c


def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=1000):
def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=10000):
X = numpy.ones((n, 1))
B = numpy.array([bool(random.random() < c) for r in range(n)])
c = numpy.mean(B)
@@ -86,6 +86,17 @@ def test_weibull_regression_model_ci(c=0.3, lambd=0.1, k=0.5, n=1000):
assert 0.95*c_hi < y_hi < 1.05 * c_hi


def test_gamma_regression_model(c=0.3, lambd=0.1, k=3.0, n=10000):
X = numpy.ones((n, 1))
B = numpy.array([bool(random.random() < c) for r in range(n)])
T = numpy.array([b and scipy.stats.gamma.rvs(a=k, scale=1.0/lambd) or 1000 for b in B])
model = convoys.regression.GammaRegression()
model.fit(X, B, T)
assert 0.95*c < model.predict_final([1]) < 1.05*c
assert 0.95*lambd < model.params['lambd'] < 1.05*lambd
assert 0.95*k < model.params['k'] < 1.05*k


def _get_data(c=0.3, k=10, lambd=0.1, n=1000):
data = []
now = datetime.datetime(2000, 7, 1)
