From 08409a87f72b96a04c767232d30958ebc07a328a Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Sun, 11 Mar 2018 15:33:29 -0400
Subject: [PATCH 1/2] restore variables to the best state after 40 steps of
 no improvements, higher initial learning rate

---
 convoys/regression.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/convoys/regression.py b/convoys/regression.py
index a68e0f6..e0f3525 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -17,24 +17,30 @@ def _get_placeholders(n, k):
     )
 
 
-def _optimize(sess, target, feed_dict):
+def _optimize(sess, target, feed_dict, variables):
     learning_rate_input = tf.placeholder(tf.float32, [])
     optimizer = tf.train.AdamOptimizer(learning_rate_input).minimize(-target)
 
     # TODO(erikbern): this is going to add more and more variables every time we run this
     sess.run(tf.global_variables_initializer())
 
-    best_cost, best_step, step = float('-inf'), 0, 0
-    learning_rate = 0.1
+    best_step, step = 0, 0
+    learning_rate = 1.0
+    best_state = sess.run(variables)
+    best_cost = sess.run(target, feed_dict=feed_dict)
+
     while True:
         feed_dict[learning_rate_input] = learning_rate
         sess.run(optimizer, feed_dict=feed_dict)
         cost = sess.run(target, feed_dict=feed_dict)
         if cost > best_cost:
             best_cost, best_step = cost, step
+            best_state = sess.run(variables)
         if step - best_step > 40:
             learning_rate /= 10
-            best_cost = float('-inf')
+            best_step = step
+            for variable, value in zip(variables, best_state):
+                sess.run(tf.assign(variable, value))
             if learning_rate < 1e-6:
                 break
         step += 1
@@ -111,7 +117,7 @@ def fit(self, X, B, T):
 
         with tf.Session() as sess:
             feed_dict = {X_input: X, B_input: B, T_input: T}
-            _optimize(sess, LL_penalized, feed_dict)
+            _optimize(sess, LL_penalized, feed_dict, (alpha, beta))
             self.params = _get_params(sess, {'beta': beta, 'alpha': alpha})
             self.params['alpha_hessian'] = _get_hessian(sess, LL_penalized, alpha, feed_dict)
             self.params['beta_hessian'] = _get_hessian(sess, LL_penalized, beta, feed_dict)
@@ -154,7 +160,7 @@ def fit(self, X, B, T):
 
         with tf.Session() as sess:
             feed_dict = {X_input: X, B_input: B, T_input: T}
-            _optimize(sess, LL_penalized, feed_dict)
+            _optimize(sess, LL_penalized, feed_dict, (alpha, beta, log_k_var))
             self.params = _get_params(sess, {'beta': beta, 'alpha': alpha, 'k': k})
             self.params['alpha_hessian'] = _get_hessian(sess, LL_penalized, alpha, feed_dict)
             self.params['beta_hessian'] = _get_hessian(sess, LL_penalized, beta, feed_dict)
@@ -197,7 +203,7 @@ def fit(self, X, B, T):
 
         with tf.Session() as sess:
             feed_dict = {X_input: X, B_input: B, T_input: T}
-            _optimize(sess, LL_penalized, feed_dict)
+            _optimize(sess, LL_penalized, feed_dict, (alpha, beta, log_k_var))
             self.params = _get_params(sess, {'beta': beta, 'alpha': alpha, 'k': k})
             self.params['alpha_hessian'] = _get_hessian(sess, LL_penalized, alpha, feed_dict)
             self.params['beta_hessian'] = _get_hessian(sess, LL_penalized, beta, feed_dict)
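
Note on the patch above: the optimizer now starts at a higher learning rate
(1.0 instead of 0.1), snapshots the variables whenever the objective improves,
and after 40 steps with no improvement restores that best state before dividing
the learning rate by 10, stopping once the rate drops below 1e-6. A minimal
framework-free sketch of the same pattern, using plain gradient ascent instead
of TensorFlow's Adam; `objective`, `gradient`, and `params` are hypothetical
stand-ins, not convoys APIs:

# Sketch only: keep the best parameters seen so far; after 40 steps without
# improvement, restore them, anneal the learning rate, and stop below 1e-6.
def maximize(objective, gradient, params):
    learning_rate = 1.0
    best_params, best_cost = list(params), objective(params)
    best_step = step = 0
    while True:
        # One plain gradient-ascent step (the patch uses Adam via TensorFlow).
        params = [p + learning_rate * g for p, g in zip(params, gradient(params))]
        cost = objective(params)
        if cost > best_cost:
            best_cost, best_step = cost, step
            best_params = list(params)   # snapshot the best state seen so far
        if step - best_step > 40:        # 40 steps without improvement
            learning_rate /= 10          # anneal the learning rate
            best_step = step             # restart the patience window
            params = list(best_params)   # restore the best state
            if learning_rate < 1e-6:
                return best_params
        step += 1
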
From 452df3da45fc16de3730e412aaea962bbfe06799 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Sun, 11 Mar 2018 15:39:08 -0400
Subject: [PATCH 2/2] no reason to have log_pdf and cdf as lambdas

---
 convoys/regression.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/convoys/regression.py b/convoys/regression.py
index e0f3525..15e19a2 100644
--- a/convoys/regression.py
+++ b/convoys/regression.py
@@ -42,6 +42,7 @@ def _optimize(sess, target, feed_dict, variables):
             for variable, value in zip(variables, best_state):
                 sess.run(tf.assign(variable, value))
             if learning_rate < 1e-6:
+                sys.stdout.write('\n')
                 break
         step += 1
         sys.stdout.write('step %6d (lr %6.6f): %12.4f' % (step, learning_rate, cost))
@@ -106,11 +107,11 @@ def fit(self, X, B, T):
         lambd = tf.exp(X_prod_alpha)
         c = tf.sigmoid(X_prod_beta)
 
-        log_pdf = lambda T: tf.log(lambd) - T*lambd
-        cdf = lambda T: 1 - tf.exp(-(T * lambd))
+        log_pdf = tf.log(lambd) - T_input*lambd
+        cdf = 1 - tf.exp(-(T_input * lambd))
 
-        LL_observed = tf.log(c) + log_pdf(T_input)
-        LL_censored = tf.log((1-c) + c * (1 - cdf(T_input)))
+        LL_observed = tf.log(c) + log_pdf
+        LL_censored = tf.log((1-c) + c * (1 - cdf))
 
         LL = tf.reduce_sum(B_input * LL_observed + (1 - B_input) * LL_censored, 0)
         LL_penalized = LL - self._L2_reg * tf.reduce_sum(beta * beta, 0)
@@ -148,12 +149,12 @@ def fit(self, X, B, T):
         c = tf.sigmoid(X_prod_beta)
 
         # PDF of Weibull: k * lambda * (x * lambda)^(k-1) * exp(-(t * lambda)^k)
-        log_pdf = lambda T: tf.log(k) + tf.log(lambd) + (k-1)*(tf.log(T) + tf.log(lambd)) - (T*lambd)**k
+        log_pdf = tf.log(k) + tf.log(lambd) + (k-1)*(tf.log(T_input) + tf.log(lambd)) - (T_input*lambd)**k
         # CDF of Weibull: 1 - exp(-(t * lambda)^k)
-        cdf = lambda T: 1 - tf.exp(-(T * lambd)**k)
+        cdf = 1 - tf.exp(-(T_input * lambd)**k)
 
-        LL_observed = tf.log(c) + log_pdf(T_input)
-        LL_censored = tf.log((1-c) + c * (1 - cdf(T_input)))
+        LL_observed = tf.log(c) + log_pdf
+        LL_censored = tf.log((1-c) + c * (1 - cdf))
 
         LL = tf.reduce_sum(B_input * LL_observed + (1 - B_input) * LL_censored, 0)
         LL_penalized = LL - self._L2_reg * tf.reduce_sum(beta * beta, 0)
@@ -191,12 +192,12 @@ def fit(self, X, B, T):
         c = tf.sigmoid(X_prod_beta)
 
         # PDF of gamma: 1.0 / gamma(k) * lambda ^ k * t^(k-1) * exp(-t * lambda)
-        log_pdf = lambda T: -tf.lgamma(k) + k*tf.log(lambd) + (k-1)*tf.log(T) - lambd*T
+        log_pdf = -tf.lgamma(k) + k*tf.log(lambd) + (k-1)*tf.log(T_input) - lambd*T_input
         # CDF of gamma: gammainc(k, lambda * t)
-        cdf = lambda T: tf.igamma(k, lambd * T)
+        cdf = tf.igamma(k, lambd * T_input)
 
-        LL_observed = tf.log(c) + log_pdf(T_input)
-        LL_censored = tf.log((1-c) + c * (1 - cdf(T_input)))
+        LL_observed = tf.log(c) + log_pdf
+        LL_censored = tf.log((1-c) + c * (1 - cdf))
 
         LL = tf.reduce_sum(B_input * LL_observed + (1 - B_input) * LL_censored, 0)
         LL_penalized = LL - self._L2_reg * tf.reduce_sum(beta * beta, 0)
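
Note on the patch above: each lambda was only ever applied to `T_input`, so the
function indirection added nothing and the log-likelihood can be built from the
tensor expressions directly. For reference, a NumPy sketch (not the convoys
API) of the censored mixture likelihood that all three models construct this
way, shown for the exponential case; `B` marks observed conversions, `T` the
observation times, `c` the conversion probability, and `lambd` the rate:

import numpy as np

def log_likelihood(B, T, c, lambd):
    log_pdf = np.log(lambd) - T * lambd             # exponential log f(t)
    cdf = 1 - np.exp(-T * lambd)                    # exponential F(t)
    LL_observed = np.log(c) + log_pdf               # converted at time T
    LL_censored = np.log((1 - c) + c * (1 - cdf))   # still unconverted at T
    return np.sum(B * LL_observed + (1 - B) * LL_censored)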