
Commit

config update and learning rate decay update
carpedm20 committed Jan 13, 2017
1 parent 77e9032 commit 763adf4
Showing 5 changed files with 90 additions and 43 deletions.
5 changes: 3 additions & 2 deletions config.py
@@ -19,6 +19,8 @@ def add_argument_group(name):
net_arg.add_argument('--input_dim', type=int, default=2, help='')
net_arg.add_argument('--max_enc_length', type=int, default=20, help='')
net_arg.add_argument('--max_dec_length', type=int, default=33, help='')
net_arg.add_argument('--init_min_val', type=float, default=-0.08, help='for uniform random initializer')
net_arg.add_argument('--init_max_val', type=float, default=+0.08, help='for uniform random initializer')
net_arg.add_argument('--num_glimpse', type=int, default=1, help='')
net_arg.add_argument('--use_terminal_symbol', type=str2bool, default=True, help='Not implemented yet')

@@ -34,10 +36,9 @@ def add_argument_group(name):
train_arg.add_argument('--is_train', type=str2bool, default=True, help='')
train_arg.add_argument('--optimizer', type=str, default='rmsprop', help='')
train_arg.add_argument('--max_step', type=int, default=10000, help='')
train_arg.add_argument('--reg_scale', type=float, default=0.5, help='')
train_arg.add_argument('--batch_size', type=int, default=512, help='')
train_arg.add_argument('--lr_start', type=float, default=0.001, help='')
train_arg.add_argument('--lr_decay_step', type=int, default=5000, help='')
train_arg.add_argument('--lr_decay_rate', type=float, default=0.96, help='')
train_arg.add_argument('--max_grad_norm', type=float, default=1.0, help='')
train_arg.add_argument('--checkpoint_secs', type=int, default=300, help='')

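The new flags pin the weight initializer to a uniform range of ±0.08 and put the learning rate on a staircase exponential-decay schedule. A minimal sketch of what these defaults mean, in plain NumPy rather than the repo's TensorFlow graph (the variable names below are mine):

import numpy as np

init_min_val, init_max_val = -0.08, 0.08                      # --init_min_val / --init_max_val
lr_start, lr_decay_step, lr_decay_rate = 0.001, 5000, 0.96    # --lr_start / --lr_decay_step / --lr_decay_rate

# Uniform random initialization, as tf.random_uniform_initializer would produce.
weights = np.random.uniform(init_min_val, init_max_val, size=(4, 4))

def decayed_lr(step):
    # Staircase schedule: lr_start * rate ** floor(step / decay_step)
    return lr_start * lr_decay_rate ** (step // lr_decay_step)

for step in (0, 4999, 5000, 10000):
    print(step, decayed_lr(step))   # 0.001, 0.001, 0.00096, 0.0009216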
25 changes: 16 additions & 9 deletions data_loader.py
@@ -1,6 +1,8 @@
# Most of the codes are from https://github.com/vshallc/PtrNets/blob/master/pointer/misc/tsp.py
import os
import numpy as np
import itertools
import numpy as np
from tqdm import trange

def length(x, y):
return np.linalg.norm(np.asarray(x) - np.asarray(y))
@@ -24,10 +26,11 @@ def solve_tsp_dynamic(points):
def generate_one_example(n_nodes):
nodes = np.random.rand(n_nodes, 2)
res = solve_tsp_dynamic(nodes)
return nodes, res

def generate_examples(num, n_min, n_max):
def generate_examples(num, n_min, n_max, desc=""):
examples = []
for i in range(num):
for i in trange(num, desc=desc):
n_nodes = np.random.randint(n_min, n_max + 1)
nodes, res = generate_one_example(n_nodes)
examples.append((nodes, res))
@@ -45,18 +48,22 @@ def __init__(self, config, rng=None):
self.task_name = "{}_{}_{}".format(self.task, self.min_length, self.max_length)
self.npz_path = os.path.join(config.data_dir, "{}.npz".format(self.task_name))

def maybe_generate_and_save(self):
self._maybe_generate_and_save()

def _maybe_generate_and_save(self):
if not os.path.exists(self.npz_path):
print("[*] Creating dataset for {}".format(self.task))

train = generate_examples(1048576, self.min_length, self.max_length)
valid = generate_examples(1000, self.min_length, self.max_length)
test = generate_examples(1000, self.max_length, self.max_length)
train = generate_examples(
1000000, self.min_length, self.max_length, "Train data..")
valid = generate_examples(
1000, self.min_length, self.max_length, "Valid data..")
test = generate_examples(
1000, self.max_length, self.max_length, "Test data..")

np.savez(self.npz_path, train=train, test=test, valid=valid)
else:
print("[*] Loading dataset for {}".format(self.task))
data = np.load(self.npz_path)
self.train, self.test, self.valid = \
data['train'], data['test'], data['valid']

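data_loader.py now wraps example generation in tqdm's trange so long runs show a progress bar, trims the training set from 1048576 to 1000000 examples, and keeps the generate-once-then-cache pattern around an .npz file. A stripped-down sketch of that pattern (mine, not repo code; the path is hypothetical and the TSP solve is skipped):

import os
import numpy as np
from tqdm import trange

npz_path = "tsp_demo.npz"   # hypothetical; the repo derives this from config.data_dir and the task name

def generate_examples(num, n_min, n_max, desc=""):
    examples = []
    for _ in trange(num, desc=desc):                  # trange = tqdm-wrapped range, prints a progress bar
        n_nodes = np.random.randint(n_min, n_max + 1)
        examples.append(np.random.rand(n_nodes, 2))   # random 2-D cities; no tour solving here
    return np.array(examples, dtype=object)           # object array, since examples differ in length

if not os.path.exists(npz_path):
    np.savez(npz_path, train=generate_examples(100, 5, 10, "Train data.."))
else:
    data = np.load(npz_path, allow_pickle=True)       # allow_pickle is required for object arrays in newer NumPy
    train = data["train"]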
18 changes: 10 additions & 8 deletions layers.py
@@ -4,7 +4,6 @@
from tensorflow.contrib import seq2seq
from tensorflow.python.util import nest

linear = layers.linear
LSTMCell = rnn.LSTMCell
MultiRNNCell = rnn.MultiRNNCell
dynamic_rnn_decoder = seq2seq.dynamic_rnn_decoder
@@ -13,16 +12,20 @@
def decoder_rnn(cell, inputs,
enc_outputs, enc_final_states,
seq_length, hidden_dim, num_glimpse,
max_dec_length, batch_size, is_train, end_of_sequence_id=0):
max_dec_length, batch_size, is_train,
end_of_sequence_id=0, initializer=None):
with tf.variable_scope("decoder_rnn") as scope:
first_decoder_input = trainable_initial_state(
batch_size, hidden_dim, name="first_decoder_input")

def attention(ref, query, with_softmax=True, scope="attention"):
with tf.variable_scope(scope):
W_ref = tf.get_variable("W_ref", [1, hidden_dim, hidden_dim])
W_q = tf.get_variable("W_q", [hidden_dim, hidden_dim])
v = tf.get_variable("v", [hidden_dim])
W_ref = tf.get_variable(
"W_ref", [1, hidden_dim, hidden_dim], initializer=initializer)
W_q = tf.get_variable(
"W_q", [hidden_dim, hidden_dim], initializer=initializer)
v = tf.get_variable(
"v", [hidden_dim], initializer=initializer)

encoded_ref = tf.nn.conv1d(ref, W_ref, 1, "VALID")
encoded_query = tf.matmul(tf.reshape(query, [-1, hidden_dim]), W_q)
@@ -85,7 +88,7 @@ def decoder_fn_inference(
output_logit = output_fn(enc_outputs, output, num_glimpse)
scope.reuse_variables()
output_logits.append(output_logit)
outputs = tf.stack(output_logits, 1)
outputs = tf.stack(output_logits, axis=1)

return outputs, final_state, final_context_state

@@ -99,7 +102,6 @@ def trainable_initial_state(batch_size, state_size,
flat_initializer = tuple(tf.zeros_initializer for initializer in flat_state_size)

names = ["{}_{}".format(name, i) for i in xrange(len(flat_state_size))]

tiled_states = []

for name, size, init in zip(names, flat_state_size, flat_initializer):
@@ -118,4 +120,4 @@ def index_matrix_to_pairs(index_matrix):
replicated_first_indices = tf.tile(
tf.expand_dims(tf.range(tf.shape(index_matrix)[0]), dim=1),
[1, tf.shape(index_matrix)[1]])
return tf.pack([replicated_first_indices, index_matrix], axis=2)
return tf.stack([replicated_first_indices, index_matrix], axis=2)
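In layers.py the attention weights W_ref, W_q and v now take the shared uniform initializer, and tf.pack becomes tf.stack (the op was renamed in TensorFlow 1.0). The scoring rule itself is unchanged: for encoder states ref and a decoder query q it computes u_i = v · tanh(W_ref·ref_i + W_q·q), optionally followed by a softmax. A NumPy sketch of that computation for a single batch element (mine, not repo code; the repo applies W_ref with a conv1d over the whole batch):

import numpy as np

hidden_dim, seq_len = 8, 5
rng = np.random.RandomState(0)

# Same uniform range the commit wires in via --init_min_val / --init_max_val.
init = lambda *shape: rng.uniform(-0.08, 0.08, size=shape)
W_ref, W_q, v = init(hidden_dim, hidden_dim), init(hidden_dim, hidden_dim), init(hidden_dim)

ref = rng.randn(seq_len, hidden_dim)     # encoder outputs for one example
query = rng.randn(hidden_dim)            # current decoder state

scores = np.tanh(ref @ W_ref + query @ W_q) @ v    # one logit per input position
probs = np.exp(scores) / np.exp(scores).sum()      # the with_softmax=True branch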
83 changes: 60 additions & 23 deletions model.py
@@ -5,7 +5,7 @@
from utils import show_all_variables

class Model(object):
def __init__(self, config, data_loader):
def __init__(self, config, data_loader, is_critic=False):
self.data_loader = data_loader

self.task = config.task
@@ -15,33 +15,46 @@ def __init__(self, config, data_loader):
self.input_dim = config.input_dim
self.hidden_dim = config.hidden_dim
self.num_layers = config.num_layers

self.max_enc_length = config.max_enc_length
self.max_dec_length = config.max_dec_length
self.num_glimpse = config.num_glimpse

self.init_min_val = config.init_min_val
self.init_max_val = config.init_max_val
self.initializer = \
tf.random_uniform_initializer(self.init_min_val, self.init_max_val)

self.use_terminal_symbol = config.use_terminal_symbol

self.reg_scale = config.reg_scale
self.lr_start = config.lr_start
self.lr_decay_step = config.lr_decay_step
self.lr_decay_rate = config.lr_decay_rate
self.max_grad_norm = config.max_grad_norm
self.batch_size = config.batch_size

self.layer_dict = {}

with arg_scope([linear, LSTMCell], \
initializer=tf.random_normal_initializer(0, 0.001)):
self._build_model()
self._build_model()
if is_critic:
self._build_critic_model()

self._build_optim()
self._build_summary()

show_all_variables()

def _build_summary(self):
tf.summary.scalar("learning_rate", self.lr)

def _build_critic_model(self):
pass

def _build_model(self):
self.global_step = tf.Variable(0, trainable=False)

initializer = None
input_weight = tf.get_variable(
"input_weight", [1, self.input_dim, self.hidden_dim])
"input_weight", [1, self.input_dim, self.hidden_dim],
initializer=self.initializer)

with tf.variable_scope("encoder"):
self.enc_seq_length = tf.placeholder(
@@ -53,22 +66,27 @@ def _build_model(self):
self.enc_inputs, input_weight, 1, "VALID")

batch_size = tf.shape(self.enc_inputs)[0]
tiled_zeros = tf.tile(tf.zeros(
[1, self.hidden_dim]), [batch_size, 1], name="tiled_zeros")

with tf.variable_scope("encoder"):
self.enc_cell = LSTMCell(self.hidden_dim)
self.enc_cell = LSTMCell(
self.hidden_dim,
initializer=self.initializer)

if self.num_layers > 1:
cells = [self.enc_cell] * self.num_layers
self.enc_cell = MultiRNNCell(cells)
self.enc_init_state = trainable_initial_state(batch_size, self.enc_cell.state_size)
self.enc_init_state = trainable_initial_state(
batch_size, self.enc_cell.state_size)

# self.encoder_outputs : [None, max_time, output_size]
self.enc_outputs, self.enc_final_states = tf.nn.dynamic_rnn(
self.enc_cell, self.transformed_enc_inputs, self.enc_seq_length, self.enc_init_state)
self.enc_cell, self.transformed_enc_inputs,
self.enc_seq_length, self.enc_init_state)

if self.use_terminal_symbol:
self.enc_outputs = [tiled_zeros] + self.enc_outputs
tiled_zeros = tf.tile(tf.zeros(
[1, self.hidden_dim]), [batch_size, 1], name="tiled_zeros")
expanded_tiled_zeros = tf.expand_dims(tiled_zeros, axis=1)
self.enc_outputs = tf.concat_v2([expanded_tiled_zeros, self.enc_outputs], axis=1)

with tf.variable_scope("dencoder"):
#self.first_decoder_input = \
@@ -86,19 +104,28 @@ def _build_model(self):

idx_pairs = index_matrix_to_pairs(self.dec_idx_inputs)
self.dec_inputs = tf.gather_nd(self.enc_inputs, idx_pairs)
self.transformed_dec_inputs = tf.gather_nd(self.transformed_enc_inputs, idx_pairs)
self.transformed_dec_inputs = \
tf.gather_nd(self.transformed_enc_inputs, idx_pairs)

#dec_inputs = [
# tf.expand_dims(self.first_decoder_input, 1),
# dec_inputs_without_first,
#]
#self.dec_inputs = tf.concat_v2(dec_inputs, axis=1)

self.dec_targets = tf.placeholder(tf.float32,
[None, self.max_enc_length + 1], name="dec_targets")
if self.use_terminal_symbol:
dec_target_dims = [None, self.max_enc_length + 1]
else:
dec_target_dims = [None, self.max_enc_length]

self.dec_targets = tf.placeholder(
tf.int32, dec_target_dims, name="dec_targets")
self.is_train = tf.placeholder(tf.bool, name="is_train")

self.dec_cell = LSTMCell(self.hidden_dim)
self.dec_cell = LSTMCell(
self.hidden_dim,
initializer=self.initializer)

if self.num_layers > 1:
cells = [self.dec_cell] * self.num_layers
self.dec_cell = MultiRNNCell(cells)
@@ -107,19 +134,29 @@ def _build_model(self):
self.dec_cell, self.transformed_dec_inputs,
self.enc_outputs, self.enc_final_states,
self.enc_seq_length, self.hidden_dim, self.num_glimpse,
self.max_dec_length, batch_size, is_train=True)
self.max_dec_length, batch_size, is_train=True,
initializer=self.initializer)

with tf.variable_scope("dencoder", reuse=True):
self.dec_outputs, _, self.predictions = decoder_rnn(
self.dec_cell, self.transformed_dec_inputs,
self.enc_outputs, self.enc_final_states,
self.enc_seq_length, self.hidden_dim, self.num_glimpse,
self.max_dec_length, batch_size, is_train=False)
self.max_dec_length, batch_size, is_train=False,
initializer=self.initializer)

def _build_optim(self):
self.loss = tf.reduce_mean(self.output - self.targets)
self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=self.dec_output_logits, labels=self.dec_targets)

# TODO: length masking
#mask = tf.sign(tf.to_float(targets_flat))
#masked_losses = mask * self.loss

self.lr = tf.train.exponential_decay(
self.lr_start, self.global_step, self.lr_decay_step,
self.lr_decay_rate, staircase=True, name="learning_rate")

self.lr = tf.Variable(self.lr_start)
optimizer = tf.train.AdamOptimizer(self.lr)

if self.max_grad_norm != None:
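The model changes thread the uniform initializer into every variable, prepend an all-zeros "terminal symbol" vector to the encoder outputs, make dec_targets integer indices, and replace the placeholder loss with sparse softmax cross-entropy plus an exponentially decayed learning rate. A small NumPy sketch of that loss (mine, not repo code), to show what the logits/labels shapes imply:

import numpy as np

rng = np.random.RandomState(0)
batch, steps, n_classes = 2, 4, 6
logits = rng.randn(batch, steps, n_classes)                 # decoder output logits
targets = rng.randint(0, n_classes, size=(batch, steps))    # integer indices, like the new int32 dec_targets

log_probs = logits - np.log(np.exp(logits).sum(-1, keepdims=True))   # log-softmax over the class axis
nll = -np.take_along_axis(log_probs, targets[..., None], axis=-1).squeeze(-1)
loss = nll.mean()   # sparse_softmax_cross_entropy_with_logits returns per-element losses,
                    # so the graph still needs a reduction (and the TODO'd length masking)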
2 changes: 1 addition & 1 deletion trainer.py
@@ -41,7 +41,7 @@ def _build_session(self):
summary_writer=self.summary_writer,
save_summaries_secs=300,
save_model_secs=self.checkpoint_secs,
global_step=self.model.discrim_step)
global_step=self.model.global_step)

gpu_options = tf.GPUOptions(
per_process_gpu_memory_fraction=self.gpu_memory_fraction,
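The trainer fix points the Supervisor at the model's actual global_step rather than a leftover discrim_step attribute, so checkpoints are numbered by the step counter the optimizer increments. A minimal sketch of that wiring (assumed TF 1.x API, not repo code; the log directory is hypothetical):

import tensorflow as tf

global_step = tf.Variable(0, trainable=False, name="global_step")
loss = tf.Variable(1.0)
train_op = tf.train.AdamOptimizer(0.001).minimize(loss, global_step=global_step)

sv = tf.train.Supervisor(logdir="/tmp/ptr_demo",        # hypothetical log directory
                         save_model_secs=300,
                         global_step=global_step)       # checkpoints come out as model.ckpt-<step>
with sv.managed_session() as sess:
    for _ in range(3):
        sess.run(train_op)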
