
Merge branch 'master' of https://github.com/zeynepakkalyoncu/Tardis

zeynepakkalyoncu committed Dec 19, 2018
2 parents c472544 + c1fdfa4 commit 2f6edb1c1e245949bc3366d162d6af907e3804f0
Showing with 97 additions and 98 deletions.
  1. +1 −0 lib/data/util.py
  2. +4 −5 lib/model/__main__.py
  3. +92 −93 lib/model/seq2seq.py
@@ -84,6 +84,7 @@ def build_indices(source_data, target_data, source_vocab, target_vocab, one_hot)
            decoder_target_data[i, j - 1] = target_vocab[word]
    return encoder_input_data, decoder_input_data, decoder_target_data

+
def trim_sentences(sentences):
    trimmed_sentences = list()
    for sentence in sentences:
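The hunk cuts off before the body of trim_sentences. As a rough illustration of what a helper like this typically does (the max_length parameter and the truncation behavior below are assumptions for illustration, not the repository's actual code):

```python
# Hypothetical sketch -- the real body of trim_sentences is not shown in this hunk.
def trim_sentences(sentences, max_length=50):
    trimmed_sentences = list()
    for sentence in sentences:
        # Keep at most the first max_length tokens of each sentence
        trimmed_sentences.append(sentence[:max_length])
    return trimmed_sentences
```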
@@ -30,13 +30,12 @@
if not args.cpu:
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
-   config.log_device_placement = True
+   config.log_device_placement = False
    sess = tf.Session(config=config)

    set_session(sess)

-os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-os.environ["CUDA_VISIBLE_DEVICES"] = args.devices
+   os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+   os.environ["CUDA_VISIBLE_DEVICES"] = args.devices

if args.dataset == 'en_de':
    encoder_train_input, decoder_train_input, decoder_train_target, source_vocab, target_vocab = \
@@ -138,4 +137,4 @@

else:
    model.train_generator(training_generator, validation_generator)
-model.evaluate(encoder_test_input, decoder_test_input, raw_test_target)
+model.evaluate(encoder_test_input, raw_test_target)
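For context, the lines this hunk touches follow the usual TensorFlow 1.x / Keras GPU-session pattern: pin the visible devices, let GPU memory grow on demand, and register the session with Keras. A minimal self-contained sketch of that pattern (the import path and the hard-coded device string are assumptions; the script actually takes them from args):

```python
import os

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# Order devices by PCI bus ID so indices match nvidia-smi output
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # stand-in for args.devices

config = tf.ConfigProto()
config.gpu_options.allow_growth = True   # claim GPU memory on demand, not up front
config.log_device_placement = False      # set True only when debugging op placement
set_session(tf.Session(config=config))   # make this the session Keras uses
```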
@@ -17,113 +17,118 @@
class Seq2Seq:
    def __init__(self, config):
        self.config = config
        recurrent_unit = self.config.recurrent_unit.lower()

        if self.config.cpu:
            devices = list('/cpu:' + str(x) for x in (0, 0))
        else:
            devices = list('/gpu:' + x for x in config.devices)

        if self.config.ensemble:
            inputs = Input(shape=(None,))
            reconstructed_inputs = Reshape((128,), input_shape=(self.config.dataset_size,))(inputs)
            with tf.device(devices[0]):
                initial_weights = RandomUniform(minval=-0.08, maxval=0.08, seed=config.seed)
                encoder_inputs = Input(shape=(None,))
                reconstructed_inputs = Reshape((128,), input_shape=(config.dataset_size,))
                encoder_embedding = Embedding(config.source_vocab_size, config.embedding_dim,
                                              weights=[config.source_embedding_map], trainable=False)
                # TODO: set indices dynamically
                encoder_inputs = Lambda(lambda x: x[:, :50])(reconstructed_inputs)
                encoder_states = self.encode(encoder_inputs, recurrent_unit=self.config.recurrent_unit)

                decoder_inputs = Lambda(lambda x: x[:, 50:])(reconstructed_inputs)

                decoder_outputs = self.decode(decoder_inputs, encoder_states, recurrent_unit=self.config.recurrent_unit)

                # decoder_reshape = Reshape((128, self.config.target_vocab_size)) #?
                # decoder_slice = Lambda(lambda x: x[:, 50:, :])

                # decoder_outputs = decoder_reshape(decoder_outputs)
                # decoder_outputs = decoder_slice(decoder_outputs)
        else:
            # Encoder
            with tf.device(devices[0]):
                encoder_inputs = Input(shape=(None, ))
                encoder_states = self.encode(encoder_inputs, recurrent_unit=self.config.recurrent_unit)
            # Decoder
            with tf.device(devices[1]):
                decoder_inputs = Input(shape=(None, ))
                decoder_outputs = self.decode(decoder_inputs, encoder_states, recurrent_unit=self.config.recurrent_unit)

        # Input: Source and target sentence, Output: Predicted translation
        # self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
        self.model = Model(inputs, decoder_outputs)
        optimizer = Adam(lr=self.config.lr, clipnorm=25.)
        encoder_embedded = encoder_embedding(encoder_inputs)

        if recurrent_unit == 'lstm':
            encoder = LSTM(self.config.hidden_dim, return_state=True, return_sequences=True,
                           recurrent_initializer=initial_weights)(encoder_embedded)
            for i in range(1, self.config.num_encoder_layers):
                encoder = LSTM(self.config.hidden_dim, return_state=True, return_sequences=True)(encoder)
            _, state_h, state_c = encoder
            encoder_states = [state_h, state_c]
        else:
            encoder = GRU(self.config.hidden_dim, return_state=True, return_sequences=True,
                          recurrent_initializer=initial_weights)(encoder_embedded)
            for i in range(1, self.config.num_encoder_layers):
                encoder = GRU(self.config.hidden_dim, return_state=True, return_sequences=True)(encoder)
            _, state_h = encoder
            encoder_states = [state_h]

        with tf.device(devices[1]):
            decoder_inputs = Input(shape=(None,))
            decoder_embedding = Embedding(config.target_vocab_size, config.embedding_dim,
                                          weights=[config.target_embedding_map], trainable=False)
            decoder_embedded = decoder_embedding(decoder_inputs)

            if recurrent_unit.lower() == 'lstm':
                decoder = LSTM(self.config.hidden_dim, return_state=True, return_sequences=True)(
                    decoder_embedded, initial_state=encoder_states)  # Accepts concatenated encoder states as input
                for i in range(1, self.config.num_decoder_layers):
                    decoder = LSTM(self.config.hidden_dim, return_state=True, return_sequences=True)(
                        decoder)  # Use the final encoder state as context
                decoder_outputs, decoder_states = decoder[0], decoder[1:]
            else:
                decoder = GRU(self.config.hidden_dim, return_state=True, return_sequences=True)(
                    decoder_embedded, initial_state=encoder_states)  # Accepts concatenated encoder states as input
                for i in range(1, self.config.num_decoder_layers):
                    decoder = GRU(self.config.hidden_dim, return_state=True, return_sequences=True)(
                        decoder)  # Use the final encoder state as context
                decoder_outputs, decoder_states = decoder[0], decoder[1]

            decoder_dense = Dense(config.target_vocab_size, activation='softmax')
            decoder_outputs = decoder_dense(decoder_outputs)

        self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
        optimizer = Adam(lr=config.lr, clipnorm=25.)
        self.model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])

        print(self.model.summary())

    def encode(self, encoder_inputs, recurrent_unit='lstm'):
        initial_weights = RandomUniform(minval=-0.08, maxval=0.08, seed=self.config.seed)
        encoder_embedding = Embedding(self.config.source_vocab_size, self.config.embedding_dim,
                                      weights=[self.config.source_embedding_map], trainable=False)
        encoder_embedded = encoder_embedding(encoder_inputs)
        if recurrent_unit.lower() == 'lstm':
            encoder = LSTM(self.config.hidden_dim, return_state=True, return_sequences=True,
                           recurrent_initializer=initial_weights)(encoder_embedded)
            for i in range(1, self.config.num_encoder_layers):
                encoder = LSTM(self.config.hidden_dim, return_state=True, return_sequences=True)(encoder)
            _, state_h, state_c = encoder
            return [state_h, state_c]
        else:
            encoder = GRU(self.config.hidden_dim, return_state=True, return_sequences=True,
                          recurrent_initializer=initial_weights)(encoder_embedded)
            for i in range(1, self.config.num_encoder_layers):
                encoder = GRU(self.config.hidden_dim, return_state=True, return_sequences=True)(encoder)
            _, state_h = encoder
            return [state_h]

    def decode(self, decoder_inputs, encoder_states, recurrent_unit='lstm'):
        decoder_embedding = Embedding(self.config.target_vocab_size, self.config.embedding_dim,
                                      weights=[self.config.target_embedding_map], trainable=False)
        decoder_embedded = decoder_embedding(decoder_inputs)
        if recurrent_unit.lower() == 'lstm':
            decoder = LSTM(self.config.hidden_dim, return_state=True, return_sequences=True)(
                decoder_embedded, initial_state=encoder_states)  # Accepts concatenated encoder states as input
            for i in range(1, self.config.num_decoder_layers):
                decoder = LSTM(self.config.hidden_dim, return_state=True, return_sequences=True)(decoder)  # Use the final encoder state as context
            decoder_outputs, decoder_states = decoder[0], decoder[1:]
        else:
            decoder = GRU(self.config.hidden_dim, return_state=True, return_sequences=True)(
                decoder_embedded, initial_state=encoder_states)  # Accepts concatenated encoder states as input
            for i in range(1, self.config.num_decoder_layers):
                decoder = GRU(self.config.hidden_dim, return_state=True, return_sequences=True)(decoder)  # Use the final encoder state as context
            decoder_outputs, decoder_states = decoder[0], decoder[1]
        decoder_dense = Dense(self.config.target_vocab_size, activation='softmax')
        return decoder_dense(decoder_outputs)

    def train(self, encoder_train_input, decoder_train_input, decoder_train_target):
        checkpoint_filename = \
-           'ep{epoch:02d}_el%d_dl%d_ds%d_sv%d_tv%d.hdf5' % (self.config.num_encoder_layers, self.config.num_decoder_layers, self.config.dataset_size,
-                                                            self.config.source_vocab_size, self.config.target_vocab_size)
-       time_callback = TimeHistory()
-       callbacks = [lr_scheduler(initial_lr=self.config.lr, decay_factor=self.config.decay), time_callback,
-                    ModelCheckpoint(os.path.join(os.getcwd(), 'data', 'checkpoints', self.config.dataset, checkpoint_filename),
-                                    monitor='val_loss', verbose=1, save_best_only=False,
-                                    save_weights_only=True, mode='auto', period=1)]
+           '%s_el%d_dl%d_ds%d_sv%d_tv%d_ep{epoch:02d}.hdf5' % (self.config.recurrent_unit.lower(),
+                                                               self.config.num_encoder_layers,
+                                                               self.config.num_decoder_layers,
+                                                               self.config.dataset_size,
+                                                               self.config.source_vocab_size,
+                                                               self.config.target_vocab_size)
+       time_history_callback = TimeHistory()
+       callbacks = [time_history_callback,
+                    lr_scheduler(initial_lr=self.config.lr, decay_factor=self.config.decay),
+                    ModelCheckpoint(os.path.join('data', 'checkpoints', self.config.dataset, checkpoint_filename),
+                                    monitor='val_loss',
+                                    verbose=1,
+                                    save_best_only=False,
+                                    save_weights_only=True,
+                                    mode='auto',
+                                    period=1)]
        self.model.fit([encoder_train_input, decoder_train_input], decoder_train_target,
                       batch_size=self.config.batch_size,
                       epochs=self.config.epochs,
                       validation_split=0.20,
                       callbacks=callbacks)
+       print("Training time (in seconds):", time_history_callback.times)


    def train_generator(self, training_generator, validation_generator):
        checkpoint_filename = \
-           'ep{epoch:02d}_el%d_dl%d_ds%d_sv%d_tv%d.hdf5' % (self.config.num_encoder_layers, self.config.num_decoder_layers, self.config.dataset_size,
-                                                            self.config.source_vocab_size, self.config.target_vocab_size)
-       time_callback = TimeHistory()
-       callbacks = [lr_scheduler(initial_lr=self.config.lr, decay_factor=self.config.decay), time_callback,
-                    ModelCheckpoint(os.path.join(os.getcwd(), 'data', 'checkpoints', self.config.dataset, checkpoint_filename),
-                                    monitor='val_loss', verbose=1, save_best_only=False,
-                                    save_weights_only=True, mode='auto', period=1)]
-       self.model.fit_generator(training_generator, epochs=self.config.epochs, callbacks=callbacks,
+           '%s_el%d_dl%d_ds%d_sv%d_tv%d_ep{epoch:02d}.hdf5' % (self.config.recurrent_unit.lower(),
+                                                               self.config.num_encoder_layers,
+                                                               self.config.num_decoder_layers,
+                                                               self.config.dataset_size,
+                                                               self.config.source_vocab_size,
+                                                               self.config.target_vocab_size)
+       time_history_callback = TimeHistory()
+       callbacks = [time_history_callback,
+                    lr_scheduler(initial_lr=self.config.lr, decay_factor=self.config.decay),
+                    ModelCheckpoint(os.path.join('data', 'checkpoints', self.config.dataset, checkpoint_filename),
+                                    monitor='val_loss',
+                                    verbose=1,
+                                    save_best_only=False,
+                                    save_weights_only=True,
+                                    mode='auto',
+                                    period=1)]
+       self.model.fit_generator(training_generator,
+                                epochs=self.config.epochs,
+                                callbacks=callbacks,
                                 validation_data=validation_generator)
-       print("Training time (in seconds):", time_callback.times)
-
-   def predict(self, encoder_predict_input, decoder_predict_input):
-       return self.model.predict([encoder_predict_input, decoder_predict_input])
+       print("Training time (in seconds):", time_history_callback.times)

-   def beam_search(self, encoder_predict_input):
+   def predict(self, encoder_predict_input):
        beam_size = self.config.beam_size
        max_target_len = encoder_predict_input.shape[0]
        k_beam = [(0, [0] * max_target_len)]
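The checkpoint change in train and train_generator above moves the epoch counter to the end of the filename and prefixes the recurrent-unit type, so each saved file names the full configuration. A small sketch of the resulting callback, with illustrative placeholder values in place of the real config fields:

```python
import os

from keras.callbacks import ModelCheckpoint

# Placeholder values -- in the real code these come from self.config
checkpoint_filename = '%s_el%d_dl%d_ds%d_sv%d_tv%d_ep{epoch:02d}.hdf5' % (
    'lstm', 2, 2, 100000, 30000, 30000)

checkpoint = ModelCheckpoint(
    os.path.join('data', 'checkpoints', 'en_de', checkpoint_filename),
    monitor='val_loss',
    verbose=1,
    save_best_only=False,   # keep weights from every epoch, not just the best
    save_weights_only=True,
    mode='auto',
    period=1)               # write a checkpoint after every epoch
```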
@@ -138,17 +143,11 @@ def beam_search(self, encoder_predict_input):
                sum(np.log(predicted[j, 0, hyp[j + 1]]) for j in range(i)) + np.log(predicted[i, 0, next_hyp]),
                list(hyp[:(i + 1)]) + [next_hyp] + ([0] * (encoder_predict_input.shape[0] - i - 1))
            ))

        k_beam = sorted(all_hypotheses, key=lambda x: x[0])[-beam_size:]  # Sort by probability

        return k_beam[-1][1]  # Pick hypothesis with highest probability

-   def evaluate(self, encoder_predict_input, decoder_predict_input, decoder_train_target):
-       if self.config.beam_size > 0:
-           y_pred = np.apply_along_axis(self.beam_search, 1, encoder_predict_input)
-       else:
-           y_pred = self.predict(encoder_predict_input, decoder_predict_input)
-       y_pred = np.argmax(y_pred, axis=-1)
-       print("BLEU Score:", bleu_score(y_pred, decoder_train_target))
+   def evaluate(self, encoder_predict_input, decoder_predict_target):
+       y_pred = np.apply_along_axis(self.predict, 1, encoder_predict_input)
+       print("BLEU Score:", bleu_score(decoder_predict_target, y_pred))
        # An error in the sacrebleu library prevents multi_bleu_score from working on WMT '14 EN-DE test split
        # print("Multi-BLEU Score", multi_bleu_score(y_pred, self.config.target_vocab, self.config.dataset))
