In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
tf.enable_eager_execution()
import importlib
import os

from matplotlib import pyplot as plt
%matplotlib notebook

from transliteration import data, train, model_one, script, decode, evaluate

In [2]:
# Re-import the data module so edits made to it on disk are picked up.
importlib.reload(data)
batch_size = 128

# Keyword arguments shared by every split of a given corpus.
cmu_dataset_args = dict(from_script='en', to_script='cmu', batch_size=batch_size)
eob_dataset_args = dict(from_script='en', to_script='ja', batch_size=batch_size)

# English -> CMU pronunciation splits.
cmu_train_dataset = data.make_dataset('../data/tfrecord/cmu_train.tfrecord',
                                      **cmu_dataset_args)
cmu_valid_dataset = data.make_dataset('../data/tfrecord/cmu_valid.tfrecord',
                                      **cmu_dataset_args)
cmu_test_dataset = data.make_dataset('../data/tfrecord/cmu_test.tfrecord',
                                     **cmu_dataset_args)

# English -> Japanese splits.
eob_train_dataset = data.make_dataset('../data/tfrecord/eob_train.tfrecord',
                                      **eob_dataset_args)
eob_valid_dataset = data.make_dataset('../data/tfrecord/eob_valid.tfrecord',
                                      **eob_dataset_args)
eob_test_dataset = data.make_dataset('../data/tfrecord/eob_test.tfrecord',
                                     **eob_dataset_args)

In [3]:
# Adam optimizer with default hyperparameters. The same instance is passed to
# every training call in this notebook and is saved as part of the checkpoint.
optimizer = tf.train.AdamOptimizer()

def loss_function(real, pred):
    """Masked sparse softmax cross-entropy between target ids and logits.

    Positions where `real` equals 0 contribute zero loss (id 0 is presumably
    the padding token -- TODO confirm against the script vocabularies).

    Args:
        real: integer tensor of target ids, forwarded as `labels`.
        pred: logits tensor, forwarded as `logits`.

    Returns:
        Scalar tensor holding the mean of the masked per-position losses.
        Masked positions are zeroed but still counted in the mean's
        denominator, so the value depends on how much of `real` is 0.
    """
    loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred)
    # Build the mask with TensorFlow ops instead of NumPy: this avoids pulling
    # `real` back into a NumPy array, gives the mask the same dtype as the
    # loss explicitly, and does not depend on eager execution.
    mask = tf.cast(tf.not_equal(real, 0), loss_.dtype)
    return tf.reduce_mean(loss_ * mask)

# Hyperparameters shared by both decoders; only the vocabulary size differs.
decoder_settings = dict(lstm_size=240,
                        embedding_size=30,
                        attention_size=120,
                        attention='monotonic_bahdanau')

encoder_config = model_one.Config(
    lstm_size=240,
    embedding_size=30,
    attention_size=None,
    vocab_size=script.SCRIPTS['en'].vocab_size,
)
ja_decoder_config = model_one.Config(
    vocab_size=script.SCRIPTS['ja'].vocab_size,
    **decoder_settings
)
cmu_decoder_config = model_one.Config(
    vocab_size=script.SCRIPTS['cmu'].vocab_size,
    **decoder_settings
)

# One English encoder, paired with a separate decoder per target script.
encoder = model_one.Encoder(encoder_config)
ja_decoder = model_one.Decoder(ja_decoder_config)
cmu_decoder = model_one.Decoder(cmu_decoder_config)

# A single checkpoint object tracks the optimizer and all three models.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(
    optimizer=optimizer,
    encoder=encoder,
    ja_decoder=ja_decoder,
    cmu_decoder=cmu_decoder,
)

In [4]:
# Train the encoder with the CMU decoder for 5 epochs, saving a checkpoint
# whenever the validation loss reaches a new minimum.
cmu_best_val_loss = None
cmu_checkpoint = None

# Arguments common to training, validation and decoding on the CMU task.
cmu_run_args = dict(from_script='en',
                    to_script='cmu',
                    encoder=encoder,
                    decoder=cmu_decoder)

for e in range(5):
    # Second positional argument: True for the training pass, False for the
    # validation pass (presumably a "training" flag -- TODO confirm in train).
    loss = train.run_one_epoch(cmu_train_dataset,
                               True,
                               optimizer=optimizer,
                               loss_function=loss_function,
                               **cmu_run_args)
    valid_loss = train.run_one_epoch(cmu_valid_dataset,
                                     False,
                                     loss_function=loss_function,
                                     **cmu_run_args)

    is_new_best = cmu_best_val_loss is None or valid_loss < cmu_best_val_loss
    if is_new_best:
        cmu_best_val_loss = valid_loss
        cmu_checkpoint = checkpoint.save(file_prefix=checkpoint_prefix)

    print("Epoch {}: Train Loss {:.3f}, Valid Loss {:.3f}".format(e, loss, valid_loss))
    # Decode a sample word each epoch as a quick sanity check.
    sample_output = decode.transliterate(input_strs=['derick'],
                                         k_best=2,
                                         decoding_method=decode.beam_search_decode,
                                         **cmu_run_args)
    print(sample_output)

Instructions for updating:
Colocations handled automatically by placer.


Epoch 0: Train Loss 10.731, Valid Loss 3.773
([['D EH1 R IH0 K S', 'D ER1 IH0 K S']], array([[-2.78148673, -3.65953329]]))


Epoch 1: Train Loss 3.030, Valid Loss 2.622
([['D EH1 R IH0 K S', 'D IH1 R IH0 K S']], array([[-1.70589991, -3.50420103]]))


Epoch 2: Train Loss 2.351, Valid Loss 2.346
([['D EH1 R IH0 K S', 'D EH1 R IH0 K IH0 NG']], array([[-2.06760684, -2.8355913 ]]))


Epoch 3: Train Loss 2.042, Valid Loss 2.131
([['D EH1 R IH0 K S', 'D EH1 R IH0 K IH0 K S']], array([[-3.94403873, -4.09484099]]))


Epoch 4: Train Loss 1.838, Valid Loss 2.029
([['D EH1 R IH0 K S', 'D EH1 R IH0 K IH0 S']], array([[-3.633751  , -4.33726468]]))


In [5]:
# Reload the best CMU checkpoint and verify every saved value was matched.
restore_status = checkpoint.restore(cmu_checkpoint)
restore_status.assert_consumed()

# Re-run validation on the restored weights and show the resulting loss.
cmu_restored_valid_loss = train.run_one_epoch(cmu_valid_dataset,
                                              False,
                                              from_script='en',
                                              to_script='cmu',
                                              encoder=encoder,
                                              decoder=cmu_decoder,
                                              loss_function=loss_function)
print(cmu_restored_valid_loss)

tf.Tensor(2.029012, shape=(), dtype=float32)


In [6]:
def run_some_epochs(epochs):
    """Run up to `epochs` en->ja epochs, stopping at the first non-improvement.

    Each epoch runs one pass over `eob_train_dataset` and one over
    `eob_valid_dataset` using the notebook-level `encoder` and `ja_decoder`,
    prints both losses, and prints a sample decoding of 'derick'. A checkpoint
    is saved each time the validation loss improves; the loop ends as soon as
    it fails to improve.

    Args:
        epochs: maximum number of epochs to run.

    Returns:
        Path returned by the last `checkpoint.save` call, or None if no
        epoch was run.
    """
    # Arguments common to the training, validation and decoding calls.
    ja_run_args = dict(from_script='en',
                       to_script='ja',
                       encoder=encoder,
                       decoder=ja_decoder)
    best_val_loss = None
    checkpoint_path = None

    for epoch in range(epochs):
        loss = train.run_one_epoch(eob_train_dataset,
                                   True,
                                   optimizer=optimizer,
                                   loss_function=loss_function,
                                   **ja_run_args)
        valid_loss = train.run_one_epoch(eob_valid_dataset,
                                         False,
                                         loss_function=loss_function,
                                         **ja_run_args)
        print("Epoch {}: Train Loss {:.3f}, Valid Loss {:.3f}".format(epoch, loss, valid_loss))
        sample_output = decode.transliterate(input_strs=['derick'],
                                             k_best=2,
                                             decoding_method=decode.beam_search_decode,
                                             **ja_run_args)
        print(sample_output)

        # Early stop: once a best loss exists, quit on the first epoch that
        # does not beat it, without saving.
        if best_val_loss is not None and not (valid_loss < best_val_loss):
            break
        best_val_loss = valid_loss
        checkpoint_path = checkpoint.save(file_prefix=checkpoint_prefix)

    return checkpoint_path

In [7]:
# Stage 1: mark every encoder layer as non-trainable, then run up to 5
# en->ja epochs with early stopping.
for encoder_layer in encoder.layers:
    encoder_layer.trainable = False

checkpoint_path = run_some_epochs(5)

# Go back to the best checkpoint of this stage and check it loaded fully.
restore_status = checkpoint.restore(checkpoint_path)
restore_status.assert_consumed()

# Validation loss of the restored model; shown as the cell's output.
ja_eval_args = dict(from_script='en',
                    to_script='ja',
                    encoder=encoder,
                    decoder=ja_decoder,
                    loss_function=loss_function)
train.run_one_epoch(eob_valid_dataset, False, **ja_eval_args)

Epoch 0: Train Loss 14.270, Valid Loss 8.867
([['ディリク', 'デリック']], array([[-4.45877628, -4.71044749]]))


Epoch 1: Train Loss 6.988, Valid Loss 6.035
([['デリック', 'ディック']], array([[-3.42910734, -3.80303085]]))


Epoch 2: Train Loss 5.239, Valid Loss 5.497
([['ディック', 'ディリック']], array([[-3.83938559, -3.85506754]]))


Epoch 3: Train Loss 4.294, Valid Loss 5.091
([['デリック', 'ダリック']], array([[-2.9712317 , -3.96789042]]))


Epoch 4: Train Loss 3.578, Valid Loss 4.916
([['デリック', 'ディック']], array([[-3.5558138 , -3.97427858]]))


<tf.Tensor: id=110873345, shape=(), dtype=float32, numpy=4.8619604>

In [8]:
# Stage 2: mark every encoder layer as trainable again, then run up to 10
# more en->ja epochs with early stopping.
for encoder_layer in encoder.layers:
    encoder_layer.trainable = True

checkpoint_path = run_some_epochs(10)

# Go back to the best checkpoint of this stage and check it loaded fully.
restore_status = checkpoint.restore(checkpoint_path)
restore_status.assert_consumed()

# Validation loss of the restored model; shown as the cell's output.
ja_eval_args = dict(from_script='en',
                    to_script='ja',
                    encoder=encoder,
                    decoder=ja_decoder,
                    loss_function=loss_function)
train.run_one_epoch(eob_valid_dataset, False, **ja_eval_args)

Epoch 0: Train Loss 2.976, Valid Loss 4.879
([['ダリック', 'デリック']], array([[-2.31314964, -2.89069125]]))


Epoch 1: Train Loss 2.511, Valid Loss 4.724
([['ダリック', 'デリック']], array([[-2.69333865, -3.04696838]]))


Epoch 2: Train Loss 2.103, Valid Loss 4.792
([['ダリック', 'ダリキック']], array([[-2.77918531, -3.77301646]]))


<tf.Tensor: id=122401708, shape=(), dtype=float32, numpy=4.6661897>

In [9]:
# Load the validation pairs; the 'en' and 'ja' columns are used for evaluation below.
# keep_default_na=False disables pandas' default NA markers (e.g. "NA", "null"),
# presumably so entries spelled like those are kept as strings -- TODO confirm.
valid_df = pd.read_csv('../data/split/eob_pairs_valid.csv',
                       keep_default_na=False)

In [10]:
# Inputs and references taken from the validation frame.
en_inputs = valid_df['en'].values
ja_references = valid_df['ja'].values

# Beam search with 40 beams, keeping the 20 best candidates per input.
tr = decode.transliterate(
    input_strs=en_inputs,
    from_script='en',
    to_script='ja',
    encoder=encoder,
    decoder=ja_decoder,
    k_best=20,
    num_beams=40,
    decoding_method=decode.beam_search_decode,
)

# Top-1 accuracy against the references; shown as the cell's output.
evaluate.top_k_accuracy(ja_references, tr, k=1)

0.49069003285870755