In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
tf.enable_eager_execution()
import importlib
import os

from matplotlib import pyplot as plt
%matplotlib notebook

from transliteration import data, train, model_one, script, decode, evaluate

In [2]:
# Re-import the data module so that edits made on disk are picked up.
importlib.reload(data)
batch_size = 128

# Keyword arguments shared by the train/valid/test splits of each corpus,
# kept in one place so the three calls per corpus cannot drift apart.
cmu_dataset_kwargs = dict(from_script='en',
                          to_script='cmu',
                          combine_words_proportion=0.3,
                          batch_size=batch_size)
eob_dataset_kwargs = dict(from_script='en',
                          to_script='ja',
                          batch_size=batch_size)

# 'en' -> 'cmu' splits.
cmu_train_dataset = data.make_dataset('../data/tfrecord/cmu_train.tfrecord',
                                      **cmu_dataset_kwargs)
cmu_valid_dataset = data.make_dataset('../data/tfrecord/cmu_valid.tfrecord',
                                      **cmu_dataset_kwargs)
cmu_test_dataset = data.make_dataset('../data/tfrecord/cmu_test.tfrecord',
                                     **cmu_dataset_kwargs)

# 'en' -> 'ja' splits.
eob_train_dataset = data.make_dataset('../data/tfrecord/eob_train.tfrecord',
                                      **eob_dataset_kwargs)
eob_valid_dataset = data.make_dataset('../data/tfrecord/eob_valid.tfrecord',
                                      **eob_dataset_kwargs)
eob_test_dataset = data.make_dataset('../data/tfrecord/eob_test.tfrecord',
                                     **eob_dataset_kwargs)

Instructions for updating:
Colocations handled automatically by placer.


In [3]:
# One Adam optimizer (default hyperparameters), passed to every training run
# in this notebook and tracked by the checkpoint object.
optimizer = tf.train.AdamOptimizer()

def loss_function(real, pred):
    """Masked sparse softmax cross-entropy.

    Args:
        real: integer class labels. Positions whose label is 0 contribute
            zero loss (presumably 0 is the padding id -- confirm against
            the `script` vocabularies).
        pred: unnormalised logits for the same positions, with the class
            dimension last.

    Returns:
        Scalar tensor holding the mean of the masked per-position losses.
        The mean is taken over every position, masked ones included, so
        masked positions pull the average down instead of being excluded.
    """
    loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred)
    # Build the mask with TensorFlow ops instead of NumPy: same 0/1 values,
    # but already in the loss dtype and not dependent on `real` being an
    # eager tensor that NumPy can read.
    mask = tf.cast(tf.not_equal(real, 0), loss_.dtype)
    return tf.reduce_mean(loss_ * mask)

# Hyperparameters that are identical for the two decoders.
decoder_settings = dict(lstm_size=240,
                        embedding_size=30,
                        attention_size=120,
                        attention='monotonic_bahdanau')

# The encoder shares the LSTM/embedding sizes but is configured without
# an attention size.
encoder_config = model_one.Config(lstm_size=240,
                                  embedding_size=30,
                                  attention_size=None,
                                  vocab_size=script.SCRIPTS['en'].vocab_size)
ja_decoder_config = model_one.Config(vocab_size=script.SCRIPTS['ja'].vocab_size,
                                     **decoder_settings)
cmu_decoder_config = model_one.Config(vocab_size=script.SCRIPTS['cmu'].vocab_size,
                                      **decoder_settings)

# One shared encoder, one decoder per target script.
encoder = model_one.Encoder(encoder_config)
ja_decoder = model_one.Decoder(ja_decoder_config)
cmu_decoder = model_one.Decoder(cmu_decoder_config)

# A single checkpoint object tracks the optimizer and all three models, so
# every save/restore in this notebook covers the whole training state.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(
    optimizer=optimizer,
    encoder=encoder,
    ja_decoder=ja_decoder,
    cmu_decoder=cmu_decoder,
)

In [4]:
# Train encoder + CMU decoder for 10 epochs, checkpointing whenever the
# validation loss improves.
cmu_best_val_loss = None
cmu_checkpoint = None

# Arguments common to the training pass and the validation pass.
cmu_pass_kwargs = dict(from_script='en',
                       to_script='cmu',
                       encoder=encoder,
                       decoder=cmu_decoder,
                       loss_function=loss_function)

for e in range(10):
    # Second positional argument: True for the pass that is given the
    # optimizer, False for the validation pass.
    loss = train.run_one_epoch(cmu_train_dataset,
                               True,
                               optimizer=optimizer,
                               **cmu_pass_kwargs)
    valid_loss = train.run_one_epoch(cmu_valid_dataset,
                                     False,
                                     **cmu_pass_kwargs)

    # The first epoch always counts as an improvement.
    improved = cmu_best_val_loss is None or valid_loss < cmu_best_val_loss
    if improved:
        cmu_best_val_loss = valid_loss
        cmu_checkpoint = checkpoint.save(file_prefix=checkpoint_prefix)

    print("Epoch {}: Train Loss {:.3f}, Valid Loss {:.3f}".format(e, loss, valid_loss))

    # Decode one fixed word each epoch as a quick qualitative check.
    sample_decoding = decode.transliterate(
        input_strs=['derick'],
        from_script='en',
        to_script='cmu',
        encoder=encoder,
        decoder=cmu_decoder,
        k_best=2,
        decoding_method=decode.beam_search_decode,
    )
    print(sample_decoding)

Epoch 0: Train Loss 24.251, Valid Loss 14.589


([['D R IH1 K K K IH0 NG K', 'D R IH1 K K K IH0 NG K AH0 N']], array([[-13.59901275, -17.61309446]]))


Epoch 1: Train Loss 10.364, Valid Loss 7.040
([['D EH1 R IH0 K IH0 NG', 'D EH1 R IH0 K IH0 K S ER0']], array([[-5.80079673, -9.81030743]]))


Epoch 2: Train Loss 5.727, Valid Loss 4.854


([['D EH1 R IH0 K IH0 NG', 'D EH1 R IH0 K EH2 L IH0 NG']], array([[-5.13794947, -8.80927134]]))


Epoch 3: Train Loss 4.252, Valid Loss 3.993


([['D EH1 R IH0 K ER0 Z', 'D EH1 R IH0 K AH0 L AY2 Z']], array([[-5.24770398, -8.08406002]]))


Epoch 4: Train Loss 3.593, Valid Loss 3.459


([['D EH1 R IH0 K ER0 Z', 'D EH1 R IH0 K AO2 R Z']], array([[-4.54251916, -6.78896037]]))


Epoch 5: Train Loss 3.207, Valid Loss 3.183


([['D EH1 R IH0 K', 'D EH1 R IH0 K ER0']], array([[-3.11693265, -3.53123404]]))


Epoch 6: Train Loss 2.956, Valid Loss 3.095
([['D ER0 IH1 K ER0 Z', 'D EH1 R IH0 K']], array([[-3.35321811, -3.66339079]]))


Epoch 7: Train Loss 2.771, Valid Loss 2.971
([['D EH1 R IH0 K ER0 Z', 'D EH1 R IH0 K AH0 N']], array([[-2.90840366, -4.89875054]]))


Epoch 8: Train Loss 2.606, Valid Loss 2.798
([['D EH1 R IH0 K ER0 Z', 'D ER0 IH1 K ER0 Z']], array([[-3.56117671, -3.64637422]]))


Epoch 9: Train Loss 2.457, Valid Loss 2.748
([['D EH1 R IH0 K', 'D EH1 R IH0 K ER0 Z']], array([[-2.47603393, -3.53397996]]))


In [5]:
# Go back to the checkpoint saved at the best CMU validation loss and fail
# loudly if any saved value was left unmatched.
restore_status = checkpoint.restore(cmu_checkpoint)
restore_status.assert_consumed()

# Re-run validation with the restored weights.
restored_cmu_valid_loss = train.run_one_epoch(
    cmu_valid_dataset,
    False,
    from_script='en',
    to_script='cmu',
    encoder=encoder,
    decoder=cmu_decoder,
    loss_function=loss_function,
)
print(restored_cmu_valid_loss)

tf.Tensor(2.802323, shape=(), dtype=float32)


In [8]:
# Write the current encoder weights to their own file, separate from the
# full-model checkpoints. NOTE(review): the file name suggests this is meant
# to run before any Japanese training has touched the encoder -- confirm.
encoder.save_weights('./training_checkpoints/encoder_cmu_only')

In [6]:
def run_some_epochs(epochs,
                    train_dataset=None,
                    valid_dataset=None,
                    to_script='ja',
                    decoder=None,
                    sample_word='derick'):
    """Train for `epochs` epochs and checkpoint on validation improvement.

    Each epoch runs one training pass and one validation pass through
    `train.run_one_epoch`, prints both losses, prints a 2-best beam-search
    decoding of `sample_word`, and saves a checkpoint when the validation
    loss is lower than any seen so far in this call.

    Args:
        epochs: number of epochs to run.
        train_dataset: dataset for the training pass; defaults to the
            notebook's `eob_train_dataset`.
        valid_dataset: dataset for the validation pass; defaults to the
            notebook's `eob_valid_dataset`.
        to_script: target script name passed to training and decoding.
        decoder: decoder trained together with the shared `encoder`;
            defaults to the notebook's `ja_decoder`.
        sample_word: source word decoded after every epoch.

    Returns:
        The path returned by `checkpoint.save` for the epoch with the
        lowest validation loss, or None when `epochs` is 0.
    """
    # Defaults are resolved at call time so that the notebook globals can be
    # rebuilt between calls, exactly as with the original hard-coded names.
    if train_dataset is None:
        train_dataset = eob_train_dataset
    if valid_dataset is None:
        valid_dataset = eob_valid_dataset
    if decoder is None:
        decoder = ja_decoder

    checkpoint_path = None
    best_val_loss = None
    for e in range(epochs):
        loss = train.run_one_epoch(train_dataset,
                                   True,
                                   from_script='en',
                                   to_script=to_script,
                                   encoder=encoder,
                                   decoder=decoder,
                                   optimizer=optimizer,
                                   loss_function=loss_function)
        valid_loss = train.run_one_epoch(valid_dataset,
                                         False,
                                         from_script='en',
                                         to_script=to_script,
                                         encoder=encoder,
                                         decoder=decoder,
                                         loss_function=loss_function)
        print("Epoch {}: Train Loss {:.3f}, Valid Loss {:.3f}".format(e, loss, valid_loss))
        print(decode.transliterate(input_strs=[sample_word],
                                   from_script='en',
                                   to_script=to_script,
                                   encoder=encoder,
                                   decoder=decoder,
                                   k_best=2,
                                   decoding_method=decode.beam_search_decode))
        # The best loss is tracked per call, so the first epoch of every
        # call always produces a checkpoint.
        if best_val_loss is None or valid_loss < best_val_loss:
            best_val_loss = valid_loss
            checkpoint_path = checkpoint.save(file_prefix=checkpoint_prefix)
    return checkpoint_path

In [9]:
# Stage 1: mark every encoder layer as non-trainable, then run 5 epochs of
# 'en' -> 'ja' training.
# NOTE(review): freezing only takes effect if train.run_one_epoch restricts
# its updates to trainable variables -- confirm.
for layer in encoder.layers:
    layer.trainable = False

checkpoint_path = run_some_epochs(5)

# Reload the best checkpoint of this stage and check it was fully matched.
restore_status = checkpoint.restore(checkpoint_path)
restore_status.assert_consumed()

# Validation loss of the restored model (displayed as the cell output).
train.run_one_epoch(
    eob_valid_dataset,
    False,
    from_script='en',
    to_script='ja',
    encoder=encoder,
    decoder=ja_decoder,
    loss_function=loss_function,
)

Epoch 0: Train Loss 15.029, Valid Loss 7.992
([['デリック', 'ディック']], array([[-4.20449633, -4.48618308]]))


Epoch 1: Train Loss 6.801, Valid Loss 5.887
([['デリック', 'ディック']], array([[-3.39015117, -4.17438946]]))


Epoch 2: Train Loss 4.986, Valid Loss 5.087
([['デリック', 'ディック']], array([[-2.57261912, -3.45445298]]))


Epoch 3: Train Loss 3.980, Valid Loss 4.891
([['デリック', 'ディック']], array([[-2.34256739, -4.12991123]]))


Epoch 4: Train Loss 3.264, Valid Loss 4.785
([['デリック', 'デリ']], array([[-1.92635161, -3.25199075]]))


<tf.Tensor: id=174943845, shape=(), dtype=float32, numpy=4.789574>

In [12]:
# Stage 2: mark every encoder layer as trainable again and run 10 more
# epochs of 'en' -> 'ja' training.
for layer in encoder.layers:
    layer.trainable = True

checkpoint_path = run_some_epochs(10)

# Reload the best checkpoint of this stage and check it was fully matched.
restore_status = checkpoint.restore(checkpoint_path)
restore_status.assert_consumed()

# Validation loss of the restored model (displayed as the cell output).
train.run_one_epoch(
    eob_valid_dataset,
    False,
    from_script='en',
    to_script='ja',
    encoder=encoder,
    decoder=ja_decoder,
    loss_function=loss_function,
)

Epoch 0: Train Loss 2.738, Valid Loss 4.695
([['デリック', 'デリ']], array([[-1.98842829, -3.12527804]]))


Epoch 1: Train Loss 2.272, Valid Loss 4.783
([['デリック', 'デリックス']], array([[-1.69626716, -3.9795914 ]]))


Epoch 2: Train Loss 1.897, Valid Loss 4.803
([['デリック', 'ダリック']], array([[-1.29844523, -3.14170392]]))


Epoch 3: Train Loss 1.604, Valid Loss 4.952
([['デリック', 'ディリック']], array([[-1.45784592, -2.14429108]]))


Epoch 4: Train Loss 1.316, Valid Loss 4.973


([['デリック', 'デリックス']], array([[-1.11313215, -3.10429392]]))


Epoch 5: Train Loss 1.104, Valid Loss 5.155


([['デリック', 'ディック']], array([[-1.17609765, -3.13347937]]))


Epoch 6: Train Loss 0.933, Valid Loss 5.537
([['デリック', 'ディリック']], array([[-0.92497172, -2.38745292]]))


Epoch 7: Train Loss 0.797, Valid Loss 5.518
([['デリック', 'ディック']], array([[-1.22925424, -1.90467167]]))


Epoch 8: Train Loss 0.691, Valid Loss 5.655
([['ディリック', 'デリック']], array([[-0.86461503, -2.02275647]]))


Epoch 9: Train Loss 0.594, Valid Loss 5.645
([['デリック', 'ディリック']], array([[-1.01886313, -1.67887097]]))


<tf.Tensor: id=219422862, shape=(), dtype=float32, numpy=4.6767993>

In [10]:
# keep_default_na=False stops pandas from converting strings such as "NA"
# or "null" into NaN, so every entry is kept as the literal text in the file.
eob_valid_csv = '../data/split/eob_pairs_valid.csv'
valid_df = pd.read_csv(eob_valid_csv, keep_default_na=False)

In [13]:
# Source words and their reference transliterations from the validation CSV.
source_words = valid_df['en'].values
reference_words = valid_df['ja'].values

# Beam search with 20 beams, keeping the 10 best candidates per word.
tr = decode.transliterate(
    input_strs=source_words,
    from_script='en',
    to_script='ja',
    encoder=encoder,
    decoder=ja_decoder,
    k_best=10,
    num_beams=20,
    decoding_method=decode.beam_search_decode,
)

# Top-1 accuracy against the references (displayed as the cell output).
evaluate.top_k_accuracy(reference_words, tr, k=1)

0.5180722891566265