In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
tf.enable_eager_execution()
import importlib
import os

from matplotlib import pyplot as plt
%matplotlib notebook

from transliteration import data, train, model_one, script, decode, evaluate

In [2]:
# Re-import the data module so edits made to it on disk are picked up
# without restarting the kernel.
importlib.reload(data)
batch_size = 128


def _en_dataset(tfrecord_path, **options):
    """Build a batched dataset whose source script is English.

    `options` is forwarded untouched to `data.make_dataset` (target script
    and any per-corpus extras).
    """
    return data.make_dataset(tfrecord_path,
                             from_script='en',
                             batch_size=batch_size,
                             **options)


# The CMU splits additionally pass combine_words_proportion=0.3; the EOB
# splits leave that argument at make_dataset's default.
cmu_options = {'to_script': 'cmu', 'combine_words_proportion': 0.3}
eob_options = {'to_script': 'ja'}

# English -> CMU splits.
cmu_train_dataset = _en_dataset('../data/tfrecord/cmu_train.tfrecord', **cmu_options)
cmu_valid_dataset = _en_dataset('../data/tfrecord/cmu_valid.tfrecord', **cmu_options)
cmu_test_dataset = _en_dataset('../data/tfrecord/cmu_test.tfrecord', **cmu_options)

# English -> Japanese (EOB) splits.
eob_train_dataset = _en_dataset('../data/tfrecord/eob_train.tfrecord', **eob_options)
eob_valid_dataset = _en_dataset('../data/tfrecord/eob_valid.tfrecord', **eob_options)
eob_test_dataset = _en_dataset('../data/tfrecord/eob_test.tfrecord', **eob_options)

Instructions for updating:
Colocations handled automatically by placer.


In [3]:
# One Adam optimizer (default hyperparameters) is shared by every training
# phase in this notebook and is also tracked by the tf.train.Checkpoint
# created later in this cell.
optimizer = tf.train.AdamOptimizer()

def loss_function(real, pred):
    """Sparse softmax cross-entropy that ignores positions labelled 0.

    Args:
        real: integer class ids used as labels. Id 0 is treated as
            "no target" and contributes zero loss (presumably the padding
            id -- confirm against the script vocabularies).
        pred: unnormalized logits, with one more trailing dimension than
            `real` (the class dimension).

    Returns:
        Scalar tensor: the mean over *all* positions of the masked
        per-position loss. Masked positions are zeroed but still count in
        the denominator, exactly as before.
    """
    loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred)
    # Build the mask with TensorFlow ops instead of NumPy: same values, but
    # no device -> host copy per step and no dependence on eager execution.
    mask = tf.cast(tf.not_equal(real, 0), loss_.dtype)
    return tf.reduce_mean(loss_ * mask)

# Encoder over English input; it is passed to both the CMU and the Japanese
# training loops.
encoder_config = model_one.Config(
    lstm_size=480,
    embedding_size=30,
    attention_size=None,
    vocab_size=script.SCRIPTS['en'].vocab_size,
)

# Japanese decoder: half the LSTM / attention width of the CMU decoder.
ja_decoder_config = model_one.Config(
    lstm_size=240,
    embedding_size=30,
    attention_size=120,
    attention='monotonic_bahdanau',
    vocab_size=script.SCRIPTS['ja'].vocab_size,
)

# CMU decoder.
cmu_decoder_config = model_one.Config(
    lstm_size=480,
    embedding_size=30,
    attention_size=240,
    attention='monotonic_bahdanau',
    vocab_size=script.SCRIPTS['cmu'].vocab_size,
)

encoder = model_one.Encoder(encoder_config)
ja_decoder = model_one.Decoder(ja_decoder_config)
cmu_decoder = model_one.Decoder(cmu_decoder_config)

# A single checkpoint object tracks the optimizer and all three sub-models,
# so every save/restore covers the whole training state.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(
    optimizer=optimizer,
    encoder=encoder,
    ja_decoder=ja_decoder,
    cmu_decoder=cmu_decoder,
)

In [4]:
# Train encoder + CMU decoder for 15 epochs, saving a checkpoint every time
# the validation loss reaches a new minimum.
cmu_best_val_loss = None
cmu_checkpoint = None

# Keyword arguments common to every train / eval / decode call in this cell.
cmu_pair = dict(from_script='en',
                to_script='cmu',
                encoder=encoder,
                decoder=cmu_decoder)

for e in range(15):
    # Second positional argument: True for the training pass, False for the
    # validation pass (no optimizer is supplied for the latter).
    loss = train.run_one_epoch(cmu_train_dataset, True,
                               optimizer=optimizer,
                               loss_function=loss_function,
                               **cmu_pair)
    valid_loss = train.run_one_epoch(cmu_valid_dataset, False,
                                     loss_function=loss_function,
                                     **cmu_pair)

    improved = cmu_best_val_loss is None or valid_loss < cmu_best_val_loss
    if improved:
        cmu_best_val_loss = valid_loss
        cmu_checkpoint = checkpoint.save(file_prefix=checkpoint_prefix)

    print("Epoch {}: Train Loss {:.3f}, Valid Loss {:.3f}".format(e, loss, valid_loss))
    # Qualitative spot check: two best beam-search outputs for one fixed word.
    print(decode.transliterate(input_strs=['derick'],
                               k_best=2,
                               decoding_method=decode.beam_search_decode,
                               **cmu_pair))

Epoch 0: Train Loss 21.166, Valid Loss 12.236


([['D EH1 R IH0 K K EY2 T', 'D IH0 R IH1 K K AH0 N']], array([[-8.8619249 , -9.01395541]]))


Epoch 1: Train Loss 8.148, Valid Loss 5.611


([['D EH1 R IH0 K AH0 L IH0 D', 'D EH1 R IH0 K AH0 L AY2 Z']], array([[-9.19072141, -9.23511722]]))


Epoch 2: Train Loss 4.444, Valid Loss 4.357


([['D EH1 R IH0 K AH0 L AY2 Z', 'D EH1 R IH0 K AH0 L AY2 K IH2 NG K EH2 R IH0 K IH0 S UW1']], array([[ -7.37568376, -21.20046253]]))


Epoch 3: Train Loss 3.468, Valid Loss 3.373


([['D EH1 R IH0 K AH0 L AY2 Z', 'D EH1 R IH0 K AH0 L IH2 Z']], array([[-6.86713472, -7.56901041]]))


Epoch 4: Train Loss 2.950, Valid Loss 3.028


([['D EH1 R IH0 K AH0 L AY2 Z', 'D IH0 R IH1 K AH0 L AY2 Z UW2 D IH2 NG']], array([[ -7.92169074, -15.76549021]]))


Epoch 5: Train Loss 2.700, Valid Loss 2.936


([['D EH1 R IH0 K OW2 L D OY2 Z', 'D EH1 R IH0 K AO2 R IY0 P OY2 Z']], array([[ -8.60524111, -11.27415145]]))


Epoch 6: Train Loss 2.471, Valid Loss 2.740


([['D EH1 R IH0 K AH0 L AY2 Z', 'D EH1 R IH0 K AH0 L D AY2 OY2 NG']], array([[-8.00446287, -9.93428174]]))


Epoch 7: Train Loss 2.304, Valid Loss 2.651


([['D EH1 R IH0 K OW2 L D IH2 NG', 'D EH1 R IH0 K OW2 L D IH2 NG ER2']], array([[-7.59001536, -9.00992627]]))


Epoch 8: Train Loss 2.163, Valid Loss 2.568


([['D EH1 R IH0 K ER0', 'D EH1 R IH0 K OW2 L D AW0 Z UW2 T ER0']], array([[ -3.72443988, -12.8405595 ]]))


Epoch 9: Train Loss 1.989, Valid Loss 2.550


([['D EH1 R IH0 K AO2 L IH0 D IH0 NG', 'D EH1 R IH0 K AO2 L IH0 D IH0 S']], array([[-8.24104696, -8.59390437]]))


Epoch 10: Train Loss 1.867, Valid Loss 2.530


([['D ER0 IH1 K AH0 L AY2 T AE2 NG', 'D EH1 R IH0 K OW2 L D OY2 Z']], array([[-7.37564243, -8.77276751]]))


Epoch 11: Train Loss 1.776, Valid Loss 2.539


([['D EH1 R IH0 K AO2 R', 'D EH1 R IH0 K AO2 R IH0 F IH2 L D ER2 AY2 Z']], array([[ -4.07103087, -13.57692011]]))


Epoch 12: Train Loss 1.638, Valid Loss 2.508


([['D EH1 R IH0 K', 'D EH1 R IH0 K Y UW2 EH2 L IH0 D IH0 S']], array([[ -3.03253228, -11.00656966]]))


Epoch 13: Train Loss 1.532, Valid Loss 2.662


([['D EH1 R IH0 K AH0 L AY2 F UH2 NG', 'D EH1 R IH0 K Y UW2 AY2 L D AO0 R AY1 P AO0 NG']], array([[ -8.96566794, -13.48465992]]))


Epoch 14: Train Loss 1.459, Valid Loss 2.645


([['D EH1 R IH0 K AH0 L AY2 F UH2 T S UW2 AH1 K AH0 L AY2 P', 'D EH1 R IH0 K AH0 L AY2 F UH2 T S UW2 AH1 K AH0 L AY2 F']], array([[-13.14848475, -13.41284089]]))


In [5]:
# Roll every tracked object back to the best-validation CMU checkpoint;
# assert_consumed() fails loudly if the checkpoint and the current objects
# do not match up completely.
cmu_restore_status = checkpoint.restore(cmu_checkpoint)
cmu_restore_status.assert_consumed()

# Re-evaluate on the CMU validation split to confirm the restored state.
cmu_restored_valid_loss = train.run_one_epoch(cmu_valid_dataset, False,
                                              from_script='en',
                                              to_script='cmu',
                                              encoder=encoder,
                                              decoder=cmu_decoder,
                                              loss_function=loss_function)
print(cmu_restored_valid_loss)

tf.Tensor(2.5199614, shape=(), dtype=float32)


In [8]:
# Save only the encoder's weights, separately from the full
# tf.train.Checkpoint, under a name marking them as CMU-only.
# NOTE(review): this cell's execution count (In [8]) is out of sequence with
# its position in the notebook. The file name suggests it is meant to run
# right after the CMU restore and before any Japanese training -- confirm
# which encoder state was actually written.
encoder.save_weights('./training_checkpoints/encoder_big_cmu_only')

In [6]:
def run_some_epochs(epochs, train_dataset=None, valid_dataset=None,
                    to_script='ja', decoder=None, sample_input='derick'):
    """Train for `epochs` epochs, checkpointing whenever validation loss improves.

    Called with only `epochs`, this behaves exactly as before: it trains the
    shared `encoder` with `ja_decoder` on `eob_train_dataset` and validates on
    `eob_valid_dataset`. The optional arguments let the same loop drive other
    English -> X pairs instead of copy-pasting it.

    The best-loss tracker starts fresh on every call, so the first epoch of a
    call is always checkpointed, even if an earlier call reached a lower
    validation loss.

    Args:
        epochs: number of train + validate passes to run.
        train_dataset: dataset for the training pass; defaults to the
            notebook-level `eob_train_dataset`.
        valid_dataset: dataset for the validation pass; defaults to the
            notebook-level `eob_valid_dataset`.
        to_script: target script name passed to the train / decode helpers.
        decoder: decoder to train; defaults to the notebook-level `ja_decoder`.
        sample_input: English string decoded after each epoch as a
            qualitative spot check.

    Returns:
        The path returned by `checkpoint.save` for the epoch with the lowest
        validation loss seen during this call, or None if no checkpoint was
        written (e.g. `epochs` is 0).
    """
    # Resolve defaults at call time so the notebook globals are read when the
    # function runs, not when it is defined.
    if train_dataset is None:
        train_dataset = eob_train_dataset
    if valid_dataset is None:
        valid_dataset = eob_valid_dataset
    if decoder is None:
        decoder = ja_decoder

    # Keyword arguments common to every train / eval / decode call.
    pair = dict(from_script='en',
                to_script=to_script,
                encoder=encoder,
                decoder=decoder)

    checkpoint_path = None
    best_val_loss = None
    for e in range(epochs):
        loss = train.run_one_epoch(train_dataset,
                                   True,
                                   optimizer=optimizer,
                                   loss_function=loss_function,
                                   **pair)
        valid_loss = train.run_one_epoch(valid_dataset,
                                         False,
                                         loss_function=loss_function,
                                         **pair)
        print("Epoch {}: Train Loss {:.3f}, Valid Loss {:.3f}".format(e, loss, valid_loss))
        print(decode.transliterate(input_strs=[sample_input],
                                   k_best=2,
                                   decoding_method=decode.beam_search_decode,
                                   **pair))
        if best_val_loss is None or valid_loss < best_val_loss:
            best_val_loss = valid_loss
            checkpoint_path = checkpoint.save(file_prefix=checkpoint_prefix)
    return checkpoint_path

In [7]:
# Phase 1: mark every encoder layer as non-trainable, then train for 5 epochs
# on the English -> Japanese data.
for layer in encoder.layers:
    layer.trainable = False

checkpoint_path = run_some_epochs(5)

# Return to the best-validation checkpoint of this phase before evaluating.
restore_status = checkpoint.restore(checkpoint_path)
restore_status.assert_consumed()

# Bare last expression so the validation loss is displayed as cell output.
train.run_one_epoch(
    eob_valid_dataset,
    False,
    from_script='en',
    to_script='ja',
    encoder=encoder,
    decoder=ja_decoder,
    loss_function=loss_function,
)

Epoch 0: Train Loss 14.073, Valid Loss 7.238
([['デリアキャン', 'デリアキャント']], array([[ -8.34756726, -10.81557959]]))


Epoch 1: Train Loss 6.051, Valid Loss 5.448


([['デリアク', 'ディアキャックション']], array([[ -4.41941084, -15.66589578]]))


Epoch 2: Train Loss 4.283, Valid Loss 4.851


([['ディリカック', 'ディリカックト']], array([[-6.76030186, -9.10081938]]))


Epoch 3: Train Loss 3.177, Valid Loss 4.697


([['ディアキャム', 'ディアキャング']], array([[-6.62485814, -9.01161444]]))


Epoch 4: Train Loss 2.400, Valid Loss 4.839


([['ディリキャックス', 'ディリキャックスポイトラフィュアップト']], array([[ -9.1408354 , -28.67769193]]))


<tf.Tensor: id=252329653, shape=(), dtype=float32, numpy=4.640569>

In [8]:
# Phase 2: mark the encoder layers as trainable again and continue for
# 10 more epochs on the English -> Japanese data.
for layer in encoder.layers:
    layer.trainable = True

checkpoint_path = run_some_epochs(10)

# Return to the best-validation checkpoint of this phase before evaluating.
restore_status = checkpoint.restore(checkpoint_path)
restore_status.assert_consumed()

# Bare last expression so the validation loss is displayed as cell output.
train.run_one_epoch(
    eob_valid_dataset,
    False,
    from_script='en',
    to_script='ja',
    encoder=encoder,
    decoder=ja_decoder,
    loss_function=loss_function,
)

Epoch 0: Train Loss 2.367, Valid Loss 4.933


([['ディリカ', 'ディリカム']], array([[-4.07673901, -4.35391801]]))


Epoch 1: Train Loss 1.808, Valid Loss 4.858


([['ディリキャブ', 'ディリキャブス']], array([[-6.21442242, -7.03158577]]))


Epoch 2: Train Loss 1.387, Valid Loss 4.935


([['デリコン', 'ディリキャックショョングホモーム']], array([[ -3.23265025, -23.32005098]]))


Epoch 3: Train Loss 1.038, Valid Loss 5.155


([['ディリキャックショョングホモーム', 'ディリキャックショョングホディアム']], array([[-20.92677638, -23.05851698]]))


Epoch 4: Train Loss 0.842, Valid Loss 5.339


([['ディリキャックショョングホモーム', 'ディリキャックショョングホモージャム']], array([[-20.22624055, -23.82038466]]))


Epoch 5: Train Loss 0.694, Valid Loss 5.427


([['ディリック', 'ディリックプコーム']], array([[-3.66283104, -9.55650542]]))


Epoch 6: Train Loss 0.604, Valid Loss 5.630


([['ディリック', 'ディリックグ']], array([[-3.02801965, -5.06569622]]))


Epoch 7: Train Loss 0.537, Valid Loss 5.619


([['ディリックプォコーム', 'ディリックプコーマン']], array([[-9.13205026, -9.72117453]]))


Epoch 8: Train Loss 0.487, Valid Loss 5.952


([['ディリック', 'ディリックグ']], array([[-3.06644919, -4.70954791]]))


Epoch 9: Train Loss 0.457, Valid Loss 5.656


([['ディリックプコーム', 'ディリックプコール']], array([[-7.51793574, -8.276663  ]]))


<tf.Tensor: id=290142761, shape=(), dtype=float32, numpy=4.879769>

In [11]:
# Show which checkpoint the most recently executed run_some_epochs call
# selected as its best.
print(checkpoint_path)

./training_checkpoints/ckpt-18


In [12]:
# Manually chosen checkpoint, overriding the one picked by the fine-tuning
# phase. Counting the improvements in the logged runs (12 saves during CMU
# training, then 4 with the encoder frozen), ckpt-16 appears to be the best
# frozen-encoder checkpoint, which the fine-tuning phase never beat.
# NOTE(review): the number is only valid for this particular run; re-derive
# it after any retraining.
selected_checkpoint = './training_checkpoints/ckpt-16'
selected_status = checkpoint.restore(selected_checkpoint)
selected_status.assert_consumed()

# Bare last expression so the validation loss is displayed as cell output.
train.run_one_epoch(
    eob_valid_dataset,
    False,
    from_script='en',
    to_script='ja',
    encoder=encoder,
    decoder=ja_decoder,
    loss_function=loss_function,
)

<tf.Tensor: id=297270050, shape=(), dtype=float32, numpy=4.629512>

In [9]:
# Validation word pairs; the 'en' and 'ja' columns are used for decoding and
# scoring. keep_default_na=False stops pandas from turning strings such as
# "NA" or "null" into NaN, so such words survive as ordinary text.
valid_df = pd.read_csv('../data/split/eob_pairs_valid.csv',
                       keep_default_na=False)

In [13]:
# Beam-search decode every English validation word with the currently
# restored encoder / Japanese decoder (k_best=10, num_beams=20).
tr = decode.transliterate(
    input_strs=valid_df['en'].values,
    from_script='en',
    to_script='ja',
    encoder=encoder,
    decoder=ja_decoder,
    k_best=10,
    num_beams=20,
    decoding_method=decode.beam_search_decode,
)

# Top-1 accuracy against the reference Japanese strings; left as a bare
# expression so the score is displayed as cell output.
evaluate.top_k_accuracy(valid_df['ja'].values, tr, k=1)

0.5191675794085433