In [1]:
import json
import tensorflow as tf
import numpy as np
from sklearn.cross_validation import train_test_split



In [2]:
with open('dictionary.json') as fopen:
    x = json.load(fopen)
dictionary_english = x['english']
dictionary_bahasa = x['bahasa']

In [3]:
with open('english-malay.json') as fopen:
    x = json.load(fopen)
english = x[0]
bahasa = x[1]

In [4]:
GO = dictionary_english['dictionary']['GO']
PAD = dictionary_english['dictionary']['PAD']
EOS = dictionary_english['dictionary']['EOS']
UNK = dictionary_english['dictionary']['UNK']

In [5]:
from tqdm import tqdm

for i in tqdm(range(len(bahasa))):
    bahasa[i].append('EOS')

100%|██████████| 100000/100000 [00:00<00:00, 1105174.76it/s]


In [6]:
emb_size = 256
n_hidden = 256
n_layers = 4
n_attn_heads = 16
learning_rate = 1e-4
batch_size = 128
epoch = 20

In [7]:
def encoder_block(inp, n_hidden, filter_size):
    inp = tf.expand_dims(inp, 2)
    inp = tf.pad(inp, [[0, 0], [(filter_size[0]-1)//2, (filter_size[0]-1)//2], [0, 0], [0, 0]])
    conv = tf.layers.conv2d(inp, n_hidden, filter_size, padding="VALID", activation=None)
    conv = tf.squeeze(conv, 2)
    return conv

def decoder_block(inp, n_hidden, filter_size):
    inp = tf.expand_dims(inp, 2)
    inp = tf.pad(inp, [[0, 0], [filter_size[0]-1, 0], [0, 0], [0, 0]])
    conv = tf.layers.conv2d(inp, n_hidden, filter_size, padding="VALID", activation=None)
    conv = tf.squeeze(conv, 2)
    return conv

def glu(x):
    return tf.multiply(x[:, :, :tf.shape(x)[2]//2], tf.sigmoid(x[:, :, tf.shape(x)[2]//2:]))

def layer(inp, conv_block, kernel_width, n_hidden, residual=None):
    z = conv_block(inp, n_hidden, (kernel_width, 1))
    return glu(z) + (residual if residual is not None else 0)

class Model:
    def __init__(self):
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])

        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype = tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype = tf.int32)
        batch_size = tf.shape(self.X)[0]
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        
        encoder_embedding = tf.Variable(tf.random_uniform([len(dictionary_english['dictionary']), emb_size], -1, 1))
        decoder_embedding = tf.Variable(tf.random_uniform([len(dictionary_bahasa['dictionary']), emb_size], -1, 1))
        
        def forward(x, y,reuse=False):
            with tf.variable_scope('forward',reuse=reuse):
                encoder_embedded = tf.nn.embedding_lookup(encoder_embedding, x)
                decoder_embedded = tf.nn.embedding_lookup(decoder_embedding, y)

                e = tf.identity(encoder_embedded)

                for i in range(n_layers):
                    z = layer(encoder_embedded, encoder_block, 3, n_hidden * 2, encoder_embedded)
                    encoder_embedded = z

                encoder_output, output_memory = z, z + e
                g = tf.identity(decoder_embedded)

                for i in range(n_layers):
                    attn_res = h = layer(decoder_embedded, decoder_block, 3, n_hidden * 2, 
                                         residual=tf.zeros_like(decoder_embedded))
                    C = []
                    for j in range(n_attn_heads):
                        h_ = tf.layers.dense(h, n_hidden//n_attn_heads)
                        g_ = tf.layers.dense(g, n_hidden//n_attn_heads)
                        zu_ = tf.layers.dense(encoder_output, n_hidden//n_attn_heads)
                        ze_ = tf.layers.dense(output_memory, n_hidden//n_attn_heads)

                        d = tf.layers.dense(h_, n_hidden//n_attn_heads) + g_
                        dz = tf.matmul(d, tf.transpose(zu_, [0, 2, 1]))
                        a = tf.nn.softmax(dz)
                        c_ = tf.matmul(a, ze_)
                        C.append(c_)

                    c = tf.concat(C, 2)
                    h = tf.layers.dense(attn_res + c, n_hidden)
                    decoder_embedded = h

                return tf.layers.dense(decoder_embedded, len(dictionary_bahasa['dictionary']))
        self.training_logits = forward(self.X, decoder_input)
        self.logits = forward(self.X, self.Y, reuse=True)
        self.k = tf.placeholder(dtype = tf.int32)
        p = tf.nn.softmax(self.logits)
        self.topk_logprobs, self.topk_ids = tf.nn.top_k(tf.log(p), self.k)
        
        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [8]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model()
sess.run(tf.global_variables_initializer())

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Use keras.layers.dense instead.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use tf.cast instead.


In [9]:
def str_idx(corpus, dic):
    X = []
    for i in corpus:
        ints = []
        for k in i:
            ints.append(dic.get(k,UNK))
        X.append(ints)
    return X

In [10]:
english = str_idx(english, dictionary_english['dictionary'])
bahasa = str_idx(bahasa, dictionary_bahasa['dictionary'])

In [11]:
train_X, test_X, train_Y, test_Y = train_test_split(english, bahasa, test_size = 0.2)

In [12]:
def pad_sentence_batch(sentence_batch, pad_int):
    padded_seqs = []
    seq_lens = []
    max_sentence_len = max([len(sentence) for sentence in sentence_batch])
    for sentence in sentence_batch:
        padded_seqs.append(sentence + [pad_int] * (max_sentence_len - len(sentence)))
        seq_lens.append(len(sentence))
    return padded_seqs, seq_lens

In [13]:
import time

for EPOCH in range(20):
    lasttime = time.time()

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x, _ = pad_sentence_batch(train_X[i : index], PAD)
        batch_y, _ = pad_sentence_batch(train_Y[i : index], PAD)
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)
        
    pbar = tqdm(range(0, len(test_X), batch_size), desc = 'test minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x, _ = pad_sentence_batch(test_X[i : index], PAD)
        batch_y, _ = pad_sentence_batch(test_Y[i : index], PAD)
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size
        
    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )

train minibatch loop: 100%|██████████| 625/625 [02:29<00:00,  4.31it/s, accuracy=0.122, cost=5.89] 
test minibatch loop: 100%|██████████| 157/157 [00:21<00:00,  7.20it/s, accuracy=0.11, cost=6.05]  
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 171.5534746646881
epoch: 0, training loss: 6.537692, training acc: 0.083362, valid loss: 6.034824, valid acc: 0.116770



train minibatch loop: 100%|██████████| 625/625 [02:23<00:00,  4.32it/s, accuracy=0.16, cost=5.43] 
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.07it/s, accuracy=0.122, cost=5.69]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 163.3060667514801
epoch: 1, training loss: 5.718268, training acc: 0.137435, valid loss: 5.635443, valid acc: 0.152731



train minibatch loop: 100%|██████████| 625/625 [02:23<00:00,  4.33it/s, accuracy=0.188, cost=5.13]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.09it/s, accuracy=0.155, cost=5.5] 
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 163.10752296447754
epoch: 2, training loss: 5.360307, training acc: 0.168204, valid loss: 5.409797, valid acc: 0.178136



train minibatch loop: 100%|██████████| 625/625 [02:23<00:00,  4.33it/s, accuracy=0.207, cost=4.92]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.10it/s, accuracy=0.187, cost=5.38]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.8532919883728
epoch: 3, training loss: 5.102846, training acc: 0.191015, valid loss: 5.263217, valid acc: 0.194642



train minibatch loop: 100%|██████████| 625/625 [02:23<00:00,  4.32it/s, accuracy=0.23, cost=4.69] 
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.10it/s, accuracy=0.191, cost=5.27]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.508446931839
epoch: 4, training loss: 4.897198, training acc: 0.209113, valid loss: 5.149193, valid acc: 0.210202



train minibatch loop: 100%|██████████| 625/625 [02:23<00:00,  4.34it/s, accuracy=0.245, cost=4.55]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.11it/s, accuracy=0.203, cost=5.21]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.46776747703552
epoch: 5, training loss: 4.718373, training acc: 0.224997, valid loss: 5.063505, valid acc: 0.223603



train minibatch loop: 100%|██████████| 625/625 [02:23<00:00,  4.31it/s, accuracy=0.253, cost=4.42]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.12it/s, accuracy=0.217, cost=5.13]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.3607897758484
epoch: 6, training loss: 4.562209, training acc: 0.239301, valid loss: 4.992029, valid acc: 0.234850



train minibatch loop: 100%|██████████| 625/625 [02:23<00:00,  4.32it/s, accuracy=0.276, cost=4.3] 
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.12it/s, accuracy=0.214, cost=5.11]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.38609957695007
epoch: 7, training loss: 4.425593, training acc: 0.252429, valid loss: 4.941936, valid acc: 0.244461



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.35it/s, accuracy=0.286, cost=4.17]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.11it/s, accuracy=0.229, cost=5.03]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.33119654655457
epoch: 8, training loss: 4.302183, training acc: 0.265649, valid loss: 4.894171, valid acc: 0.252230



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.33it/s, accuracy=0.298, cost=4.07]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.15it/s, accuracy=0.234, cost=5.03]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.2186315059662
epoch: 9, training loss: 4.195674, training acc: 0.277147, valid loss: 4.855413, valid acc: 0.257811



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.35it/s, accuracy=0.304, cost=3.98]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.15it/s, accuracy=0.227, cost=4.99]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.0120666027069
epoch: 10, training loss: 4.098917, training acc: 0.287443, valid loss: 4.812391, valid acc: 0.265092



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.32it/s, accuracy=0.31, cost=3.92] 
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.10it/s, accuracy=0.23, cost=4.99] 
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.10736179351807
epoch: 11, training loss: 4.012362, training acc: 0.297252, valid loss: 4.778640, valid acc: 0.270392



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.34it/s, accuracy=0.321, cost=3.84]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.17it/s, accuracy=0.24, cost=4.96] 
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 161.91348266601562
epoch: 12, training loss: 3.926506, training acc: 0.306347, valid loss: 4.755209, valid acc: 0.275216



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.35it/s, accuracy=0.325, cost=3.8] 
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.17it/s, accuracy=0.249, cost=4.88]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.0905864238739
epoch: 13, training loss: 3.854247, training acc: 0.314571, valid loss: 4.729507, valid acc: 0.280167



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.36it/s, accuracy=0.324, cost=3.73]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.17it/s, accuracy=0.26, cost=4.84] 
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 161.60906338691711
epoch: 14, training loss: 3.784988, training acc: 0.322622, valid loss: 4.696328, valid acc: 0.284120



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.36it/s, accuracy=0.342, cost=3.65]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.19it/s, accuracy=0.264, cost=4.83]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 161.60375428199768
epoch: 15, training loss: 3.724188, training acc: 0.329533, valid loss: 4.663269, valid acc: 0.289800



train minibatch loop: 100%|██████████| 625/625 [02:23<00:00,  4.31it/s, accuracy=0.349, cost=3.58]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.11it/s, accuracy=0.254, cost=4.83]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.7857267856598
epoch: 16, training loss: 3.657729, training acc: 0.337056, valid loss: 4.629069, valid acc: 0.296313



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.36it/s, accuracy=0.342, cost=3.54]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.18it/s, accuracy=0.257, cost=4.83]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 162.06868386268616
epoch: 17, training loss: 3.599673, training acc: 0.344293, valid loss: 4.619375, valid acc: 0.298722



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.35it/s, accuracy=0.356, cost=3.48]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.17it/s, accuracy=0.254, cost=4.85]
train minibatch loop:   0%|          | 0/625 [00:00<?, ?it/s]

time taken: 161.62545585632324
epoch: 18, training loss: 3.541353, training acc: 0.351798, valid loss: 4.602534, valid acc: 0.302699



train minibatch loop: 100%|██████████| 625/625 [02:22<00:00,  4.34it/s, accuracy=0.374, cost=3.43]
test minibatch loop: 100%|██████████| 157/157 [00:19<00:00,  8.17it/s, accuracy=0.269, cost=4.8] 

time taken: 161.6322410106659
epoch: 19, training loss: 3.484283, training acc: 0.358929, valid loss: 4.561511, valid acc: 0.306542




