In [1]:
import os
import re
import tensorflow as tf
import numpy as np
import json
import collections
from sklearn.cross_validation import train_test_split
from tensor2tensor.utils import beam_search, rouge



In [2]:
# Load the scraped news articles from disk.
# The encoding is pinned to UTF-8 so the text decodes the same way on every platform
# instead of depending on the locale's default codec.
with open('news-30k.json', encoding='utf-8') as fopen:
    news = json.load(fopen)
len(news)

29855

In [3]:
# Peek at the first record to see which fields each article carries.
news[0]

{'title': 'Ibu saudara Haziq terharu sokongan rakyat Malaysia',
 'url': 'https://www.themalaysianinsight.com/bahasa/s/142491',
 'news': 'themalaysianinsight',
 'language': 'malay',
 'top-image': 'https://www.themalaysianinsight.com/resources/stories_images/142491/perhimpunanan_solidarity_kedamaian_03__full.jpg',
 'text': 'WARIS keluarga Allahyarham Muhammad Haziq Mohd Tarmizi, 17, yang terkorban dalam tragedi tembakan di Christchurch, New Zealand, pada 15 Mac lepas, melahirkan rasa terharu akan sokongan diberi rakyat Malaysia semasa perhimpunan Solidariti Kedamaian.\n\nZarina Shuib , ibu saudara Muhammad Haziq, memanjatkan kesyukuran kepada Allah SWT kerana berkesempatan menyertai rakyat Malaysia dalam perhimpunan itu di Kuala Lumpur hari ini.',
 'date': '2019-03-23T03:52:02',
 'date_utc': '2019-03-22T19:52:02'}

In [4]:
import malaya
# Bound `tokenize` method of malaya's `_SocialTokenizer`; `preprocessing` calls it on raw text.
# NOTE(review): the class name is underscore-prefixed, so this is presumably private API.
tokenizer = malaya.preprocessing._SocialTokenizer().tokenize

# One-character tokens that `preprocessing` keeps; every other one-character token is dropped.
accept_tokens = ',-.()"\''

def is_number_regex(s):
    """Return True when `s` is written as a plain number.

    Two spellings are accepted: a decimal such as ``"3.14"`` (digits, one dot,
    digits), or any string for which ``str.isdigit()`` is true.

    Args:
        s: token to test (str).

    Returns:
        bool: True for numeric tokens, False otherwise (including ``""``).
    """
    # Raw string: `\d` and `\.` are regex escapes, not Python string escapes. The
    # non-raw spelling triggers an invalid-escape-sequence warning on modern Python.
    if re.match(r"^\d+?\.\d+?$", s) is not None:
        return True
    return s.isdigit()

def detect_money(word):
    """Tell whether `word` is the prefix ``rm`` followed by a number (e.g. ``rm10``).

    Args:
        word: token to test; the prefix comparison is case-sensitive.

    Returns:
        bool: True when the first two characters are ``'rm'`` and the remainder
        satisfies `is_number_regex`.
    """
    prefix, amount = word[:2], word[2:]
    return prefix == 'rm' and is_number_regex(amount)

def preprocessing(string):
    """Tokenize `string` and normalise its tokens.

    Steps, applied per token produced by `tokenizer`:
      1. drop one-character (or empty) tokens unless they occur in `accept_tokens`;
      2. lowercase;
      3. replace numbers with ``<NUM>`` and ``rm``-prefixed numbers with ``<MONEY>``.

    Args:
        string: raw text.

    Returns:
        list[str]: the normalised tokens, in order.
    """
    cleaned = []
    for raw in tokenizer(string):
        # The length/whitelist filter looks at the token before it is lowercased.
        if len(raw) <= 1 and raw not in accept_tokens:
            continue
        token = raw.lower()
        if is_number_regex(token):
            token = '<NUM>'
        elif detect_money(token):
            token = '<MONEY>'
        cleaned.append(token)
    return cleaned

def clean_label(label):
    """Reduce `label` to lowercase ASCII letters, hyphens and single spaces.

    Every run of characters other than ``A-Z``, ``a-z``, ``-`` and space is replaced
    by one space; the result is lowercased, runs of spaces are collapsed and the
    ends are stripped.

    Args:
        label: raw label text (str).

    Returns:
        str: the cleaned label.
    """
    # Raw strings keep `\-` a regex escape rather than an (invalid) Python string escape.
    letters_only = re.sub(r'[^A-Za-z\- ]+', ' ', label)
    return re.sub(r'[ ]+', ' ', letters_only.lower()).strip()

In [5]:
from tqdm import tqdm

min_len = 5
max_len = 500

# x collects the tokenised article bodies (cut to `max_len` tokens),
# y collects the tokenised titles of the same articles, in the same order.
x, y = [], []
for article in tqdm(news):
    # Articles whose body has at most `min_len` whitespace-separated words are skipped.
    if len(article['text'].split()) <= min_len:
        continue
    x.append(preprocessing(article['text'])[:max_len])
    y.append(preprocessing(article['title']))

100%|██████████| 29855/29855 [00:45<00:00, 662.58it/s]


In [6]:
# Sanity check: bodies and titles must stay aligned one-to-one.
len(x), len(y)

(29855, 29855)

In [7]:
def build_dataset(words, n_words):
    """Build a word <-> id vocabulary and encode `words` with it.

    Ids 0-3 are reserved for the special tokens PAD, GO, EOS and UNK (in that
    order); the `n_words` most frequent words of the corpus follow.

    Args:
        words: flat, re-iterable sequence of tokens (it is traversed twice).
        n_words: maximum number of distinct corpus words to keep.

    Returns:
        tuple ``(data, count, dictionary, reversed_dictionary)`` where
          * data is the list of ids, one per element of `words`; words outside
            the vocabulary are encoded as UNK,
          * count is the four ``[special_token, number]`` entries followed by
            ``(word, frequency)`` tuples; the UNK entry's number is the count of
            out-of-vocabulary occurrences, the other special numbers are
            placeholders,
          * dictionary maps token -> id,
          * reversed_dictionary maps id -> token.
    """
    count = [['PAD', 0], ['GO', 1], ['EOS', 2], ['UNK', 3]]
    count.extend(collections.Counter(words).most_common(n_words))

    dictionary = {}
    for word, _ in count:
        # setdefault keeps the reserved ids stable and avoids handing the same id to two
        # tokens if a corpus word happens to equal one of the special tokens.
        dictionary.setdefault(word, len(dictionary))

    # Unknown words must map to UNK, not to id 0: id 0 is PAD, and the model treats
    # zeros as padding when it derives sequence lengths.
    unk_id = dictionary['UNK']
    data = []
    unk_count = 0
    for word in words:
        index = dictionary.get(word)
        if index is None:
            index = unk_id
            unk_count += 1
        data.append(index)
    count[unk_id][1] = unk_count

    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reversed_dictionary

In [8]:
import itertools

# Pool every body and title token into one flat list and build a single vocabulary
# shared by source and target. `n_words` equals the number of distinct tokens, so
# no corpus word is left out of the dictionary.
concat = list(itertools.chain.from_iterable(x + y))
vocabulary_size = len(set(concat))
data, count, dictionary, rev_dictionary = build_dataset(concat, vocabulary_size)
print('vocab from size: %d' % vocabulary_size)
print('Most common words', count[4:10])
print('Sample data', data[:10], [rev_dictionary[idx] for idx in data[:10]])
print('filtered vocab size:', len(dictionary))
print("% of vocab used: {}%".format(round(len(dictionary) / vocabulary_size, 4) * 100))

vocab from size: 88660
Most common words [(',', 388655), ('.', 339847), ('yang', 159163), ('dan', 148885), ('di', 128404), ('-', 123230)]
Sample data [4308, 289, 1459, 354, 1533, 117, 3688, 4, 10, 4] ['waris', 'keluarga', 'allahyarham', 'muhammad', 'haziq', 'mohd', 'tarmizi', ',', '<NUM>', ',']
filtered vocab size: 88664
% of vocab used: 100.0%


In [9]:
# Terminate every title with the EOS marker so the decoder learns where to stop.
# The guard makes the cell idempotent: re-running it must not stack a second EOS.
# (Corpus tokens are lowercased by `preprocessing`, so a trailing 'EOS' can only be the marker.)
for title_tokens in y:
    if not title_tokens or title_tokens[-1] != 'EOS':
        title_tokens.append('EOS')

In [10]:
# Ids of the four special tokens, looked up once from the vocabulary.
GO = dictionary['GO']    # start-of-sequence id prepended to the decoder input
PAD = dictionary['PAD']  # padding id used by pad_sentence_batch
EOS = dictionary['EOS']  # end-of-sequence id appended to every title
UNK = dictionary['UNK']  # id for tokens missing from the vocabulary

In [11]:
def str_idx(corpus, dic, UNK=3):
    """Convert tokenised sentences to id sequences.

    Args:
        corpus: iterable of sentences, each an iterable of tokens.
        dic: mapping token -> id.
        UNK: id substituted for tokens missing from `dic`.

    Returns:
        list[list[int]]: one id list per sentence, preserving order.
    """
    return [[dic.get(token, UNK) for token in sentence] for sentence in corpus]

In [12]:
# Encode bodies (X) and titles (Y) as lists of vocabulary ids.
X = str_idx(x, dictionary)
Y = str_idx(y, dictionary)

In [13]:
# Hold out 10% of the articles for evaluation. The seed is fixed so that a
# Restart & Run All produces the same split (and therefore comparable metrics).
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size = 0.1, random_state = 42)

In [14]:
def embed_seq(x, vocab_sz, embed_dim, name, zero_pad=True):
    """Look up embeddings for the ids in `x` from a variable called `name`.

    Args:
        x: integer id tensor.
        vocab_sz: number of rows of the embedding table.
        embed_dim: number of columns of the embedding table.
        name: name handed to ``tf.get_variable`` for the table.
        zero_pad: when True, row 0 of the table is replaced by zeros before the lookup.

    Returns:
        Tensor produced by ``tf.nn.embedding_lookup``.
    """
    table = tf.get_variable(name, [vocab_sz, embed_dim])
    if zero_pad:
        zero_row = tf.zeros([1, embed_dim])
        table = tf.concat([zero_row, table[1:, :]], 0)
    return tf.nn.embedding_lookup(table, x)

def position_encoding(inputs):
    """Build sinusoidal position features for `inputs`.

    For position ``p`` and every even index ``i`` below the static last dimension
    ``D`` of `inputs`, the angle is ``p / 10000 ** (i / D)``. All sines are placed
    first, followed by all cosines (they are concatenated, not interleaved), so the
    feature axis has as many columns as `inputs` when ``D`` is even.

    Args:
        inputs: rank-3 tensor; axis 0 and axis 1 sizes are read dynamically,
            the last axis size must be statically known.

    Returns:
        float32 tensor tiled along axis 0 to the batch size of `inputs`.
    """
    batch, steps = tf.shape(inputs)[0], tf.shape(inputs)[1]
    depth = inputs.get_shape()[-1].value
    positions = tf.reshape(tf.range(0.0, tf.to_float(steps), dtype=tf.float32), [-1, 1])
    even_dims = np.arange(0, depth, 2, np.float32)
    wavelengths = np.reshape(np.power(10000.0, even_dims / depth), [1, -1])
    angles = positions / wavelengths
    table = tf.concat([tf.sin(angles), tf.cos(angles)], 1)
    return tf.tile(tf.expand_dims(table, 0), [batch, 1, 1])

def layer_norm(inputs, epsilon=1e-8):
    """Normalise `inputs` over its last axis and apply a learned scale and shift.

    Creates (or reuses, depending on the enclosing variable scope) two variables
    named ``gamma`` (initialised to ones) and ``beta`` (initialised to zeros), both
    shaped like the last dimension of `inputs`.

    Args:
        inputs: tensor whose last dimension is statically known.
        epsilon: added to the variance before the square root for numerical stability.

    Returns:
        Tensor with the same shape as `inputs`.
    """
    mu, var = tf.nn.moments(inputs, [-1], keep_dims=True)
    feature_shape = inputs.get_shape()[-1:]
    scale = tf.get_variable('gamma', feature_shape, tf.float32, tf.ones_initializer())
    shift = tf.get_variable('beta', feature_shape, tf.float32, tf.zeros_initializer())
    standardized = (inputs - mu) / tf.sqrt(var + epsilon)
    return scale * standardized + shift


def cnn_block(x, dilation_rate, pad_sz, hidden_dim, kernel_size):
    """Layer-normalised, left-padded dilated 1-D convolution followed by ReLU.

    The time axis is padded with `pad_sz` zero steps on the left only, then a
    'valid' convolution is applied. With ``pad_sz == (kernel_size - 1) * dilation_rate``
    (what the model passes) the output has as many steps as the input and step
    ``t`` only sees inputs at steps ``<= t``.

    Earlier revisions padded both sides and sliced the last `pad_sz` outputs off.
    The retained outputs never read the right padding, so the result was the same,
    but ``x[:, :-pad_sz, :]`` produced an empty tensor when ``pad_sz == 0``
    (i.e. ``kernel_size == 1``). Left-only padding avoids both the wasted work and
    that edge case.

    Args:
        x: float tensor indexed as [batch, time, channels]; channels must equal `hidden_dim`.
        dilation_rate: dilation of the convolution.
        pad_sz: number of zero steps to prepend on the time axis.
        hidden_dim: number of convolution filters (and channels of the padding).
        kernel_size: width of the convolution kernel.

    Returns:
        Tensor of ReLU-activated convolution outputs.
    """
    x = layer_norm(x)
    left_pad = tf.zeros([tf.shape(x)[0], pad_sz, hidden_dim])
    x = tf.layers.conv1d(inputs = tf.concat([left_pad, x], 1),
                         filters = hidden_dim,
                         kernel_size = kernel_size,
                         dilation_rate = dilation_rate)
    return tf.nn.relu(x)

class Summarization:
    """Dilated-convolution encoder/decoder with multi-head attention (TF1 graph mode).

    Constructing an instance wires up the placeholders, the teacher-forced training
    graph, the loss/optimizer and a masked token-accuracy metric. `forward` can be
    called again with ``reuse=True`` to run the same weights on other inputs.

    Attributes set by ``__init__``:
        X, Y: int32 placeholders of shape [batch, time] for source and target ids;
            id 0 is treated as padding.
        X_seq_len, Y_seq_len: number of non-zero ids in each row of X / Y.
        batch_size: dynamic batch size read from X.
        embedding: embedding matrix shared by encoder and decoder.
        training_logits: output of `forward` on X and the GO-shifted Y.
        cost: sequence loss over the non-padding target positions.
        optimizer: Adam update op minimising `cost`.
        prediction: argmax ids at the non-padding target positions (flattened).
        accuracy: fraction of those positions predicted correctly.
    """
    def __init__(self, size_layer, num_layers, embedded_size, 
                 dict_size, learning_rate, 
                 kernel_size = 2, n_attn_heads = 16):
        """Build the training graph.

        Args:
            size_layer: number of conv filters / width of the dense projections.
            num_layers: number of convolution blocks in the encoder and in the decoder.
            embedded_size: embedding width; must equal `size_layer`.
            dict_size: vocabulary size (rows of the embedding, width of the logits).
            learning_rate: Adam learning rate.
            kernel_size: convolution kernel width.
            n_attn_heads: number of attention heads; must divide `size_layer`.

        Raises:
            ValueError: if the dimension constraints above are violated.
        """
        # Fail early with a readable message; otherwise these surface as shape errors
        # deep inside graph construction.
        if embedded_size != size_layer:
            raise ValueError(
                'embedded_size (%d) must equal size_layer (%d): the convolution blocks '
                'are added residually to the embeddings' % (embedded_size, size_layer))
        if n_attn_heads < 1 or size_layer % n_attn_heads != 0:
            raise ValueError(
                'size_layer (%d) must be divisible by n_attn_heads (%d)'
                % (size_layer, n_attn_heads))

        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])

        # Lengths are inferred from the number of non-zero ids, so id 0 must only be padding.
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype = tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype = tf.int32)
        batch_size = tf.shape(self.X)[0]
        self.batch_size = batch_size
        # Teacher forcing: the decoder input is the target without its last column, prefixed by GO.
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        
        self.embedding = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        
        self.num_layers = num_layers
        self.kernel_size = kernel_size
        self.size_layer = size_layer
        self.n_attn_heads = n_attn_heads
        self.dict_size = dict_size
        
        self.training_logits = self.forward(self.X, decoder_input)

        # One boolean mask of the real (non-padding) target positions; the float view
        # weights the loss, the boolean view selects positions for the accuracy.
        bool_masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len))
        masks = tf.cast(bool_masks, tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, bool_masks)
        mask_label = tf.boolean_mask(self.Y, bool_masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
    def forward(self, x, y, reuse = False):
        """Run the encoder on `x` and the decoder on `y`; return vocabulary logits.

        Args:
            x: int32 source ids, [batch, source_time].
            y: int32 decoder-input ids, [batch, target_time].
            reuse: passed to every variable scope; True re-uses the weights created
                by an earlier call.

        Returns:
            float tensor of logits with last dimension `dict_size`, one row per
            decoder position.
        """
        # The scope is nested twice on purpose: variables therefore live under
        # 'forward/forward/...'. Flattening it would rename every variable.
        with tf.variable_scope('forward',reuse=reuse):
            with tf.variable_scope('forward',reuse=reuse):
                encoder_embedded = tf.nn.embedding_lookup(self.embedding, x)
                decoder_embedded = tf.nn.embedding_lookup(self.embedding, y)
                # Only the encoder side receives position features.
                encoder_embedded += position_encoding(encoder_embedded)

                # Encoder: residual stack of convolutions with dilation 1, 2, 4, ...
                for i in range(self.num_layers): 
                    dilation_rate = 2 ** i
                    pad_sz = (self.kernel_size - 1) * dilation_rate 
                    with tf.variable_scope('block_%d'%i,reuse=reuse):
                        encoder_embedded += cnn_block(encoder_embedded, dilation_rate, 
                                                      pad_sz, self.size_layer, self.kernel_size)

                # g keeps the raw target embeddings; every decoder layer adds them to its query.
                g = tf.identity(decoder_embedded)
                for i in range(self.num_layers):
                    dilation_rate = 2 ** i
                    pad_sz = (self.kernel_size - 1) * dilation_rate
                    with tf.variable_scope('decode_%d'%i,reuse=reuse):
                        attn_res = h = cnn_block(decoder_embedded, dilation_rate, 
                                                 pad_sz, self.size_layer, self.kernel_size)
                        C = []
                        for j in range(self.n_attn_heads):
                            # Each head has its own dense projections of the decoder state (h_),
                            # the target embeddings (g_) and the encoder output (keys zu_, values ze_).
                            h_ = tf.layers.dense(h, self.size_layer//self.n_attn_heads)
                            g_ = tf.layers.dense(g, self.size_layer//self.n_attn_heads)
                            zu_ = tf.layers.dense(encoder_embedded, self.size_layer//self.n_attn_heads)
                            ze_ = tf.layers.dense(encoder_embedded, self.size_layer//self.n_attn_heads)

                            d = tf.layers.dense(h_, self.size_layer//self.n_attn_heads) + g_
                            dz = tf.matmul(d, tf.transpose(zu_, [0, 2, 1]))
                            # Softmax over source positions.
                            # NOTE(review): padded source positions are not masked out here.
                            a = tf.nn.softmax(dz)
                            c_ = tf.matmul(a, ze_)
                            C.append(c_)

                        # Concatenated heads are size_layer wide again and are merged residually.
                        c = tf.concat(C, 2)
                        h = tf.layers.dense(attn_res + c, self.size_layer)
                        decoder_embedded += h

                return tf.layers.dense(decoder_embedded, self.dict_size)

In [15]:
# Hyperparameters for the Summarization model and the training loop.
size_layer = 256       # conv filters / dense width inside the model
num_layers = 4         # convolution blocks in the encoder and in the decoder
embedded_size = 256    # embedding width (the residual connections add it to size_layer-wide outputs)
learning_rate = 1e-3   # Adam learning rate
batch_size = 12        # examples per minibatch
epoch = 20             # number of passes over the training set

In [16]:
def beam_search_decoding(length = 20, beam_width = 5):
    """Add a beam-search decoding sub-graph for the global `model` and return its ids.

    Decoding starts from a GO token per batch row and re-runs ``model.forward``
    (with ``reuse = True``) on the ids generated so far to score the next token.

    Args:
        length: decode length handed to ``beam_search.beam_search``.
        beam_width: beam size; the source batch is tiled by this factor.

    Returns:
        The first element of the pair returned by ``beam_search.beam_search``
        (the decoded ids; presumably indexed [batch, beam, step] given how the
        notebook consumes it -- TODO confirm).
    """
    def next_token_logits(ids):
        # Repeat every source row once per beam so it lines up with `ids`.
        tiled_source = tf.contrib.seq2seq.tile_batch(model.X, beam_width)
        all_logits = model.forward(tiled_source, ids, reuse = True)
        # Only the logits of the most recent position decide the next token.
        last_step = tf.shape(ids)[1] - 1
        return all_logits[:, last_step, :]

    start_tokens = tf.fill([model.batch_size], GO)
    best_ids, _ = beam_search.beam_search(
        next_token_logits,
        start_tokens,
        beam_width,
        length,
        len(dictionary),
        0.0,
        eos_id = EOS)

    return best_ids

In [17]:
# Start from an empty graph so re-running this cell does not collide with old variables.
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Summarization(size_layer, num_layers, embedded_size, 
                      len(dictionary), learning_rate)
# The beam-search graph must be built after `model` exists (it reads the global `model`)
# and before the initializer runs.
model.generate = beam_search_decoding()
sess.run(tf.global_variables_initializer())

In [18]:
def pad_sentence_batch(sentence_batch, pad_int):
    """Right-pad every sentence in the batch to the length of the longest one.

    Args:
        sentence_batch: non-empty sequence of id lists.
        pad_int: id used for padding.

    Returns:
        tuple ``(padded_seqs, seq_lens)``: the padded id lists (new list objects)
        and the original length of each sentence.
    """
    width = max(map(len, sentence_batch))
    pairs = [(sentence + [pad_int] * (width - len(sentence)), len(sentence))
             for sentence in sentence_batch]
    padded_seqs = [padded for padded, _ in pairs]
    seq_lens = [size for _, size in pairs]
    return padded_seqs, seq_lens

In [19]:
# Smoke test: run a single optimisation step on ten examples to check that the graph executes.
# Note that this already updates the weights (model.optimizer is among the fetches).
batch_x, _ = pad_sentence_batch(train_X[:10], PAD)
batch_y, _ = pad_sentence_batch(train_Y[:10], PAD)
l, acc, loss, _ = sess.run([model.training_logits, model.accuracy, model.cost, model.optimizer], 
                                      feed_dict={model.X:batch_x,
                                                model.Y:batch_y})

In [42]:
from keras.preprocessing import sequence

def calculate_rouges(predicted, batch_y):
    """Average ``rouge.rouge_n`` score between predicted and reference id sequences.

    Padding is excluded: for every row only the first ``k`` positions are compared,
    where ``k`` is the number of non-zero ids in the reference row.

    Args:
        predicted: integer array [batch, time] of predicted ids.
        batch_y: PAD-padded reference ids (nested list or array) with the same shape.

    Returns:
        float: mean of the per-example ``rouge.rouge_n`` scores (called with its
        default n-gram order; presumably bigrams, as the training loop labels it
        ``rouge_2`` -- TODO confirm).
    """
    batch_y = np.array(batch_y)
    lengths = np.count_nonzero(batch_y, axis = 1)
    # Prefix mask built with broadcasting: True for the first `lengths[i]` columns of row i.
    # This replaces a Python loop + keras pad_sequences and the removed `np.bool` alias.
    keep = np.arange(batch_y.shape[1])[None, :] < lengths[:, None]
    rouges = []
    for i in range(predicted.shape[0]):
        reference = batch_y[i][keep[i]]
        hypothesis = predicted[i][keep[i]]
        rouges.append(rouge.rouge_n([hypothesis], [reference]))
    return np.mean(rouges)

In [43]:
from tqdm import tqdm
from sklearn.utils import shuffle
import time

# Train for `epoch` passes; after each pass evaluate on the held-out set.
# Both loops feed the gold titles (teacher forcing), so the "test" numbers measure
# next-token prediction, not free-running generation.
for EPOCH in range(epoch):
    lasttime = time.time()  # start-of-epoch timestamp (not reported below)
    total_loss, total_accuracy, total_loss_test, total_accuracy_test = 0, 0, 0, 0
    rouge_train, rouge_test = 0, 0
    train_X, train_Y = shuffle(train_X, train_Y)
    test_X, test_Y = shuffle(test_X, test_Y)

    # Batch start offsets; their count is the true number of minibatches (the last
    # one may be smaller), which is what the per-epoch averages must divide by.
    train_starts = range(0, len(train_X), batch_size)
    test_starts = range(0, len(test_X), batch_size)

    pbar = tqdm(train_starts, desc='train minibatch loop')
    for k in pbar:
        # Slicing clamps at the end of the list, so no explicit min() is needed.
        batch_x, _ = pad_sentence_batch(train_X[k: k+batch_size], PAD)
        batch_y, _ = pad_sentence_batch(train_Y[k: k+batch_size], PAD)
        l, acc, loss, _ = sess.run([model.training_logits, model.accuracy, model.cost, model.optimizer], 
                                      feed_dict={model.X:batch_x,
                                                model.Y:batch_y})
        total_loss += loss
        total_accuracy += acc
        r = calculate_rouges(np.argmax(l, axis = 2), batch_y)
        rouge_train += r
        pbar.set_postfix(cost=loss, accuracy = acc, rouge_2 = r)
    
    pbar = tqdm(test_starts, desc='test minibatch loop')
    for k in pbar:
        batch_x, _ = pad_sentence_batch(test_X[k: k+batch_size], PAD)
        batch_y, _ = pad_sentence_batch(test_Y[k: k+batch_size], PAD)
        # No optimizer in the fetches: evaluation must not update the weights.
        l, acc, loss = sess.run([model.training_logits, model.accuracy, model.cost], 
                                      feed_dict={model.X:batch_x,
                                                model.Y:batch_y})
        total_loss_test += loss
        total_accuracy_test += acc
        r = calculate_rouges(np.argmax(l, axis = 2), batch_y)
        rouge_test += r
        pbar.set_postfix(cost=loss, accuracy = acc, rouge_2 = r)
        
    total_loss /= len(train_starts)
    total_accuracy /= len(train_starts)
    total_loss_test /= len(test_starts)
    total_accuracy_test /= len(test_starts)
    rouge_train /= len(train_starts)
    rouge_test /= len(test_starts)
        
    print('epoch: %d, avg loss: %f, avg accuracy: %f'%(EPOCH, total_loss, total_accuracy))
    print('epoch: %d, avg loss test: %f, avg accuracy test: %f'%(EPOCH, total_loss_test, total_accuracy_test))
    print('epoch: %d, avg train rouge: %f, avg test rouge: %f'%(EPOCH, rouge_train, rouge_test))

train minibatch loop: 100%|██████████| 2240/2240 [08:49<00:00,  4.58it/s, accuracy=0.167, cost=4.41, rouge_2=0]      
test minibatch loop: 100%|██████████| 249/249 [00:27<00:00,  9.73it/s, accuracy=0.277, cost=5.74, rouge_2=0.0973] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 0, avg loss: 6.818431, avg accuracy: 0.171947
epoch: 0, avg loss test: 6.112989, avg accuracy test: 0.207118
epoch: 0, avg train rouge: 0.030709, avg test rouge: 0.065109


train minibatch loop: 100%|██████████| 2240/2240 [08:43<00:00,  4.69it/s, accuracy=0.0714, cost=6.63, rouge_2=0]     
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00, 10.17it/s, accuracy=0.186, cost=6.17, rouge_2=0.0222]
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 1, avg loss: 4.999484, avg accuracy: 0.274408
epoch: 1, avg loss test: 5.525000, avg accuracy test: 0.279031
epoch: 1, avg train rouge: 0.106283, avg test rouge: 0.117773


train minibatch loop: 100%|██████████| 2240/2240 [08:43<00:00,  4.67it/s, accuracy=0, cost=8.2, rouge_2=0]          
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.86it/s, accuracy=0.191, cost=7.59, rouge_2=0.0507]
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 2, avg loss: 3.920182, avg accuracy: 0.354289
epoch: 2, avg loss test: 5.413165, avg accuracy test: 0.322328
epoch: 2, avg train rouge: 0.169454, avg test rouge: 0.159883


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.80it/s, accuracy=0.333, cost=3.99, rouge_2=0.125] 
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.83it/s, accuracy=0.326, cost=6.09, rouge_2=0.229]  
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 3, avg loss: 3.199538, avg accuracy: 0.428540
epoch: 3, avg loss test: 5.530151, avg accuracy test: 0.338215
epoch: 3, avg train rouge: 0.227199, avg test rouge: 0.178586


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  5.02it/s, accuracy=0.308, cost=3.17, rouge_2=0.0833]
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00, 10.09it/s, accuracy=0.382, cost=6.62, rouge_2=0.206] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 4, avg loss: 2.745188, avg accuracy: 0.483630
epoch: 4, avg loss test: 5.909317, avg accuracy test: 0.348528
epoch: 4, avg train rouge: 0.275914, avg test rouge: 0.187344


train minibatch loop: 100%|██████████| 2240/2240 [08:43<00:00,  4.86it/s, accuracy=0.778, cost=0.666, rouge_2=0.5]  
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00, 10.09it/s, accuracy=0.407, cost=6.28, rouge_2=0.217]  
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 5, avg loss: 2.430390, avg accuracy: 0.524540
epoch: 5, avg loss test: 6.273368, avg accuracy test: 0.349211
epoch: 5, avg train rouge: 0.316919, avg test rouge: 0.190898


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.86it/s, accuracy=0.667, cost=0.853, rouge_2=0.5]  
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.59it/s, accuracy=0.289, cost=7.63, rouge_2=0.121] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 6, avg loss: 2.238658, avg accuracy: 0.551860
epoch: 6, avg loss test: 6.578097, avg accuracy test: 0.347707
epoch: 6, avg train rouge: 0.344644, avg test rouge: 0.189197


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.92it/s, accuracy=0.889, cost=1.02, rouge_2=0.875] 
test minibatch loop: 100%|██████████| 249/249 [00:24<00:00,  9.95it/s, accuracy=0.385, cost=6.06, rouge_2=0.314]  
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 7, avg loss: 2.053173, avg accuracy: 0.580353
epoch: 7, avg loss test: 6.919445, avg accuracy test: 0.361813
epoch: 7, avg train rouge: 0.377351, avg test rouge: 0.201893


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.62it/s, accuracy=0.714, cost=0.658, rouge_2=0.667]
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.72it/s, accuracy=0.439, cost=6.05, rouge_2=0.263] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 8, avg loss: 1.901131, avg accuracy: 0.605320
epoch: 8, avg loss test: 7.117372, avg accuracy test: 0.352279
epoch: 8, avg train rouge: 0.405827, avg test rouge: 0.208454


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.87it/s, accuracy=0.333, cost=4.09, rouge_2=0.125] 
test minibatch loop: 100%|██████████| 249/249 [00:24<00:00, 10.14it/s, accuracy=0.495, cost=6.77, rouge_2=0.33]  
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 9, avg loss: 1.814632, avg accuracy: 0.619794
epoch: 9, avg loss test: 7.616632, avg accuracy test: 0.368821
epoch: 9, avg train rouge: 0.423145, avg test rouge: 0.217142


train minibatch loop: 100%|██████████| 2240/2240 [08:43<00:00,  4.83it/s, accuracy=0.25, cost=4.28, rouge_2=0]      
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.78it/s, accuracy=0.181, cost=10.7, rouge_2=0.0389]
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 10, avg loss: 1.716998, avg accuracy: 0.638041
epoch: 10, avg loss test: 8.013822, avg accuracy test: 0.356700
epoch: 10, avg train rouge: 0.444813, avg test rouge: 0.213070


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.79it/s, accuracy=0, cost=6.24, rouge_2=0]         
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.77it/s, accuracy=0.315, cost=10.5, rouge_2=0.213] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 11, avg loss: 1.712711, avg accuracy: 0.642915
epoch: 11, avg loss test: 8.331740, avg accuracy test: 0.368230
epoch: 11, avg train rouge: 0.451198, avg test rouge: 0.221297


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.89it/s, accuracy=0.444, cost=3.05, rouge_2=0.125] 
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.33it/s, accuracy=0.327, cost=11.6, rouge_2=0.242] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 12, avg loss: 1.631060, avg accuracy: 0.657665
epoch: 12, avg loss test: 8.894247, avg accuracy test: 0.360014
epoch: 12, avg train rouge: 0.469315, avg test rouge: 0.210628


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  5.02it/s, accuracy=0.462, cost=5.47, rouge_2=0.25]  
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.88it/s, accuracy=0.427, cost=7.48, rouge_2=0.218] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 13, avg loss: 1.689837, avg accuracy: 0.657037
epoch: 13, avg loss test: 9.467714, avg accuracy test: 0.355562
epoch: 13, avg train rouge: 0.468034, avg test rouge: 0.207786


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.79it/s, accuracy=0.571, cost=2.48, rouge_2=0.167] 
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.94it/s, accuracy=0.484, cost=8.15, rouge_2=0.399] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 14, avg loss: 1.637637, avg accuracy: 0.665320
epoch: 14, avg loss test: 9.608679, avg accuracy test: 0.354063
epoch: 14, avg train rouge: 0.478025, avg test rouge: 0.218937


train minibatch loop: 100%|██████████| 2240/2240 [08:43<00:00,  4.95it/s, accuracy=0.625, cost=4.6, rouge_2=0.286]  
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.84it/s, accuracy=0.592, cost=8.91, rouge_2=0.474] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 15, avg loss: 1.577526, avg accuracy: 0.678844
epoch: 15, avg loss test: 10.291262, avg accuracy test: 0.355242
epoch: 15, avg train rouge: 0.497572, avg test rouge: 0.213384


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.80it/s, accuracy=0.714, cost=1.64, rouge_2=0.5]   
test minibatch loop: 100%|██████████| 249/249 [00:24<00:00, 10.32it/s, accuracy=0.411, cost=7.62, rouge_2=0.245] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 16, avg loss: 1.452268, avg accuracy: 0.698912
epoch: 16, avg loss test: 10.181980, avg accuracy test: 0.362795
epoch: 16, avg train rouge: 0.521833, avg test rouge: 0.226739


train minibatch loop: 100%|██████████| 2240/2240 [08:43<00:00,  4.63it/s, accuracy=0.5, cost=2.1, rouge_2=0]        
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00,  9.94it/s, accuracy=0.348, cost=10.3, rouge_2=0.209] 
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 17, avg loss: 1.444384, avg accuracy: 0.704302
epoch: 17, avg loss test: 10.688622, avg accuracy test: 0.367093
epoch: 17, avg train rouge: 0.528785, avg test rouge: 0.231045


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.87it/s, accuracy=0.429, cost=3.31, rouge_2=0.167] 
test minibatch loop: 100%|██████████| 249/249 [00:25<00:00, 10.04it/s, accuracy=0.312, cost=13, rouge_2=0.206]   
train minibatch loop:   0%|          | 0/2240 [00:00<?, ?it/s]

epoch: 18, avg loss: 1.439127, avg accuracy: 0.710430
epoch: 18, avg loss test: 11.118785, avg accuracy test: 0.373973
epoch: 18, avg train rouge: 0.537580, avg test rouge: 0.234384


train minibatch loop: 100%|██████████| 2240/2240 [08:42<00:00,  4.82it/s, accuracy=1, cost=0.0376, rouge_2=1]       
test minibatch loop: 100%|██████████| 249/249 [00:24<00:00,  9.55it/s, accuracy=0.313, cost=13.9, rouge_2=0.25]   

epoch: 19, avg loss: 1.424267, avg accuracy: 0.715768
epoch: 19, avg loss test: 11.545753, avg accuracy test: 0.373577
epoch: 19, avg train rouge: 0.544493, avg test rouge: 0.239260





In [22]:
# Beam-search decode the first held-out article (a batch of one); [0] selects that
# single batch entry, leaving one id sequence per beam.
generated = sess.run(model.generate, feed_dict = {model.X: [test_X[0]]})[0]

In [23]:
# Map every beam's ids back to tokens; special tokens (GO/EOS/PAD) are printed as-is.
for g in generated:
    print(' '.join([rev_dictionary[i] for i in g]))

GO pembantu dirakam ke atas tahan tubuh anggota utara EOS PAD
GO pembantu dirakam EOS PAD PAD PAD PAD PAD PAD PAD
GO pembantu beragama ke tingkat ekspor anggota ke ke masjid EOS
GO pembantu dirakam , cuba rombak polis siasat anggotanya direman EOS
GO pembantu beragama ke tingkat ekspor anggota ke tan sri EOS


In [25]:
# Reference title of the same article, for comparison with the generated beams.
' '.join([rev_dictionary[i] for i in test_Y[0]])

'pegawai , anggota polis dibebaskan dengan jaminan sprm EOS'