In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import os
from collections import Counter
from typing import NamedTuple, List, Dict, Tuple
import random
from datetime import datetime
import time
import math

In [3]:
from utils.config import Config
from data_loader.ptb_datasource import PTBDataSource

# config, data

In [4]:
units = [512]
layers = [2]
lrs = [0.001]
configs = []
for l in layers:
    for u in units:
        for lr in lrs:
            configs.append(Config(num_layers=l, num_units=u, learning_rate=lr, log_dir='./logs/rnn/'))

# model

In [5]:
class RNN:
    
    def __init__(self, config: Config, vocab_size):
        self.config = config
        self.vocab_size = vocab_size
        self._create_placeholder()
        self._create_model()
        self.loss = self._create_loss()
        self.accuracy = self._create_acc()
        self.perplexity = self._create_perplexity()
    
    def _create_placeholder(self):
        self.is_training = tf.placeholder(shape=(), dtype=tf.bool, name='is_training')
        self.inputs = tf.placeholder(shape=[None, self.config.max_length], dtype=tf.int32, name='inputs')
        self.inputs_length = tf.placeholder(shape=[None], dtype=tf.int32, name='inputs_length')
        self.target_ids = tf.placeholder(shape=[None], dtype=tf.int32, name='target_ids')
    
    def _create_model(self):
        self.global_step = tf.train.get_or_create_global_step()
        embedded_inputs = self._embedding(self.inputs)
        _, encoder_state = self._encode(embedded_inputs)
        # encoder_state = tf.layers.dense(encoder_state, num_units, activation=tf.nn.relu, name='hidden_layer')
        self.outputs_logits = tf.layers.dense(encoder_state, self.vocab_size, name='outputs_layer')
        self.predicted_id = tf.to_int32(tf.argmax(self.outputs_logits, axis=-1))
        
    def _create_loss(self):
        target_ids_one_hot = tf.one_hot(self.target_ids, self.vocab_size)
        target_ids_smoothed = self._label_smoothing(target_ids_one_hot)
        cross_ent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.outputs_logits, labels=target_ids_smoothed)
        return tf.reduce_mean(cross_ent)
        
    def _create_acc(self):
        return tf.reduce_mean(tf.to_float(tf.equal(self.target_ids, self.predicted_id)))
    
    def _create_perplexity(self):
        probs = tf.nn.softmax(self.outputs_logits)
        print(probs)
        target_probs = tf.gather(probs, self.target_ids, axis=1)
        print(target_probs)
        outputs = tf.reduce_mean(1.0/target_probs)
        print(outputs)
        return outputs
    
    def _embedding(self, inputs):
        lookup_table = tf.get_variable('lookup_table', shape=[self.vocab_size, self.config.embedding_size], dtype=tf.float32)
        embedded_inputs = tf.nn.embedding_lookup(lookup_table, inputs)
        return embedded_inputs
    
    def _encode(self, embedded_inputs):
        outputs, final_state = self._bidirectional_cell(
            embedded_inputs,
            self.config.num_layers,
            self.config.num_units,
            self.config.dropout_in_rate,
            self.config.dropout_out_rate
        )
        return outputs, final_state
    
    def _bidirectional_cell(self, inputs, num_layers, num_units, dropout_in_rate, dropout_out_rate):
        cell_fw = self._gru(num_layers, num_units, dropout_in_rate, dropout_out_rate, name='cell_fw')
        cell_bw = self._gru(num_layers, num_units, dropout_in_rate, dropout_out_rate, name='cell_bw')
        (fw_outputs, bw_outputs), (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=inputs,
            sequence_length=self.inputs_length,
            dtype=tf.float32,
            scope='bidirectional_cells')
        outputs = tf.concat([fw_outputs, bw_outputs], axis=-1)
        final_state = tf.reduce_sum([fw_state, bw_state], axis=0)
        final_state = tf.concat(tf.unstack(final_state, axis=0), axis=-1)
        return outputs, final_state
    
    def _gru(self, num_layers: int, num_units: int, dropout_in_rate: float, dropout_out_rate: float, name: str):
        cells = []
        for l in range(num_layers):
            cell = tf.nn.rnn_cell.GRUCell(num_units, tf.nn.relu, kernel_initializer=tf.contrib.layers.xavier_initializer(), name=name)
            if l == 0:
                cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=1-dropout_in_rate)
            if l == num_layers-1:
                cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=1-dropout_out_rate)
            cells.append(cell)
        return tf.nn.rnn_cell.MultiRNNCell(cells)
    
    def _label_smoothing(self, inputs, epsilon: float=0.1):
        feature_dim = inputs.get_shape().as_list()[-1]
        return (1-epsilon) * inputs + (epsilon / feature_dim)

In [6]:
num_epoch = 400

In [7]:
def start():
    with tf.device('/device:GPU:0'):
        now = datetime.now()
        logdir = now.strftime("%Y%m%d-%H%M%S") + "/"

        datasource = PTBDataSource(config)

        rnn = RNN(config, datasource.vocab_size)
        optimizer = tf.train.AdamOptimizer(config.learning_rate)
        train_vars = tf.trainable_variables()
        gradients = tf.gradients(rnn.loss, train_vars)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, config.grad_clip)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, train_vars), global_step=rnn.global_step)
        with tf.name_scope('training'):
            s_loss = tf.summary.scalar('loss', rnn.loss)
            s_acc = tf.summary.scalar('accuracy', rnn.accuracy)
            s_perp = tf.summary.scalar('perplexity', rnn.perplexity)
            s_trains = tf.summary.merge([s_loss, s_acc, s_perp])
        with tf.name_scope('test'):
            test_s_acc = tf.summary.scalar('accuracy', rnn.accuracy)
            test_s_perp = tf.summary.scalar('perplexity', rnn.perplexity)
        
        tf_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=True,
            gpu_options=tf.GPUOptions(
                allow_growth=True,
            ))
        with tf.Session(config=tf_config) as sess:
            saver = tf.train.Saver()
            writer = tf.summary.FileWriter(config.to_log_dir() + '/' + logdir, sess.graph)
            sess.run(tf.global_variables_initializer())
            for i in range(num_epoch):
                start = time.time()
                datasource.shuffle()
                batch_list = datasource.feed_dict_list(rnn)
                losses = []
                accuracies = []
                perplexities = []
                for (j, fd) in enumerate(batch_list):
                    loss, acc, perp, _, smr_train, step = sess.run([rnn.loss, rnn.accuracy, rnn.perplexity, train_op, s_trains, rnn.global_step], feed_dict=fd)
                    losses.append(loss)
                    accuracies.append(acc)
                    perplexities.append(perp)
                    writer.add_summary(smr_train, step)
                    if j % 100 == 0:
                        #print('loss: {:.3f}, acc: {:.3f}'.format(loss, acc))
                        inference(sess, rnn, datasource, writer, test_s_acc, test_s_perp, step)
                elapsed = time.time() - start
                print('epoch {}/{} finished, {} step, elapsed {} sec. loss: {:.3f}, accuracy: {:.3f}, perlexity: {:.3f}'.format(i+1, num_epoch, step, elapsed, np.average(losses), np.average(accuracies), np.average(perplexities)))
                # loss が nan なら 飛ばす
                if math.isnan(np.average(losses)):
                    print('loss is nan')
                    break
                saver.save(sess, config.to_ckpt_path(), global_step=step)

In [8]:
def inference(sess, model, datasource, writer, s_acc, s_perp, step):
    with tf.name_scope('inference'):
        test_list = datasource.feed_test_list(model)
        acc, smr_acc, smr_perp = sess.run([model.accuracy, s_acc, s_perp], feed_dict=test_list)
        writer.add_summary(smr_acc, step)
        writer.add_summary(smr_perp, step)

In [None]:
for config in configs:
    with tf.Graph().as_default():
        start()

Instructions for updating:
seq_dim is deprecated, use seq_axis instead
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
Tensor("Softmax:0", shape=(?, 10000), dtype=float32, device=/device:GPU:0)
Tensor("GatherV2:0", shape=(?, ?), dtype=float32, device=/device:GPU:0)
Tensor("Mean_2:0", shape=(), dtype=float32, device=/device:GPU:0)
epoch 1/400 finished, 327 step, elapsed 36.97825002670288 sec. loss: 7.288, accuracy: 0.070, perlexity: 241713315840.000
epoch 2/400 finished, 655 step, elapsed 36.487141847610474 sec. loss: 6.827, accuracy: 0.115, perlexity: 57184.367
epoch 3/400 finished, 983 step, elapsed 36.46529030799866 sec. loss: 6.579, accuracy: 0.140, perlexity: 25947.561
epoch 4/400 finished, 1311 step, elapsed 36.19972801208496 sec. loss: 6.344, accuracy: 0.159, perlexity: 45814.141
epoch 5/400 finished, 1639 step, elapsed 36.34585881233215 sec. loss: 6.259, accuracy: 0.169, perlexity: 66781.391
epoch 6/400 finished, 1967 step, elapsed 36.75216031074524 se

epoch 62/400 finished, 20335 step, elapsed 35.7199649810791 sec. loss: 4.527, accuracy: 0.336, perlexity: 87273160.000
epoch 63/400 finished, 20663 step, elapsed 36.40294170379639 sec. loss: 4.522, accuracy: 0.335, perlexity: 199316352.000
epoch 64/400 finished, 20991 step, elapsed 36.331058740615845 sec. loss: 4.512, accuracy: 0.340, perlexity: 288825664.000
epoch 65/400 finished, 21319 step, elapsed 36.93815636634827 sec. loss: 4.526, accuracy: 0.337, perlexity: 10384871424.000
epoch 66/400 finished, 21647 step, elapsed 36.427062034606934 sec. loss: 4.497, accuracy: 0.339, perlexity: 883379456.000
epoch 67/400 finished, 21975 step, elapsed 36.297351121902466 sec. loss: 4.493, accuracy: 0.339, perlexity: 763732992.000
epoch 68/400 finished, 22303 step, elapsed 36.648056507110596 sec. loss: 4.406, accuracy: 0.353, perlexity: 1750180608.000
epoch 69/400 finished, 22631 step, elapsed 36.06713938713074 sec. loss: 4.393, accuracy: 0.357, perlexity: 2678583296.000
epoch 70/400 finished, 229

epoch 129/400 finished, 42311 step, elapsed 36.11920094490051 sec. loss: 3.798, accuracy: 0.450, perlexity: 245958508544.000
epoch 130/400 finished, 42639 step, elapsed 36.045727252960205 sec. loss: 3.810, accuracy: 0.453, perlexity: 1998436433920.000
epoch 131/400 finished, 42967 step, elapsed 37.263872146606445 sec. loss: 3.832, accuracy: 0.438, perlexity: 39421882368.000
epoch 132/400 finished, 43295 step, elapsed 36.24377703666687 sec. loss: 3.724, accuracy: 0.466, perlexity: 277750743040.000
epoch 133/400 finished, 43623 step, elapsed 36.22351312637329 sec. loss: 3.797, accuracy: 0.453, perlexity: 34968281088.000
epoch 134/400 finished, 43951 step, elapsed 36.774821043014526 sec. loss: 3.756, accuracy: 0.461, perlexity: inf
epoch 135/400 finished, 44279 step, elapsed 36.12817740440369 sec. loss: 3.747, accuracy: 0.461, perlexity: 1371308032000.000
epoch 136/400 finished, 44607 step, elapsed 36.07532334327698 sec. loss: 3.739, accuracy: 0.463, perlexity: inf
epoch 137/400 finished,