In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf

import argparse
import time
import os
from six.moves import cPickle

import collections
from six.moves import cPickle
import numpy as np
import re
import itertools

from tensorflow.python.ops import rnn_cell
from tensorflow.python.ops import seq2seq

import random

from beam import BeamSearch
from utils import TextLoader

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, default='data/tiny',
                    help='data directory containing input.txt')
parser.add_argument('--save_dir', type=str, default='save',
                    help='directory to store checkpointed models')
parser.add_argument('--rnn_size', type=int, default=4,
                    help='size of RNN hidden state')
parser.add_argument('--num_layers', type=int, default=1,
                    help='number of layers in the RNN')
parser.add_argument('--model', type=str, default='rnn',
                    help='rnn, gru, or lstm')
parser.add_argument('--batch_size', type=int, default=3,
                    help='minibatch size')
parser.add_argument('--seq_length', type=int, default=2,
                    help='RNN sequence length')
parser.add_argument('--num_epochs', type=int, default=50,
                    help='number of epochs')
parser.add_argument('--save_every', type=int, default=100,
                    help='save frequency')
parser.add_argument('--grad_clip', type=float, default=5.,
                    help='clip gradients at this value')
parser.add_argument('--learning_rate', type=float, default=0.002,
                    help='learning rate')
parser.add_argument('--decay_rate', type=float, default=0.97,
                    help='decay rate for rmsprop')
parser.add_argument('--init_from', type=str, default=None,
                    help="""continue training from saved model at this path. Path must contain files saved by previous training process:
                        'config.pkl'        : configuration;
                        'vocab.pkl'   : vocabulary definitions;
                        'checkpoint'        : paths to model file(s) (created by tf).
                                              Note: this file contains absolute paths, be careful when moving files around;
                        'model.ckpt-*'      : file(s) with model definition (created by tf)
                    """)
parser.add_argument('-f', type=str, default=None)
args = parser.parse_args()

print("batch_size:", args.batch_size)
print("seq_length:", args.seq_length)

batch_size: 3
seq_length: 2


In [3]:
# Record the TensorFlow release this notebook was executed with; the recorded
# output matters because calls such as tf.split / tf.concat further down pass
# the axis as the first argument.
print(tf.__version__)

0.12.1


In [4]:
# Build the batching helper for the corpus and copy its vocabulary size onto
# args so the model-building cells can read it from one place.
data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
args.vocab_size = data_loader.vocab_size

# Vocabulary mapping, one "word:id" pair per line.
print("data_loader.vocab")
for item in data_loader.vocab.items():
    print("{0:10s}:{1:2d}".format(*item))

# The word list on its own.
print("data_loader.words")
for item in data_loader.words:
    print("{0:10s}".format(item))

# Each summary line below is followed by one blank line.
print("data_loader.vocab_size:", data_loader.vocab_size, end="\n\n")

print("data_loader.tensor", data_loader.tensor, end="\n\n")

# With the defaults and the recorded 18-element tensor this is 3; presumably
# tensor.size // (batch_size * seq_length) = 18 // (3 * 2) -- TextLoader is
# not shown here, confirm against its implementation.
print("data_loader.num_batches", data_loader.num_batches, end="\n\n")

print("data_loader.x_batches", data_loader.x_batches)
print("data_loader.y_batches", data_loader.y_batches)

reading text file
data_loader.vocab
죽느냐 : 6
그것이 : 0
윌리엄 : 5
말했습니다.: 1
세익스피어는: 4
문제라고: 2
사느냐 : 3
data_loader.words
그것이 
말했습니다.
문제라고
사느냐 
세익스피어는
윌리엄 
죽느냐 
data_loader.vocab_size: 7

data_loader.tensor [5 4 6 3 0 2 1 5 4 6 3 0 2 1 5 4 6 3]

data_loader.num_batches 3

data_loader.x_batches [array([[5, 4],
       [1, 5],
       [2, 1]]), array([[6, 3],
       [4, 6],
       [5, 4]]), array([[0, 2],
       [3, 0],
       [6, 3]])]
data_loader.y_batches [array([[4, 6],
       [5, 4],
       [1, 5]]), array([[3, 0],
       [6, 3],
       [4, 6]]), array([[2, 1],
       [0, 2],
       [3, 5]])]


In [5]:
# Stack args.num_layers vanilla RNN layers of args.rnn_size hidden units each
# (defaults: rnn_size=4, num_layers=1). The list repeats one BasicRNNCell
# object rather than constructing a new cell per layer.
cell = rnn_cell.MultiRNNCell([rnn_cell.BasicRNNCell(args.rnn_size)] * args.num_layers)

In [6]:
# Open an interactive session; it installs itself as the default session, which
# is what lets later cells call `.eval()` / `.run()` without passing a session.
session = tf.InteractiveSession()

In [7]:
# Integer word-id placeholders for one minibatch of inputs and of targets, each
# shaped (batch_size, seq_length) -- (3, 2) with the default arguments.
input_data = tf.placeholder(dtype=tf.int32, shape=[args.batch_size, args.seq_length])
targets = tf.placeholder(dtype=tf.int32, shape=[args.batch_size, args.seq_length])
# All-zero recurrent state sized for a full batch.
initial_state = cell.zero_state(batch_size=args.batch_size, dtype=tf.float32)

In [8]:
# Show the structure of the zero state (the recorded output is a tuple of tensors).
print(initial_state)

(<tf.Tensor 'zeros:0' shape=(3, 4) dtype=float32>,)


In [9]:
# Evaluate the first state tensor in the default session to see concrete values.
print(initial_state[0].eval())

[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]


In [10]:
# Bookkeeping variables; trainable=False keeps them out of tf.trainable_variables().
# batch_pointer: int32 counter starting at 0.
batch_pointer = tf.Variable(0, name="batch_pointer", trainable=False, dtype=tf.int32)
# Op that, when run, stores batch_pointer + 1 back into batch_pointer.
inc_batch_pointer_op = tf.assign(batch_pointer, batch_pointer + 1)
# epoch_pointer: int32 counter starting at 0 (not updated anywhere in the visible cells).
epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False, dtype=tf.int32)
# batch_time: float scalar starting at 0.0 (not updated anywhere in the visible cells).
batch_time = tf.Variable(0.0, name="batch_time", trainable=False)

In [11]:
# NOTE(review): this only builds an assign op and displays it (see the Tensor
# repr in the recorded output); the op is never run, so the value of
# batch_pointer is not changed by this cell.
batch_pointer.assign(0)

<tf.Tensor 'Assign_1:0' shape=() dtype=int32_ref>

In [12]:
# Initialize every variable that exists in the graph at this point, using the
# default (interactive) session. Variables created in later cells are not covered.
tf.global_variables_initializer().run()

In [13]:
# Read the current value of batch_pointer from the default session.
print(batch_pointer.eval())

0


In [14]:
# Create the output-projection parameters and the embedding table under the
# 'rnnlm' variable scope, then turn the word-id batch into a list holding one
# embedded input tensor per time step.
with tf.variable_scope('rnnlm', reuse=None):
    softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])    #(4, 7)
    softmax_b = tf.get_variable("softmax_b", [args.vocab_size])  #7
    embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])    #(7, 4)
    # One embedding row per word id: (batch_size, seq_length, rnn_size) = (3, 2, 4) with the defaults.
    embedding_lookup = tf.nn.embedding_lookup(embedding, input_data)
    # Split along axis 1 into seq_length pieces, each (batch_size, 1, rnn_size).
    inputs_temp = tf.split(1, args.seq_length, embedding_lookup)
    # Drop the singleton axis 1 so every list element is (batch_size, rnn_size).
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs_temp]

In [15]:
# Print every tensor built in the previous cell, one per line, to check shapes.
print(softmax_w, softmax_b, embedding, embedding_lookup, inputs_temp, inputs, sep="\n")

Tensor("rnnlm/softmax_w/read:0", shape=(4, 7), dtype=float32)
Tensor("rnnlm/softmax_b/read:0", shape=(7,), dtype=float32)
Tensor("rnnlm/embedding/read:0", shape=(7, 4), dtype=float32)
Tensor("rnnlm/embedding_lookup:0", shape=(3, 2, 4), dtype=float32)
[<tf.Tensor 'rnnlm/split:0' shape=(3, 1, 4) dtype=float32>, <tf.Tensor 'rnnlm/split:1' shape=(3, 1, 4) dtype=float32>]
[<tf.Tensor 'rnnlm/Squeeze:0' shape=(3, 4) dtype=float32>, <tf.Tensor 'rnnlm/Squeeze_1:0' shape=(3, 4) dtype=float32>]


In [16]:
# Build the recurrent network, the loss, and the training op.
with tf.variable_scope('rnnlm', reuse=None):
    # Unroll the RNN over the per-time-step inputs, starting from initial_state.
    outputs, last_state = seq2seq.rnn_decoder(inputs, initial_state, cell, scope='rnnlm')
    # Concatenate the per-step outputs along axis 1 and flatten to rows of
    # rnn_size features; the result is (6, 4) with the default arguments.
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])

    # Project the hidden features onto the vocabulary and normalize to probabilities.
    logits = tf.matmul(output, softmax_w) + softmax_b
    probs = tf.nn.softmax(logits)

    # Per-position loss against the flattened targets; every position has weight 1.
    loss = seq2seq.sequence_loss_by_example([logits],
                                            [tf.reshape(targets, [-1])],
                                            [tf.ones([args.batch_size * args.seq_length])],
                                            args.vocab_size)

    # Average the summed loss over batch_size * seq_length positions.
    cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    final_state = last_state

    # Start from the configured learning rate. The variable used to be created
    # as 0.0 and nothing in the visible cells assigns it before train_op runs,
    # so every Adam update was a no-op. It remains a non-trainable variable, so
    # later code can still overwrite it (e.g. for a decay schedule) via tf.assign.
    lr = tf.Variable(args.learning_rate, trainable=False)
    tvars = tf.trainable_variables()
    # Clip the gradients by their global norm before handing them to the optimizer.
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(lr)
    train_op = optimizer.apply_gradients(zip(grads, tvars))

In [44]:
# Pull the next (inputs, targets) minibatch from the loader.
# NOTE(review): the x / y values printed in a later cell equal the second
# entries of data_loader.x_batches / y_batches printed earlier, so next_batch()
# had presumably already been called once before this recorded run (execution
# counts jump to 44) -- confirm with a fresh kernel.
x, y = data_loader.next_batch()

In [45]:
# Evaluate the zero initial state into concrete values so it can be fed back in.
state = session.run(initial_state)

In [46]:
# Feed dictionary binding the two placeholders and the initial-state tensors
# to this batch's values.
feed = {input_data: x, targets: y, initial_state: state}

In [47]:
# Run the initializer again: the 'rnnlm' weights and the optimizer-related
# variables were created after the first initializer call and still need values.
# This also resets every already-initialized variable to its initial value.
tf.global_variables_initializer().run()

In [48]:
# One training step in a single session.run call: evaluate the cost, fetch the
# final RNN state, apply the gradient update, and advance batch_pointer by one.
train_loss, state, _, _ = session.run([cost, final_state, train_op, inc_batch_pointer_op], feed)

In [52]:
# Inspect the flattened RNN output tensor; this prints the symbolic graph node
# (name, shape, dtype), not its numeric values.
print(output)

Tensor("rnnlm_1/Reshape:0", shape=(6, 4), dtype=float32)


In [49]:
# Show the input batch and then the target batch, each starting on its own line.
print(x, y, sep="\n")

[[6 3]
 [4 6]
 [5 4]]
[[3 0]
 [6 3]
 [4 6]]


In [51]:
# Dump each trainable variable: its graph description, then its current value
# followed by one blank separator line.
for tvar in tvars:
    print(tvar)
    print(tvar.eval(), end="\n\n")

Tensor("rnnlm/softmax_w/read:0", shape=(4, 7), dtype=float32)
[[-0.63689959 -0.47216704  0.297059    0.16834491 -0.54989243 -0.32636529
   0.17637902]
 [-0.78735244  0.54169887  0.53561395  0.48295254  0.10195148 -0.75769854
   0.37552732]
 [ 0.15824944  0.39673597 -0.66539979  0.50263208 -0.83951503 -0.26330036
   0.81498832]
 [ 0.22424036 -0.20029074 -0.25088799  0.77444297 -0.15448761  0.014449
  -0.59497726]]

Tensor("rnnlm/softmax_b/read:0", shape=(7,), dtype=float32)
[ 1.08843982 -0.6445905   0.13676846 -0.36762774 -1.26939881  0.41135585
  0.97577727]

Tensor("rnnlm/embedding/read:0", shape=(7, 4), dtype=float32)
[[ 0.09611398 -0.21043825  0.4852308  -0.34034497]
 [-0.08064193  0.60480046 -0.57025039  0.02360171]
 [-0.29699752 -0.48134381  0.35710895  0.35222185]
 [-0.6222471  -0.35754392  0.5245924  -0.05367881]
 [-0.49833781  0.47993124  0.14076376  0.58626771]
 [ 0.13575166 -0.18456259  0.39192641  0.23433405]
 [ 0.09423882  0.40222573 -0.46399361  0.13300931]]

Tensor("rnnlm