# Imports

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import re
import random
import os

  from ._conv import register_converters as _register_converters


# Model

# v1 - Current
* One layer of LSTM
* Non-batch train
* Problems
    * Too many small ops, which slows down training and is hard for the GPU to process efficiently

## Dataset Initialization

In [2]:
# Dataset: pretrained word embedding and the two vocabulary lookup tables.
# Both tables are backed by the same vocabulary file, one for each direction
# of the word/id mapping. The tuple is evaluated left to right, so the tables
# are created in the same order as before.
embed = hub.Module("https://tfhub.dev/google/nnlm-en-dim50/1")
string2idtable, id2stringtabel = (
    tf.contrib.lookup.index_table_from_file(
        vocabulary_file="vocabulary-shakespeare-Copy1.txt",
        num_oov_buckets=0,
    ),
    tf.contrib.lookup.index_to_string_table_from_file(
        vocabulary_file="vocabulary-shakespeare-Copy1.txt",
    ),
)
def _insertSpace(sentence):
    sentence = sentence.decode()
    sentence = sentence.lower()
    sentence = re.sub(r'([\W\d])', r' \1 ', sentence)
    return sentence
def _getLabel(sentence):
    """Turn one pre-spaced line into (input tokens, one-hot labels).

    Args:
        sentence: a string tensor holding one whitespace-separated line.

    Returns:
        A pair ``(inputs, labels)``: ``inputs`` is every token except the last
        one, ``labels`` is the depth-11405 one-hot encoding of the vocabulary
        id of every token (looked up in ``string2idtable``).
    """
    tokens = tf.string_split([sentence]).values
    inputs = tokens[0:-1]
    labels = tf.one_hot(string2idtable.lookup(tokens), 11405)
    return inputs, labels
filenames = ["poems/shakespeare/sonnets.txt"]
# One element per text line: tokenise it in Python (_insertSpace), convert it
# to (inputs, one-hot labels) (_getLabel), shuffle, and repeat forever.
# Batching stays disabled for this model: dataset.batch(4)
dataset = (
    tf.data.TextLineDataset(filenames)
    .map(lambda sentence: tf.py_func(_insertSpace, [sentence], tf.string))
    .map(_getLabel)
    .shuffle(buffer_size=10000)
    .repeat()
)
iterator = dataset.make_initializable_iterator()
next_poem = iterator.get_next()

INFO:tensorflow:Using /var/folders/qz/dx3zfgtj2lqf70kv8b90s8wr0000gp/T/tfhub_modules to cache modules.
INFO:tensorflow:Initialize variable module/embeddings/part_0:0 from checkpoint b'/var/folders/qz/dx3zfgtj2lqf70kv8b90s8wr0000gp/T/tfhub_modules/7f07056e3a4c9f125d5bd920ef3883605d8556a8/variables/variables' with embeddings


## Model initialization and Graph building

In [3]:
# with tf.Graph().as_default():
# Graph: single-layer LSTM that consumes one word embedding per session call.
# Variables
with tf.variable_scope("model_v1", reuse=tf.AUTO_REUSE):
    # Projection from the 400-unit LSTM output to the 11405-word vocabulary.
    softmax_w = tf.get_variable("softmax_w", shape=[400, 11405], initializer=tf.random_normal_initializer)
    softmax_b = tf.get_variable("softmax_b", shape=[1, 11405], initializer=tf.random_normal_initializer)

    # TODO Use tuple for state
    lstm = tf.contrib.rnn.LSTMCell(400, state_is_tuple=False, initializer=tf.random_normal_initializer, reuse=tf.AUTO_REUSE, name="LSTM1")

# Model
# Embed the input tokens of the next line and prepend an all-zero vector, so
# that step 0 has an input and inputs line up one-to-one with the labels.
sentence, label = next_poem
sentence = embed(sentence)
sentence = tf.concat([tf.zeros([1, 50]), sentence], 0, name="Insert_X_0")

# LSTM state carried from one session call to the next by the Python loop.
state = tf.placeholder(shape=[1, lstm.state_size], dtype=tf.float32, name="Previous_State")

# Input and Label (one word per call)
x = tf.placeholder(shape=[1, 50], dtype=tf.float32, name="Input_Word")
y = tf.placeholder(shape=[1, 11405], dtype=tf.int32, name="Target_Word")

input_word = tf.reshape(x, [1, 50])
output, out_state = lstm(input_word, state)
logits = tf.add(tf.matmul(output, softmax_w), softmax_b)
possibility = tf.nn.softmax(logits=logits, name="Possibilities")
word_index = tf.argmax(possibility, axis=-1, name="Predict_Word_index")
word = id2stringtabel.lookup(word_index)
loss_op = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)
# Merge only this model's summary. tf.summary.merge_all() would also pick up
# summaries left in the default graph by an earlier run of this cell or by
# another model section; those depend on placeholders that are never fed.
loss_summary = tf.summary.scalar(name="loss", tensor=loss_op)
merged = tf.summary.merge([loss_summary])

# Train Ops
with tf.variable_scope("model_v1"):
    optimizer = tf.train.AdamOptimizer()
train_op = optimizer.minimize(loss_op)

# Global step as variable for persistent tensorboard output
# with tf.variable_scope("model_v1"):
#     global_step = tf.get_variable("Global_Steps", [], initializer=tf.ones_initializer, dtype=tf.int64)

# Variables saver (created last so it also covers the optimizer's variables)
saver = tf.train.Saver()

sess = tf.Session()
# with tf.Session() as sess:
# Initialise first, restore second: running the global initializer after
# saver.restore() would overwrite the restored parameters with fresh random
# values.
sess.run(tf.global_variables_initializer())
tf.tables_initializer().run(session=sess)
try:
    saver.restore(sess, "tmp/model/model_v1/model.ckpt")
except (tf.errors.NotFoundError, ValueError):
    # No usable checkpoint yet (a missing path may surface as ValueError):
    # keep the freshly initialised variables.
    pass
sess.run(iterator.initializer)

# Training log
# TODO writer = tf.summary.FileWriter("tmp/log/"+Modelv1+embedding_version, sess.graph)
writer = tf.summary.FileWriter("tmp/log/model_v1/", sess.graph)

# TODO Retrieve existing g_steps
# g_steps = global_step.eval(session=sess)

# Python-side step counter used for summaries and checkpoint cadence.
g_steps = 1

INFO:tensorflow:Restoring parameters from tmp/model/model_v1/model.ckpt


## Train Module

In [4]:
# Train the v1 model one word at a time, threading the LSTM state through
# feed_dict between session calls.
with tf.variable_scope("model_v1"):
    total_poems = 1000
    for _ in range(total_poems):
        # Fetch the embedded inputs and the one-hot labels of the next line.
        wordsVec, targets = sess.run([sentence, label])
        # TODO How to Access timestep?
        # Initial
        # Position 0 is a forward pass only (train_op is not fetched); it starts
        # from an all-zero state and produces the state for position 1.
        # NOTE(review): targets[0] presumably raises IndexError for a line
        # without tokens - confirm the data file has no blank lines.
        _, pre_state = sess.run([output, out_state], feed_dict={x: [wordsVec[0]], y: [targets[0]], state: np.zeros([1, lstm.state_size], dtype=float)})
        for i in range(1, wordsVec.shape[0]):
            # g_steps = global_step.eval(session=sess)

            # One optimisation step per word; the returned state feeds the next step.
            _, pre_state, loss, summary, out = sess.run([train_op, out_state, loss_op, merged, word], feed_dict={x: [wordsVec[i]], y: [targets[i]], state: pre_state})
            writer.add_summary(summary, global_step=g_steps)
            # TODO Should Average loss
            tf.logging.log_every_n(tf.logging.INFO, "Loss: %s | local step: %s | global step: %s | Output: %s", 100, loss, i, g_steps, out)
            if g_steps%2000 == 0:
                # Store Variables Every 2000 times
                saver.save(sess, "tmp/model/model_v1/model.ckpt")
            # global_step += 1 
            # g_steps lives in the kernel, so re-running this cell continues the count.
            g_steps += 1
    print("Trainning times: ", total_poems)
    # Final checkpoint after the last line.
    saver.save(sess, "tmp/model/model_v1/model.ckpt")

INFO:tensorflow:Loss: 14.2019615 | local step: 1 | global step: 1 | Output: [b'bear']
INFO:tensorflow:Loss: 51.269737 | local step: 101 | global step: 101 | Output: [b'myrtle']
INFO:tensorflow:Loss: 33.284767 | local step: 38 | global step: 201 | Output: [b'orchestra']
INFO:tensorflow:Loss: 19.081789 | local step: 138 | global step: 301 | Output: [b'reality']
INFO:tensorflow:Loss: 30.319763 | local step: 69 | global step: 401 | Output: [b'join']
INFO:tensorflow:Loss: 40.45078 | local step: 7 | global step: 501 | Output: [b'freezings']
INFO:tensorflow:Loss: 39.19845 | local step: 107 | global step: 601 | Output: [b'scared']
INFO:tensorflow:Loss: 26.631638 | local step: 51 | global step: 701 | Output: [b'simply']
INFO:tensorflow:Loss: 29.239092 | local step: 151 | global step: 801 | Output: [b'chrysler']
INFO:tensorflow:Loss: 40.369682 | local step: 100 | global step: 901 | Output: [b'explore']
INFO:tensorflow:Loss: 27.902224 | local step: 43 | global step: 1001 | Output: [b'agriculture'

## Predict Module

In [14]:
# Generate one poem with the v1 model: start from a random vocabulary word and
# keep feeding the predicted word back in until the end marker appears.
with tf.variable_scope("model_v1"):
    def pretty(input_str):
        """Replace the newline marker token with a real line break."""
        return re.sub("xxxnewlinexxx", "\n", input_str)
    # Upper bound on the poem length: greedy decoding is not guaranteed to ever
    # emit the end marker, which would otherwise loop forever.
    max_words = 1000
    # Build the embedding op once. Calling embed(...) inside the loop would add
    # a new set of ops to the graph for every generated word.
    embed_input = tf.placeholder(shape=[1], dtype=tf.string, name="Predict_Embed_Input")
    embed_output = embed(embed_input)
    random_word = id2stringtabel.lookup(tf.constant([random.randint(0, 11404)], dtype=tf.int64))
    # First step: zero input and zero state; only the resulting state is kept.
    word_predict, prev_state = sess.run([random_word, out_state], feed_dict={x: np.zeros([1, 50]), state: np.zeros([1, lstm.state_size], dtype=float)})
    print(word_predict)
    poem = [word_predict[0].decode()]
    while word_predict[0].decode() != "xxxendxxx" and len(poem) < max_words:
        word_predict = sess.run(embed_output, feed_dict={embed_input: word_predict})
        word_predict, prev_state = sess.run([word, out_state], feed_dict={x:word_predict, state: prev_state})
        poem.append(word_predict[0].decode())
        tf.logging.log_every_n(tf.logging.INFO, "%s", 20, pretty(" ".join(poem)))
        # print(pretty(" ".join(poem)))
    # Dump to generated dir (created on demand), named after the first five words
    generated_poem = pretty(" ".join(poem))
    filename = "-".join(poem[:5])
    os.makedirs("generated", exist_ok=True)
    with open("generated/"+filename+".txt", "w") as f:
        f.write(generated_poem)

[b'toe']
INFO:tensorflow:toe ,
INFO:tensorflow:toe , 
 and that thou in love , the world of ? 
 that i ' s in the outbraves rais
INFO:tensorflow:toe , 
 and that thou in love , the world of ? 
 that i ' s in the outbraves rais give . 
 i have i not in thee , 
 and that thou wilt other so , 
 and
INFO:tensorflow:toe , 
 and that thou in love , the world of ? 
 that i ' s in the outbraves rais give . 
 i have i not in thee , 
 and that thou wilt other so , 
 and to me with a ; ; 
 the the time is of my brow , 
 and that is '
INFO:tensorflow:toe , 
 and that thou in love , the world of ? 
 that i ' s in the outbraves rais give . 
 i have i not in thee , 
 and that thou wilt other so , 
 and to me with a ; ; 
 the the time is of my brow , 
 and that is ' s besides do , to thee , 
 and you you , 
 and in prophetic the mine , 

INFO:tensorflow:toe , 
 and that thou in love , the world of ? 
 that i ' s in the outbraves rais give . 
 i have i not in thee , 
 and that thou wilt other so , 
 an

In [None]:
# Flush the pending TensorBoard events (graph and summaries) to disk and close
# the log file. Run this only once training for this model is finished.
writer.close()

# v1 - Revised
* Multi-layers of LSTM
    * Two layers
    * Both 400 units
* Non-batch train

## Dataset Initialization

In [None]:
# Dataset: pretrained word embedding and the two vocabulary lookup tables.
# Both tables are backed by the same vocabulary file, one for each direction
# of the word/id mapping. The tuple is evaluated left to right, so the tables
# are created in the same order as before.
embed = hub.Module("https://tfhub.dev/google/nnlm-en-dim50/1")
string2idtable, id2stringtabel = (
    tf.contrib.lookup.index_table_from_file(
        vocabulary_file="vocabulary-shakespeare.txt",
        num_oov_buckets=0,
    ),
    tf.contrib.lookup.index_to_string_table_from_file(
        vocabulary_file="vocabulary-shakespeare.txt",
    ),
)
def _insertSpace(sentence):
    sentence = sentence.decode()
    sentence = sentence.lower()
    sentence = re.sub(r'([\W\d])', r' \1 ', sentence)
    return sentence
def _getLabel(sentence):
    """Turn one pre-spaced line into (input tokens, one-hot labels).

    Args:
        sentence: a string tensor holding one whitespace-separated line.

    Returns:
        A pair ``(inputs, labels)``: ``inputs`` is every token except the last
        one, ``labels`` is the depth-11405 one-hot encoding of the vocabulary
        id of every token (looked up in ``string2idtable``).
    """
    tokens = tf.string_split([sentence]).values
    inputs = tokens[0:-1]
    labels = tf.one_hot(string2idtable.lookup(tokens), 11405)
    return inputs, labels
filenames = ["poems/shakespeare/sonnets.txt"]
# One element per text line: tokenise it in Python (_insertSpace), convert it
# to (inputs, one-hot labels) (_getLabel), shuffle, and repeat forever.
# Batching stays disabled for this model: dataset.batch(4)
dataset = (
    tf.data.TextLineDataset(filenames)
    .map(lambda sentence: tf.py_func(_insertSpace, [sentence], tf.string))
    .map(_getLabel)
    .shuffle(buffer_size=10000)
    .repeat()
)
iterator = dataset.make_initializable_iterator()
next_poem = iterator.get_next()

## Model initialization and Graph building

In [None]:
# with tf.Graph().as_default():
# Graph: two stacked 400-unit LSTM layers consuming one word embedding per
# session call.
# Variables
with tf.variable_scope("model_v1_revised", reuse=tf.AUTO_REUSE):
    # Projection from the 400-unit top LSTM output to the 11405-word vocabulary.
    softmax_w = tf.get_variable("softmax_w", shape=[400, 11405], initializer=tf.random_normal_initializer)
    softmax_b = tf.get_variable("softmax_b", shape=[1, 11405], initializer=tf.random_normal_initializer)

    # TODO Use tuple for state
    lstm = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.LSTMCell(unit, state_is_tuple=False, initializer=tf.random_normal_initializer, reuse=tf.AUTO_REUSE) for unit in [400, 400]], state_is_tuple=False)

# Model
# Embed the input tokens of the next line and prepend an all-zero vector, so
# that step 0 has an input and inputs line up one-to-one with the labels.
sentence, label = next_poem
sentence = embed(sentence)
sentence = tf.concat([tf.zeros([1, 50]), sentence], 0, name="Insert_X_0")

# Concatenated state of both layers, carried between session calls by the
# Python loop.
state = tf.placeholder(shape=[1, lstm.state_size], dtype=tf.float32, name="Previous_State")

# Input and Label (one word per call)
x = tf.placeholder(shape=[1, 50], dtype=tf.float32, name="Input_Word")
y = tf.placeholder(shape=[1, 11405], dtype=tf.int32, name="Target_Word")

input_word = tf.reshape(x, [1, 50])
output, out_state = lstm(input_word, state)
logits = tf.add(tf.matmul(output, softmax_w), softmax_b)
possibility = tf.nn.softmax(logits=logits, name="Possibilities")
word_index = tf.argmax(possibility, axis=-1, name="Predict_Word_index")
word = id2stringtabel.lookup(word_index)
loss_op = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)
# Merge only this model's summary. tf.summary.merge_all() would also pick up
# the "loss" summary that the v1 section (or an earlier run of this cell) left
# in the default graph; fetching it requires placeholders that are never fed.
loss_summary = tf.summary.scalar(name="loss", tensor=loss_op)
merged = tf.summary.merge([loss_summary])

# Train Ops
with tf.variable_scope("model_v1_revised"):
    optimizer = tf.train.AdamOptimizer()
train_op = optimizer.minimize(loss_op)

# Global step as variable for persistent tensorboard output
# with tf.variable_scope("model_v1_revised"):
#     global_step = tf.get_variable("Global_Steps", [], initializer=tf.ones_initializer, dtype=tf.int64)

# Variables saver (created last so it also covers the optimizer's variables)
saver = tf.train.Saver()

sess = tf.Session()
# with tf.Session() as sess:
# Initialise first, restore second: running the global initializer after
# saver.restore() would overwrite the restored parameters with fresh random
# values.
sess.run(tf.global_variables_initializer())
tf.tables_initializer().run(session=sess)
try:
    saver.restore(sess, "tmp/model/model_v1_revised/model.ckpt")
except (tf.errors.NotFoundError, ValueError):
    # No usable checkpoint yet (a missing path may surface as ValueError):
    # keep the freshly initialised variables.
    pass
sess.run(iterator.initializer)

# Training log
# TODO writer = tf.summary.FileWriter("tmp/log/"+Modelv1+embedding_version, sess.graph)
writer = tf.summary.FileWriter("tmp/log/model_v1_revised/", sess.graph)

# TODO Retrieve existing g_steps
# g_steps = global_step.eval(session=sess)

# Python-side step counter used for summaries and checkpoint cadence.
g_steps = 1

## Train Module

In [None]:
# Train the revised (two-layer) model one word at a time, threading the LSTM
# state through feed_dict between session calls.
with tf.variable_scope("model_v1_revised"):
    total_poems = 1000
    for _ in range(total_poems):
        # Fetch the embedded inputs and the one-hot labels of the next line.
        wordsVec, targets = sess.run([sentence, label])
        # TODO How to Access timestep?
        # Initial
        # Position 0 is a forward pass only (train_op is not fetched); it starts
        # from an all-zero state and produces the state for position 1.
        # NOTE(review): targets[0] presumably raises IndexError for a line
        # without tokens - confirm the data file has no blank lines.
        _, pre_state = sess.run([output, out_state], feed_dict={x: [wordsVec[0]], y: [targets[0]], state: np.zeros([1, lstm.state_size], dtype=float)})
        for i in range(1, wordsVec.shape[0]):
            # g_steps = global_step.eval(session=sess)

            # One optimisation step per word; the returned state feeds the next step.
            _, pre_state, loss, summary, out = sess.run([train_op, out_state, loss_op, merged, word], feed_dict={x: [wordsVec[i]], y: [targets[i]], state: pre_state})
            writer.add_summary(summary, global_step=g_steps)
            # TODO Should Average loss
            tf.logging.log_every_n(tf.logging.INFO, "Loss: %s | local step: %s | global step: %s | Output: %s", 100, loss, i, g_steps, out)
            if g_steps%2000 == 0:
                # Store Variables Every 2000 times
                saver.save(sess, "tmp/model/model_v1_revised/model.ckpt")
            # global_step += 1 
            # g_steps lives in the kernel, so re-running this cell continues the count.
            g_steps += 1
    print("Trainning times: ", total_poems)
    # Final checkpoint after the last line.
    saver.save(sess, "tmp/model/model_v1_revised/model.ckpt")

## Predict Module

In [None]:
# Generate one poem with the revised model: start from a random vocabulary
# word and keep feeding the predicted word back in until the end marker appears.
with tf.variable_scope("model_v1_revised"):
    def pretty(input_str):
        """Replace the newline marker token with a real line break."""
        return re.sub("xxxnewlinexxx", "\n", input_str)
    # Upper bound on the poem length: greedy decoding is not guaranteed to ever
    # emit the end marker, which would otherwise loop forever.
    max_words = 1000
    # Build the embedding op once. Calling embed(...) inside the loop would add
    # a new set of ops to the graph for every generated word.
    embed_input = tf.placeholder(shape=[1], dtype=tf.string, name="Predict_Embed_Input")
    embed_output = embed(embed_input)
    random_word = id2stringtabel.lookup(tf.constant([random.randint(0, 11404)], dtype=tf.int64))
    # First step: zero input and zero state; only the resulting state is kept.
    word_predict, prev_state = sess.run([random_word, out_state], feed_dict={x: np.zeros([1, 50]), state: np.zeros([1, lstm.state_size], dtype=float)})
    print(word_predict)
    poem = [word_predict[0].decode()]
    while word_predict[0].decode() != "xxxendxxx" and len(poem) < max_words:
        word_predict = sess.run(embed_output, feed_dict={embed_input: word_predict})
        word_predict, prev_state = sess.run([word, out_state], feed_dict={x:word_predict, state: prev_state})
        poem.append(word_predict[0].decode())
        tf.logging.log_every_n(tf.logging.INFO, "%s", 20, pretty(" ".join(poem)))
        # print(pretty(" ".join(poem)))
    # Dump to generated dir (created on demand), named after the first five words
    generated_poem = pretty(" ".join(poem))
    filename = "-".join(poem[:5])
    os.makedirs("generated", exist_ok=True)
    with open("generated/"+"model_v1_revised_"+filename+".txt", "w") as f:
        f.write(generated_poem)

In [None]:
# Flush the pending TensorBoard events (graph and summaries) to disk and close
# the log file. Run this only once training for this model is finished.
writer.close()

# v2 - Multi-RNN with Batch
* Deep LSTM
    - Try Different number of layers
    - Try Different LSTM units amount
* Unrolled RNNs
    * [x] Padding
    * [x] Batch
* Advanced Sampling
    * [Candidate Sampling](https://www.tensorflow.org/api_guides/python/nn#Candidate_Sampling) 
* Try other Embedding modules
    - https://tfhub.dev/google/nnlm-en-dim50/1
    - https://tfhub.dev/google/nnlm-en-dim50-with-normalization/1
    - https://tfhub.dev/google/nnlm-en-dim128/1
    - https://tfhub.dev/google/nnlm-en-dim128-with-normalization/1
    - https://tfhub.dev/google/Wiki-words-250/1
    - https://tfhub.dev/google/Wiki-words-250-with-normalization/1
    - https://tfhub.dev/google/Wiki-words-500/1
    - https://tfhub.dev/google/Wiki-words-500-with-normalization/1
* Multi-Dataset for multiple poets
* Merged Poems Generator
## Problem
* `xxxnewlinexxx` is much closer, which results in a low loss value
* The network then treats it as the more suitable prediction
* The same applies to punctuation

In [None]:
class machine_poet_v2:
    def __init__(self, embedding_module_url="https://tfhub.dev/google/nnlm-en-dim50/1", embedding_dim=50, lstm_layers_units=(128, 256, 512), batch_size=4, data_filenames=["poems/shakespeare/sonnets.txt"], vocab_filename="vocabulary-shakespeare.txt", vocab_size=11405):
        ##
        # Model Config
        ##
        self.embedding_module_url = embedding_module_url
        self.embedding_dim = embedding_dim
        self.lstm_layers_units = lstm_layers_units
        self.batch_size = batch_size
        # FUTURE For dynamic batch size
        # self.set_batch_size = batch_size
        # self.dynamic_batch_size = batch_size
        # self.data_filenames = data_filenames
        # self.max_length = max_length # Should be the maximum one-line words amount in all data files
        self.vocab_filename = vocab_filename 
        self.vocab_size = vocab_size # Provide vocab_size? or use Tabel.size() method to calculate?
        self.global_steps = 0
        
        ##
        # Identifier
        ##
        # 0 1 2 ~~3~~
        # embedding lstm batch ~~datafile~~
        self.name_list = []
        self.name_list.append("-".join(self.embedding_module_url.split("/")[-2:]))
        self.name_list.append("-".join([ str(i) for i in lstm_layers_units]))
        self.name_list.append(str(batch_size))
        # self.name_list.append("-".join(self.data_filenames[0].split("/")))
        self.id = "-".join(self.name_list) # + str(random.getrandbits(20)) # TODO Add UNQ ID For separating 
        
        ##
        # Initialize Graph for each model instance
        ##
        self.graph = tf.Graph()
        
        with self.graph.as_default():
            ##
            #
            # Data instancs and Data process Ops
            #
            ##
            # Word2Vec Ops
            self.embed_ops = hub.Module(self.embedding_module_url)

            # Vocab lookup tables
            self.string_to_idtable = tf.contrib.lookup.index_table_from_file(vocabulary_file=self.vocab_filename, num_oov_buckets=0)
            self.id_to_stringtabel = tf.contrib.lookup.index_to_string_table_from_file(vocabulary_file=self.vocab_filename)
            
            # Model/Graph Building
            
            with tf.variable_scope("LSTM") as scope:
                # NOTE NO initializer=tf.random_normal_initializer, but WHY?
                # Relu or Tanh?
                cells = [tf.contrib.rnn.LSTMCell(num_units=unit, state_is_tuple=False, initializer=tf.random_normal_initializer) for unit in self.lstm_layers_units]
                # FUTURE state_is_tuple=True
                self.stacked_rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=False)
                
                # DEBUG ONLY
                # self.stacked_rnn_cell = tf.contrib.rnn.LSTMCell(num_units=64, state_is_tuple=False, initializer=tf.random_normal_initializer)
            
            #####
            # Trainning sub Graph
            #
            # Saved Global Steps
            self.g_steps = tf.get_variable("Global_Steps", [], initializer=tf.zeros_initializer, dtype=tf.int64)
            # Batch_size Max_length Embedding_dim
            # All set None for accommodate dynamic dataset
            # Data
            self.inputs = tf.placeholder(shape=[self.batch_size, None, self.embedding_dim], dtype=tf.float32, name="Input_Words")
            self.inputs_lengths = tf.placeholder(shape=[self.batch_size], dtype=tf.int32, name="Input_Length")
            self.targets = tf.placeholder(shape=[self.batch_size, None, self.vocab_size], dtype=tf.int32, name="Target_Words")
            """
            print(stacked_rnn_cell.state_size)
            state_shape = tf.reshape(stacked_rnn_cell.state_size, [-1])
            state_shape = tf.cast(state_shape, tf.int32)
            print(state_shape)
            state_shape = [tf.Dimension(i) for i in state_shape]
            print(state_shape)
            state_shape = tf.concat([[tf.Dimension(None)], [tf.Dimension(0)]], 0)
            """
            # print(stacked_rnn_cell.state_size)
            
            #[[None, s.c, s.h] for s in stacked_rnn_cell.state_size]
            #state_shape = [len(self.lstm_layers_units), None, s.c, s.h]
            #state_shape = [[None, s.c, s.h] for s in stacked_rnn_cell.state_size]
            #initial_state = tf.placeholder(shape=state_shape, dtype=tf.int32)
            #initial_state=initial_state,
            
            # Unroll the RNN
            outputs, states = tf.nn.dynamic_rnn(cell=self.stacked_rnn_cell, inputs=self.inputs, sequence_length=self.inputs_lengths, dtype=tf.float32)
            # outputs, states = tf.nn.static_rnn(cell=self.stacked_rnn_cell, inputs=self.inputs, sequence_length=self.inputs_lengths, dtype=tf.float32)
            
            # Get logits
            # logits -> shape[batch_size, max_length, vocab_size]
            # TODO Here? Nan?
            # self.Dense = tf.layers.Dense(self.vocab_size, activation=tf.nn.softmax)
            # probabilities = self.Dense(outputs)
            self.Dense = tf.layers.Dense(self.vocab_size)
            logits = self.Dense(outputs)
            
            # Get ouput word
            possibility = tf.nn.softmax(logits)
            word_index = tf.argmax(possibility, axis=-1, name="Predict_Word_index")
            print(word_index.shape)
            self.word_ouput = self.id_to_stringtabel.lookup(word_index)
            print(self.word_ouput.shape)
            
            ##### TODOTODO Loss? Remade
            # Define Loss_Ops
            # Only calculate losses within lengths
            # Modify Loss?
            
            def cost(output, target):
                with tf.name_scope("Cost"):
                    # Compute cross entropy for each frame.
                    # TODO Better MASKING, without casting
                    cross_entropy = tf.multiply(tf.to_float(target), tf.log(output))
                    cross_entropy = -tf.reduce_sum(cross_entropy, 2)

                    mask = tf.to_float(tf.sign(tf.reduce_max(tf.abs(target), 2)))
                    cross_entropy = tf.multiply(cross_entropy, tf.to_float(mask))
                    # Average over actual sequence lengths.
                    cross_entropy = tf.reduce_sum(cross_entropy, 1)
                    cross_entropy /= tf.reduce_sum(mask, 1)
                    return tf.reduce_mean(cross_entropy)
            loss_label = tf.argmax(self.targets, -1)
            self.loss_op = tf.losses.sparse_softmax_cross_entropy(labels=loss_label, logits=logits)
            # self.loss_op = tf.losses.sparse_softmax_cross_entropy(onehot_labels=self.targets, logits=logits)
            # self.loss_op = cost(probabilities, self.targets)
            
            # Define Optimizer_Ops
            optimizer = tf.train.AdamOptimizer()
            # optimizer = tf.train.GradientDescentOptimizer(0.0001)
            # Apply Gradient Clipping to avoid Gradient Exploding
            # gvs = optimizer.compute_gradients(self.loss_op)
            # capped_gvs = [(tf.clip_by_value(grad, -1.0, 1), var) for grad, var in gvs]
            # gvs = [(grad, var) for grad, var in gvs]
            # self.train_op = optimizer.apply_gradientss(gvs)
            self.train_op = optimizer.minimize(self.loss_op) # Unclipped version
            #
            #####
            
            ##
            # Tensorboard Logging
            ##
            # Inputs
            tf.summary.histogram(name="inputs", values=self.inputs)
            tf.summary.histogram(name="inputs_length", values=self.inputs_lengths)
            # Targets
            tf.summary.histogram(name="targets", values=self.targets)
            # Outputs
            tf.summary.histogram(name="Output_possibility", values=possibility)
            # Loss
            tf.summary.scalar(name="loss", tensor=self.loss_op)
            # Log all variables
            for var in tf.trainable_variables():
                print(var.shape)
                self._variable_summaries(var)
            self.merged_op = tf.summary.merge_all()
            
            ##
            # Sampling sub Graph
            ## 
            with tf.name_scope("Sampling"):
                # self.random_start = self.id_to_stringtabel.lookup(tf.constant([random.randint(0, self.vocab_size)], dtype=tf.int64))
                self.initial_state = self.stacked_rnn_cell.zero_state(1, tf.float32)
                self.prev_word = tf.placeholder(shape=[1, self.embedding_dim], dtype=tf.float32, name="Prev_word")
                # print(self.initial_state)
                self.prev_state = tf.placeholder(shape=[1, self.stacked_rnn_cell.state_size], dtype=tf.float32, name="Prev_state")
                
                # print(self.prev_state)
                next_word, self.next_state = self.stacked_rnn_cell(self.prev_word, self.prev_state)
                next_word = self.Dense(next_word)
                next_word = tf.nn.softmax(next_word)
                print("nextword: ", next_word.shape)
                self.next_id = next_word = tf.argmax(tf.reshape(next_word, [-1]))
                print("nextword: ", next_word)
                # Random id
                self.random_start = self.id_to_stringtabel.lookup(tf.constant([random.randint(0, self.vocab_size)], dtype=tf.int64))
                self.next_word = self.id_to_stringtabel.lookup(next_word)
                
    def setDataset(self, data_filenames=["poems/shakespeare/sonnets.txt"], max_length=200, line_slice=None):
        with self.graph.as_default():
            # Loading raw Data
            dataset = tf.data.TextLineDataset(data_filenames)
            ##
            # Preprocess Dataset
            ##
            # Map_fn: Inserting Spaces
            def _insertSpace(sentence):
                sentence = sentence.decode()
                sentence = sentence.lower()
                sentence = re.sub(r'([\W\d])', r' \1 ', sentence)
                return sentence
            # Map_fn: Generating Labels
            def _getLabel(sentence):
                splited = tf.string_split([sentence]).values
                # Limit every input to line_slice words
                splited = splited[:line_slice]
                sentence = splited
                sentence = sentence[0:-1]
                ids = ids_val = self.string_to_idtable.lookup(splited)
                ids = tf.one_hot(ids, self.vocab_size)
                return sentence, ids, ids_val
            
            # Map_fn: Pad Each Sequence, Calculate Length
            # FUTURE Dynamic batch_size? 
            # FUTURE What if batch_size modification occur inside one batch?
            def _padSeq(sentence, ids, ids_val):
                used = tf.sign(tf.abs(ids_val))
                length = tf.reduce_sum(used)
                length = tf.cast(length, tf.int32)
                # Only pad the first dim, which is time_step
                # Shape = [?] with Dtype = tf.string - rank = 1
                sentence = tf.pad(sentence, [[0, max_length-length]], constant_values="")
                # Only pad the first dim, which is time_step
                # Shape = [?, vocab_size] with Dtype = tf.int32 - rank = 2
                ids = tf.pad(ids, [[0, max_length-length], [0, 0]], constant_values=0)
                """
                # FUTURE For dynamic batch size 
                print(length)
                def _pad():    
                    # Padding is needed
                    paddings_config = tf.constant([0, self.max_length-length])
                    sentence = tf.pad(sentence, paddings_config, constant_values="")
                    self.dynamic_batch_size = self.set_batch_size
                def _updateLength():
                    # Set max_length
                    self.max_length = length
                    self.dynamic_batch_size = 1
                tf.cond(length <= self.max_length, true_fn=_pad, false_fn=_updateLength)
                """
                return sentence, ids, length

            dataset = dataset.map(lambda sentence: tf.py_func(_insertSpace, [sentence], tf.string))
            dataset = dataset.map(_getLabel)
            dataset = dataset.map(_padSeq)
            dataset = dataset.shuffle(buffer_size=10000)
            dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(self.batch_size))
            dataset = dataset.repeat()
            self.data_iterator = dataset.make_initializable_iterator()
            self.max_length = max_length

    def initialize(self):
        with self.graph.as_default():
            self.saver = tf.train.Saver()
            sess = tf.Session()
            try:
                os.mkdir("tmp/model/" + self.id + "/")
            except FileExistsError:
                pass
            try:
                self.saver.restore(sess, "tmp/model/" + self.id + "/model.ckpt")
            except tf.errors.NotFoundError:
                pass
            sess.run(tf.global_variables_initializer())
            tf.tables_initializer().run(session=sess)
            sess.run(self.data_iterator.initializer)
            self.writer = tf.summary.FileWriter("tmp/log/" + self.id, sess.graph)
            self.session = sess
            
    def train(self, steps):
        with self.graph.as_default():
            sess = self.session
            next_poems = self.data_iterator.get_next()
            # print(sess.run(next_poems))
            # sentence.shape == [Batch_size, max_length, embedding_dim]
            # padding_config.shape == [D, 2]
            padding_config = [[0, 0], [1, 0], [0, 0]]
            self.global_steps = self.g_steps.eval(session=sess)
            print(self.global_steps)
            for step in range(1, steps):
                self.global_steps += 1
                sentences, labels, lengths = sess.run(next_poems)
                # print(sess.run(tf.argmax(labels,-1)))
                sentences = sess.run(tf.pad(tf.reshape(self.embed_ops(tf.reshape(sentences, [-1])), shape=[self.batch_size, self.max_length-1, self.embedding_dim]), padding_config))
                # np.pad
                # Need to tf.pad sentence -> x_0 = O
                #sentence = tf.pad(sentence, )
                _ , loss, summary, wd_out = sess.run([self.train_op, self.loss_op, self.merged_op, self.word_ouput], feed_dict={self.inputs: sentences, self.inputs_lengths: lengths, self.targets: labels})
                # print(wd_out.shape)
                tf.logging.log_every_n(tf.logging.INFO, "Loss: %s | local steps: %s | global steps: %s | Random word in output: %s", 5, loss, step, self.global_steps, wd_out[random.randint(0, self.batch_size-1), random.randint(0, self.max_length-1)])
                if step%5 == 0:
                    self.g_steps.assign(self.global_steps)
                    # text = tf.summary.text("Output_words", tf.cast(wd_out, dtype=tf.string))
                    # self.writer.add_summary(text, global_step=self.global_steps)
                    self.writer.add_summary(summary, global_step=self.global_steps)
                    self.writer.flush()
                    self.saver.save(sess, "tmp/model/" + self.id + "/model.ckpt")
            self.writer.close()
            
    def sampling(self, n_poems):
        """Generate ``n_poems`` poems word by word and dump each one to ``generated/``.

        For every poem the sampling sub graph is run once with an all-zero
        previous word and the zero initial state; the start word is whatever
        ``self.random_start`` evaluates to. Words are then produced one at a
        time, feeding the embedding of the last word and the last state back in,
        until the token ``xxxendxxx`` is produced.

        Args:
            n_poems: Number of poems to generate.

        Side effects:
            Writes ``generated/<first five words joined by '-'><model id>.txt``
            for each poem (the ``generated`` directory must already exist) and
            prints the predicted ids while sampling.
        """
        with self.graph.as_default():
            sess = self.session
            # Use a dedicated index name: the original reused `poem` for both the
            # loop counter and the word list, so `poem+1` below raised TypeError.
            for poem_index in range(n_poems):
                initial_state = sess.run(self.initial_state)
                predict_word, pre_state, _id = sess.run([self.random_start, self.next_state, self.next_id], feed_dict={self.prev_word: np.zeros([1, self.embedding_dim]), self.prev_state: initial_state})
                print(_id)
                predict_word = predict_word[0]
                print(predict_word)
                poem = [predict_word.decode()]
                while predict_word.decode() != "xxxendxxx":
                    # Continue Sampling
                    # NOTE(review): calling self.embed_ops here adds new ops to the graph
                    # for every generated word; consider building this op once.
                    predict_word = sess.run(self.embed_ops([predict_word]))
                    predict_word, pre_state, _id = sess.run([self.next_word, self.next_state, self.next_id], feed_dict={self.prev_word:predict_word, self.prev_state: pre_state})
                    print(_id)
                    poem.append(predict_word.decode())
                    tf.logging.log_every_n(tf.logging.INFO, "%s", 5, self._pretty(" ".join(poem)))
                # Dump to generated dir
                # The helper is named `_pretty`; `self.pretty` raised AttributeError.
                generated_poem = self._pretty(" ".join(poem))
                filename = "-".join(poem[:5])
                with open("generated/"+filename+self.id+".txt", "w") as f:
                    f.write(generated_poem)
                tf.logging.log_every_n(tf.logging.INFO, "Number %s Complete", 1, poem_index + 1)

    
    def _pretty(self, input_str):
        return re.sub("xxxnewlinexxx", "\n", input_str)
    
    def _variable_summaries(self, var):
        """Attach mean/stddev/max/min scalars and a histogram summary to ``var`` (for TensorBoard).

        Summaries are best-effort: if any summary op cannot be built for this
        variable, the failure is logged as a warning and the remaining summaries
        for it are skipped instead of aborting graph construction.

        Args:
            var: The variable to summarize; ``var.name`` with the ':' removed is
                used as the enclosing name scope.
        """
        # ':' is not allowed inside a name scope, so "kernel:0" becomes "kernel0".
        with tf.name_scope(''.join(var.name.split(':'))):
            try:
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                with tf.name_scope('stddev'):
                    stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
                tf.summary.scalar('stddev', stddev)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))
                tf.summary.histogram('histogram', var)
            except Exception as err:
                # Was a bare `except: pass`, which also hid KeyboardInterrupt and
                # gave no hint about which variable was skipped.
                tf.logging.warning("Skipping summaries for %s: %s", var.name, err)
    

In [None]:
# Small v2 configuration: two 16-unit LSTM layers on the 50-dim NNLM embedding.
# (An earlier try used batch_size=1, max_length=200, lstm_layers_units=(128,).)
model = machine_poet_v2(
    batch_size=4,
    lstm_layers_units=(16, 16),
    embedding_module_url="https://tfhub.dev/google/nnlm-en-dim50/1",
    embedding_dim=50,
)
model.setDataset(
    data_filenames=[
        "poems/shakespeare/sonnets-origin.txt",
        "poems/shakespeare/corpus.txt",
    ],
    line_slice=None,
    max_length=10,
)
model.initialize()


In [None]:
# Train the model built in the previous cell, passing 100 to train().
model.train(100)
# Training problems observed:
# - RNN is not learning
# - Outputs are all '\n' and 'and'

In [None]:
# Generate one poem with the trained model (sampling(n_poems=1)).
model.sampling(1)

# v2-Revised - Random Output Words
* Deep LSTM
    - Try Different number of layers
    - Try Different LSTM units amount
* Unrolled RNNs
    * [x] Padding
    * [x] Batch
* Randomly(?) pick one of the top-k elements of the output probability distribution as the output word
* Advanced Sampling
    * [Candidate Sampling](https://www.tensorflow.org/api_guides/python/nn#Candidate_Sampling) 
* Try other Embedding modules
    - https://tfhub.dev/google/nnlm-en-dim50/1
    - https://tfhub.dev/google/nnlm-en-dim50-with-normalization/1
    - https://tfhub.dev/google/nnlm-en-dim128/1
    - https://tfhub.dev/google/nnlm-en-dim128-with-normalization/1
    - https://tfhub.dev/google/Wiki-words-250/1
    - https://tfhub.dev/google/Wiki-words-250-with-normalization/1
    - https://tfhub.dev/google/Wiki-words-500/1
    - https://tfhub.dev/google/Wiki-words-500-with-normalization/1
* Multi-Dataset for multiple poets
* Merged Poems Generator
* TODO
    * [ ] Save Global Steps?
## Problem
* `xxxnewlinexxx` is much closer, causing a low loss value
* The network then recognizes it as the more suitable output
* The same goes for punctuation

## Available Configuration Lists

In [None]:
# Candidate embedding modules as (name, url) pairs. The name is generated from
# the url: its last two path segments joined by "-",
# e.g. ".../google/nnlm-en-dim50/1" -> "nnlm-en-dim50-1".
embedding_modules_list = [
    ("-".join(module_url.split("/")[-2:]), module_url)
    for module_url in (
        "https://tfhub.dev/google/nnlm-en-dim50/1",
        "https://tfhub.dev/google/nnlm-en-dim50-with-normalization/1",
        "https://tfhub.dev/google/nnlm-en-dim128/1",
        "https://tfhub.dev/google/nnlm-en-dim128-with-normalization/1",
        "https://tfhub.dev/google/Wiki-words-250/1",
        "https://tfhub.dev/google/Wiki-words-250-with-normalization/1",
        "https://tfhub.dev/google/Wiki-words-500/1",
        "https://tfhub.dev/google/Wiki-words-500-with-normalization/1",
    )
]

# Candidate LSTM stacks, one tuple of per-layer unit counts each.
# A name can be generated with "-".join(str(units) for units in config).
lstm_layers_units_config = [(128, 256, 512)]

# Candidate batch sizes (the value itself serves as the name).
batch_sizes = [4, 10, 15]

# Training corpora. A name can be generated with "-".join(filename.split("/")).
trainning_data_filenames = ["poems/shakespeare/sonnets.txt"]

# Vocabulary (dictionary) files.
vocab_filenames = ["vocabulary-shakespeare.txt"]

## Model Class

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import re
import random
import os

In [None]:
class machine_poet_v2_revised:
    """Word-level poem generator: TF-Hub word embeddings -> stacked LSTM -> dense layer over the vocabulary.

    Every instance owns a private ``tf.Graph``. Typical usage::

        model = machine_poet_v2_revised(...)
        model.setDataset()      # build the input pipeline
        model.initialize()      # create the session, initialize / restore variables
        model.train(100)
        model.sampling(1, input_word="thy")

    Checkpoints are written to ``tmp/model/<id>/model.ckpt`` and TensorBoard
    logs to ``tmp/log/<id>``, where ``<id>`` (``self.id``) is derived from the
    embedding module, the LSTM layer sizes, the learning rate and the batch size.
    """

    def __init__(self, embedding_module_url="https://tfhub.dev/google/nnlm-en-dim50/1", embedding_dim=50, lstm_layers_units=(128, 256, 512), learning_rate=0.03, batch_size=4, data_filenames=["poems/shakespeare/sonnets.txt"], max_length=200, vocab_filename="vocabulary-shakespeare.txt", vocab_size=11405):
        """Store the configuration and build the training and sampling sub graphs.

        Args:
            embedding_module_url: TF-Hub module used to embed words.
            embedding_dim: Output dimension of that module; must match the module.
            lstm_layers_units: Number of units of each stacked LSTM layer.
            learning_rate: Learning rate handed to the Adam optimizer.
            batch_size: Fixed batch size of the training placeholders.
            data_filenames: Not read by the constructor; pass the files to
                ``setDataset`` instead. Kept for interface compatibility.
            max_length: Number of time steps every sequence is padded to.
            vocab_filename: Vocabulary file backing the word <-> id lookup tables.
            vocab_size: Depth of the one-hot targets and width of the logits.
        """
        ##
        # Model Config
        ##
        self.embedding_module_url = embedding_module_url
        self.embedding_dim = embedding_dim
        self.lstm_layers_units = lstm_layers_units
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.max_length = max_length # Should be the maximum one-line words amount in all data files
        self.vocab_filename = vocab_filename
        self.vocab_size = vocab_size # Provide vocab_size? or use Table.size() method to calculate?
        # Python-side step counter used as the summary step; it restarts at 0 for every instance.
        self.global_steps = 0

        ##
        # Identifier: <embedding>-<lstm units>--lr-<learning rate>--<batch size>
        # It names the checkpoint and log directories, so its format must stay stable.
        ##
        self.name_list = []
        self.name_list.append("-".join(self.embedding_module_url.split("/")[-2:]))
        self.name_list.append("-".join([ str(i) for i in lstm_layers_units]))
        self.name_list.append("-lr-" + str(self.learning_rate) + "-")
        self.name_list.append(str(batch_size))
        self.id = "-".join(self.name_list) # TODO Add UNQ ID For separating

        ##
        # Initialize Graph for each model instance
        ##
        self.graph = tf.Graph()

        with self.graph.as_default():
            ##
            # Data instances and Data process Ops
            ##
            # Word2Vec Ops
            self.embed_ops = hub.Module(self.embedding_module_url)

            # Vocab lookup tables
            self.string_to_idtable = tf.contrib.lookup.index_table_from_file(vocabulary_file=self.vocab_filename, num_oov_buckets=0)
            self.id_to_stringtabel = tf.contrib.lookup.index_to_string_table_from_file(vocabulary_file=self.vocab_filename)

            # Word -> embedding ops, built ONCE here. train() and sampling() used to
            # call self.embed_ops / tf.reshape / tf.pad inside their loops, which
            # added new nodes to the graph on every single step.
            self.raw_inputs = tf.placeholder(shape=[None, None], dtype=tf.string, name="Raw_Input_Words")
            flat_embeddings = self.embed_ops(tf.reshape(self.raw_inputs, [-1]))
            embedded_words = tf.reshape(flat_embeddings, shape=[self.batch_size, self.max_length-1, self.embedding_dim])
            # Prepend one all-zero time step so that x_0 = 0 is the input that predicts the first word.
            self.embedded_inputs = tf.pad(embedded_words, [[0, 0], [1, 0], [0, 0]])
            self.sample_word = tf.placeholder(shape=[1], dtype=tf.string, name="Sample_Word")
            self.sample_word_embedding = self.embed_ops(self.sample_word)

            # Model/Graph Building
            with tf.variable_scope("LSTM"):
                # NOTE(review): random_normal_initializer has stddev 1.0 by default, which is
                # large for LSTM kernels and may contribute to the "not learning" symptom.
                cells = [tf.contrib.rnn.LSTMCell(num_units=unit, state_is_tuple=False, initializer=tf.random_normal_initializer) for unit in self.lstm_layers_units]
                # FUTURE state_is_tuple=True
                self.stacked_rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=False)

            #####
            # Training sub Graph
            #
            # Saved Global Steps (created so it is part of the checkpoint; not updated yet)
            self.g_steps = tf.get_variable("Global_Steps", [], initializer=tf.zeros_initializer, dtype=tf.int64)
            # Data: [batch_size, max_length, embedding_dim], [batch_size], [batch_size, max_length, vocab_size]
            self.inputs = tf.placeholder(shape=[self.batch_size, self.max_length, self.embedding_dim], dtype=tf.float32, name="Input_Words")
            self.inputs_lengths = tf.placeholder(shape=[self.batch_size], dtype=tf.int32, name="Input_Length")
            self.targets = tf.placeholder(shape=[self.batch_size, self.max_length, self.vocab_size], dtype=tf.int32, name="Target_Words")

            # Unroll the RNN
            outputs, states = tf.nn.dynamic_rnn(cell=self.stacked_rnn_cell, inputs=self.inputs, sequence_length=self.inputs_lengths, dtype=tf.float32)

            # Get logits
            # logits -> shape[batch_size, max_length, vocab_size]
            self.Dense = tf.layers.Dense(self.vocab_size)
            logits = self.Dense(outputs)

            # Get output word (greedy: most probable class at every time step)
            possibility = tf.nn.softmax(logits)
            word_index = tf.argmax(possibility, axis=-1, name="Predict_Word_index")
            print(word_index.shape)
            self.word_ouput = self.id_to_stringtabel.lookup(word_index)
            print(self.word_ouput.shape)

            # Define Loss_Ops
            # Only time steps inside each sequence's length contribute. Without the
            # mask, padded steps (all-zero one-hot rows, argmax == 0) trained the
            # network to emit word id 0 on padding.
            loss_label = tf.argmax(self.targets, -1)
            loss_mask = tf.sequence_mask(self.inputs_lengths, maxlen=self.max_length, dtype=tf.float32)
            self.loss_op = tf.losses.sparse_softmax_cross_entropy(labels=loss_label, logits=logits, weights=loss_mask)

            # Define Optimizer_Ops
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            # TODO Gradient clipping (compute_gradients / clip / apply_gradients) to avoid exploding gradients
            self.train_op = optimizer.minimize(self.loss_op) # Unclipped version
            #
            #####

            ##
            # Tensorboard Logging
            ##
            # Inputs
            tf.summary.histogram(name="inputs", values=self.inputs)
            tf.summary.histogram(name="inputs_length", values=self.inputs_lengths)
            # Targets
            tf.summary.histogram(name="targets", values=self.targets)
            # Outputs
            tf.summary.histogram(name="Output_possibility", values=possibility)
            # Loss
            tf.summary.scalar(name="loss", tensor=self.loss_op)
            # Log all variables
            for var in tf.trainable_variables():
                print(var.shape)
                self._variable_summaries(var)
            self.merged_op = tf.summary.merge_all()

            ##
            # Sampling sub Graph: one LSTM step for a single word (batch of 1)
            ##
            with tf.name_scope("Sampling"):
                self.initial_state = self.stacked_rnn_cell.zero_state(1, tf.float32)
                self.prev_word = tf.placeholder(shape=[1, self.embedding_dim], dtype=tf.float32, name="Prev_word")
                self.prev_state = tf.placeholder(shape=[1, self.stacked_rnn_cell.state_size], dtype=tf.float32, name="Prev_state")

                next_word, self.next_state = self.stacked_rnn_cell(self.prev_word, self.prev_state)
                next_word = self.Dense(next_word)
                next_word = tf.nn.softmax(next_word)
                print("nextword: ", next_word.shape)
                self.next_id = next_word = tf.argmax(tf.reshape(next_word, [-1]))
                print("nextword: ", next_word)
                # Random start word, drawn once at graph-construction time.
                # randrange excludes vocab_size itself; randint(0, vocab_size) could
                # produce an id one past the last vocabulary entry.
                self.random_start = self.id_to_stringtabel.lookup(tf.constant([random.randrange(self.vocab_size)], dtype=tf.int64))
                self.next_word = self.id_to_stringtabel.lookup(next_word)

    def setDataset(self, data_filenames=["poems/shakespeare/sonnets.txt"]):
        """Build the input pipeline that yields ``(words, one_hot_targets, lengths)`` batches.

        Each text line is lower-cased, non-word characters and digits are
        space-separated, the line is split on whitespace and truncated to
        ``max_length`` tokens. The input words are all tokens but the last one
        (padded with "" to ``max_length - 1``), the targets are the one-hot ids
        of all tokens (zero-padded to ``max_length``), and the length is the
        token count.

        Args:
            data_filenames: Text files to read, one sequence per line.

        Sets ``self.data_iterator`` (initializable) and ``self.next_batch``.
        """
        with self.graph.as_default():
            # Loading raw Data
            dataset = tf.data.TextLineDataset(data_filenames)
            ##
            # Preprocess Dataset
            ##
            # Map_fn: Inserting Spaces
            def _insertSpace(sentence):
                sentence = sentence.decode()
                sentence = sentence.lower()
                sentence = re.sub(r'([\W\d])', r' \1 ', sentence)
                return sentence
            # Map_fn: Generating Labels
            def _getLabel(sentence):
                splited = tf.string_split([sentence]).values
                # Longer lines used to make the pad size negative and crash tf.pad.
                splited = splited[:self.max_length]
                sentence = splited[0:-1]
                ids_val = self.string_to_idtable.lookup(splited)
                ids = tf.one_hot(ids_val, self.vocab_size)
                return sentence, ids, ids_val

            # Map_fn: Pad Each Sequence, Calculate Length
            # FUTURE Dynamic batch_size?
            def _padSeq(sentence, ids, ids_val):
                # The data is not padded yet, so the length is simply the token count.
                # Counting non-zero ids (as before) missed every occurrence of the
                # word with vocabulary id 0 and produced wrongly sized sequences.
                length = tf.size(ids_val)
                # Only pad the first dim, which is time_step
                # Shape = [?] with Dtype = tf.string - rank = 1
                # Padding relative to the actual size also keeps empty lines at max_length-1.
                sentence = tf.pad(sentence, [[0, self.max_length-1-tf.size(sentence)]], constant_values="")
                # Only pad the first dim, which is time_step
                # Shape = [?, vocab_size] - rank = 2
                ids = tf.pad(ids, [[0, self.max_length-length], [0, 0]], constant_values=0)
                return sentence, ids, length

            dataset = dataset.map(lambda sentence: tf.py_func(_insertSpace, [sentence], tf.string))
            dataset = dataset.map(_getLabel)
            dataset = dataset.map(_padSeq)
            dataset = dataset.shuffle(buffer_size=10000)
            dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(self.batch_size))
            dataset = dataset.repeat()
            self.data_iterator = dataset.make_initializable_iterator()
            # Create the get_next op once; calling get_next() per train() call adds a new op each time.
            self.next_batch = self.data_iterator.get_next()

    def initialize(self):
        """Create the session, initialize all variables and tables, then restore the checkpoint if one exists.

        Must be called after ``setDataset`` (the data iterator is initialized
        here). Sets ``self.saver``, ``self.writer`` and ``self.session``.
        """
        with self.graph.as_default():
            self.saver = tf.train.Saver()
            sess = tf.Session()
            checkpoint_dir = "tmp/model/" + self.id + "/"
            checkpoint_path = checkpoint_dir + "model.ckpt"
            # makedirs also creates the missing parents "tmp/" and "tmp/model/".
            os.makedirs(checkpoint_dir, exist_ok=True)
            # Initialize FIRST, restore SECOND: the previous order ran the
            # initializer after restore() and thereby discarded the restored weights.
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            tf.tables_initializer().run(session=sess)
            if tf.train.checkpoint_exists(checkpoint_path):
                try:
                    self.saver.restore(sess, checkpoint_path)
                except tf.errors.NotFoundError as err:
                    # Checkpoint does not match this graph: fall back to fresh weights.
                    tf.logging.warning("Could not restore %s, starting from scratch: %s", checkpoint_path, err)
                    sess.run(init_op)
            sess.run(self.data_iterator.initializer)
            self.writer = tf.summary.FileWriter("tmp/log/" + self.id, sess.graph)
            self.session = sess

    def train(self, steps):
        """Run ``steps`` optimization steps on batches from the dataset.

        Every 10 steps (steps 1, 11, 21, ...) the merged summary is written and
        a checkpoint is saved. The loss and one random predicted word are logged
        every 10th call through ``tf.logging``.

        Args:
            steps: Number of training steps to run.
        """
        with self.graph.as_default():
            sess = self.session
            # The writer is closed at the end of every train() call; reopen it so
            # that summaries of a second call are not dropped (no-op when still open).
            self.writer.reopen()
            print(self.global_steps)
            # range(1, steps) ran only steps-1 iterations.
            for step in range(1, steps + 1):
                self.global_steps += 1
                sentences, labels, lengths = sess.run(self.next_batch)
                # Words -> [batch_size, max_length, embedding_dim] with a leading zero step.
                sentences = sess.run(self.embedded_inputs, feed_dict={self.raw_inputs: sentences})
                _ , loss, summary, wd_out = sess.run([self.train_op, self.loss_op, self.merged_op, self.word_ouput], feed_dict={self.inputs: sentences, self.inputs_lengths: lengths, self.targets: labels})
                tf.logging.log_every_n(tf.logging.INFO, "Loss: %s | local steps: %s | global steps: %s | Random word in output: %s", 10, loss, step, self.global_steps, wd_out[random.randint(0, self.batch_size-1), random.randint(0, self.max_length-1)])
                if step%10 == 1:
                    self.writer.add_summary(summary, global_step=self.global_steps)
                    self.writer.flush()
                    self.saver.save(sess, "tmp/model/" + self.id + "/model.ckpt")
            self.writer.close()

    def sampling(self, n_poems, input_word=None):
        """Generate one poem greedily and write it to ``generated/``.

        The sampling graph is first run with an all-zero previous word and the
        zero state. Generation then starts from ``input_word`` if given,
        otherwise from the random start word chosen at graph construction, and
        continues until the token ``xxxendxxx`` is produced.

        Args:
            n_poems: Accepted for interface compatibility; this implementation
                generates exactly one poem per call regardless of its value.
            input_word: Optional first word of the poem.

        Side effects:
            Prints the predicted ids and writes
            ``generated/<first five words joined by '-'><model id>.txt``.
        """
        with self.graph.as_default():
            sess = self.session
            initial_state = sess.run(self.initial_state)
            predict_word, pre_state, _id = sess.run([self.random_start, self.next_state, self.next_id], feed_dict={self.prev_word: np.zeros([1, self.embedding_dim]), self.prev_state: initial_state})
            if input_word is not None:
                predict_word = [input_word.encode()]
            print(_id)
            predict_word = predict_word[0]
            print(predict_word)
            poem = [predict_word.decode()]
            while predict_word.decode() != "xxxendxxx":
                # Continue Sampling: embed the last word with the pre-built op
                # instead of creating a new embedding op per generated word.
                embedded_word = sess.run(self.sample_word_embedding, feed_dict={self.sample_word: [predict_word]})
                predict_word, pre_state, _id = sess.run([self.next_word, self.next_state, self.next_id], feed_dict={self.prev_word: embedded_word, self.prev_state: pre_state})
                print(_id)
                poem.append(predict_word.decode())
                tf.logging.log_every_n(tf.logging.INFO, "%s", 5, self._pretty(" ".join(poem)))
            # Dump to generated dir
            # The helper is `_pretty`; `self.pretty` raised AttributeError after sampling finished.
            generated_poem = self._pretty(" ".join(poem))
            filename = "-".join(poem[:5])
            os.makedirs("generated", exist_ok=True)
            with open("generated/"+filename+self.id+".txt", "w") as f:
                f.write(generated_poem)


    def _pretty(self, input_str):
        """Replace every ``xxxnewlinexxx`` marker in ``input_str`` with a line break."""
        return re.sub("xxxnewlinexxx", "\n", input_str)

    def _variable_summaries(self, var):
        """Attach mean/stddev/max/min scalars and a histogram summary to ``var`` (for TensorBoard).

        Best-effort: if a summary op cannot be built for this variable, a
        warning is logged and the remaining summaries for it are skipped.
        """
        # ':' is not allowed inside a name scope, so "kernel:0" becomes "kernel0".
        with tf.name_scope(''.join(var.name.split(':'))):
            try:
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                with tf.name_scope('stddev'):
                    stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
                tf.summary.scalar('stddev', stddev)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))
                tf.summary.histogram('histogram', var)
            except Exception as err:
                # Was a bare `except: pass`, which also hid KeyboardInterrupt.
                tf.logging.warning("Skipping summaries for %s: %s", var.name, err)
    

In [None]:
# Revised model: two 32-unit LSTM layers on the 250-dim Wiki-words embedding,
# trained with learning rate 0.03 in batches of 32. All other constructor
# arguments and the dataset file list keep their defaults.
model = machine_poet_v2_revised(
    learning_rate=0.03,
    batch_size=32,
    lstm_layers_units=(32, 32),
    embedding_module_url="https://tfhub.dev/google/Wiki-words-250/1",
    embedding_dim=250,
)
model.setDataset()
model.initialize()


In [None]:
# Train the revised model; 100 is passed as the `steps` argument of train().
model.train(100)
# Training problems observed:
# - RNN is not learning
# - Outputs are all '\n' and 'and'

In [None]:
# Sample a poem whose first word is "thy" instead of the random start word.
model.sampling(1, input_word="thy")

# v3 - Dropout Multi Graphs