In [None]:
import tensorflow as tf
import numpy as np
import os
tf.enable_eager_execution()

# Read the whole corpus into memory; the context manager guarantees the file
# handle is closed instead of being left for the garbage collector.
with open('./data/tianlongbabu_jinyong.txt', encoding="utf8") as corpus_file:
    text = corpus_file.read()

# Character-level vocabulary: every distinct character of the corpus, sorted
# so that the character <-> index mapping is deterministic across runs.
vocab = sorted(set(text))
vocab_size = len(vocab)
print('{} unique characters'.format(vocab_size))

# Lookup tables in both directions: character -> id and id -> character.
char2idx = {c: idx for idx, c in enumerate(vocab)}
idx2char = np.array(vocab)

# The full corpus encoded as integer ids.
text_int = np.array([char2idx[c] for c in text])

char_dataset = tf.data.Dataset.from_tensor_slices(text_int)

seq_length = 100
# Every example consumes seq_length + 1 characters (the input plus the target
# shifted by one position), so that is the divisor giving the number of
# examples the pipeline below actually produces.
n_samples = len(text) // (seq_length + 1)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)
# Split each chunk into (input, target): the target is the input shifted left
# by one character.
dataset = sequences.map(lambda seq: (seq[:-1], seq[1:]))

batch_size = 64
# Number of full mini-batches in one pass over the data.
n_mini_batches = n_samples // batch_size

# Open question from the author: what happens if the examples are not shuffled?
buffer_size = 10000
dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)

# Recurrent layer class used by build_model. CuDNNLSTM is the cuDNN-backed
# LSTM and needs a CUDA-capable GPU at run time.
rnn = tf.keras.layers.CuDNNLSTM

def build_model(vocab_size, embedding_size, rnn_units, batch_size):
    """Assemble the embedding -> recurrent -> dense character model.

    Args:
        vocab_size: Number of distinct characters; used both as the embedding
            input dimension and as the width of the output layer.
        embedding_size: Dimension of the character embedding vectors.
        rnn_units: Number of units in the recurrent layer.
        batch_size: Fixed batch size baked into the input shape; the recurrent
            layer is stateful, so the batch dimension cannot be left open.

    Returns:
        An uncompiled tf.keras.Sequential model. The final Dense layer has no
        activation, so the model emits unnormalized scores per character.
    """
    # Sequence length is left as None so inputs of any length are accepted.
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_size,
        batch_input_shape=[batch_size, None],
    )
    # `rnn` is the module-level recurrent layer class.
    recurrent = rnn(
        rnn_units,
        return_sequences=True,
        recurrent_initializer='glorot_uniform',
        stateful=True,
    )
    projection = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

# Model hyperparameters.
embedding_size = 512
rnn_units = 1024

# Training model; its batch dimension is fixed to the training batch size.
model = build_model(vocab_size, embedding_size, rnn_units, batch_size)

# Open question from the author: what if the output layer used softmax?
# The Dense layer in build_model has no activation, so the model emits raw
# logits and the loss must be told so via from_logits=True. With a softmax
# output layer, from_logits would have to be False instead.
def loss_fun(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    Args:
        labels: Integer class ids of the target characters.
        logits: Unnormalized model outputs over the vocabulary.

    Returns:
        The per-element cross-entropy loss tensor.
    """
    cross_entropy = tf.keras.backend.sparse_categorical_crossentropy
    return cross_entropy(labels, logits, from_logits=True)

# Adam with its default hyperparameters, paired with the logits-based loss.
model.compile(optimizer=tf.train.AdamOptimizer(), loss=loss_fun)


# Checkpointing: store only the weights (not the whole model). The "{epoch}"
# placeholder in the prefix is filled in by the callback with the epoch number.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)



In [None]:
# dataset.repeat() makes the input endless, so steps_per_epoch is what decides
# how many mini-batches count as one epoch.
history = model.fit(
    dataset.repeat(),
    epochs=10,
    steps_per_epoch=n_mini_batches,
    callbacks=[checkpoint_callback],
)



In [None]:
# Rebuild the same architecture with batch_size=1: the batch size is baked
# into the stateful model, and generation feeds one sequence at a time.
gen_model = build_model(vocab_size, embedding_size, rnn_units, batch_size=1)

# latest_checkpoint returns None when the directory holds no checkpoint;
# fail with a clear message instead of handing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))
gen_model.load_weights(latest_checkpoint)

gen_model.build(tf.TensorShape([1, None]))



def generate_text(model, start_string, temperature = 1.0, num_generate = 1000):
    """Generate text character by character from a trained model.

    The seed string is fed through the model once; afterwards each sampled
    character is fed back as the next input while the model's recurrent state
    carries the context.

    Args:
        model: Stateful Keras model built for batch size 1. It is assumed to
            return per-position logits over the vocabulary.
        start_string: Non-empty seed text. Every character must be a key of
            the module-level `char2idx` mapping.
        temperature: Positive scaling factor applied to the logits. Low values
            give more predictable text, high values more surprising text.
        num_generate: Number of characters to generate (non-negative).

    Returns:
        `start_string` followed by the generated characters.

    Raises:
        ValueError: If `temperature` is not positive, `num_generate` is
            negative, or `start_string` is empty.
        KeyError: If `start_string` contains a character missing from
            `char2idx`.
    """
    # Validate up front so bad arguments fail fast with a clear message rather
    # than as a division by zero or an obscure shape error inside TensorFlow.
    if temperature <= 0:
        raise ValueError('temperature must be positive, got {!r}'.format(temperature))
    if num_generate < 0:
        raise ValueError('num_generate must be non-negative, got {!r}'.format(num_generate))
    if not start_string:
        raise ValueError('start_string must contain at least one character')

    # Vectorize the seed string and add the batch dimension.
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Characters sampled so far.
    text_generated = []

    # Here batch size == 1. Clear any recurrent state left from earlier calls.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Remove the batch dimension.
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits by the temperature, then sample from the resulting
        # distribution; only the sample for the last position is kept.
        predictions = predictions / temperature
        predicted_id = tf.multinomial(predictions, num_samples=1)[-1, 0].numpy()

        # Feed the sampled character back in as the next input; the previous
        # context lives in the model's recurrent state.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)


# Seed generation with a short excerpt of the corpus. The temperature belongs
# to generate_text; passing it to print() left sampling at the default 1.0 and
# printed a stray "0.5" after the text.
print(generate_text(gen_model, start_string=text[11349:11354], temperature=0.5))
