In [3]:
# https://www.tensorflow.org/extend/estimators
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# tensorflow
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
import tensorflow.contrib.learn as tflearn
import tensorflow.contrib.layers as tflayers

# keras
from tensorflow.contrib.keras.python.keras.layers import Dense, LSTM, GRU, Activation
from tensorflow.contrib.keras.python.keras.utils.data_utils import get_file

# input data
from tensorflow.examples.tutorials.mnist import input_data

# estimators
from tensorflow.contrib import learn

# estimator "builder"
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib

# helpers
import numpy as np
import random
import sys

# Enable logs: raise TensorFlow's logging threshold to INFO so that the
# Estimator's progress messages (checkpoint saves, loss, global_step/sec)
# show up in the cell output while training.
tf.logging.set_verbosity(tf.logging.INFO)

def sample(preds, temperature=1.0):
    """Greedily pick the index of the largest entry in ``preds``.

    Args:
        preds: array-like of scores/probabilities; it is searched in
            flattened order, so the returned index refers to the flat array.
        temperature: accepted for call-site compatibility but currently
            ignored -- the choice is always the arg-max, never a random draw.

    Returns:
        Integer index of the first maximum value.
    """
    scores = np.asarray(preds)
    return scores.argmax()

# THE MODEL
def model_fn(features, targets, mode, params):
    """Model function for Estimator: a single-layer GRU next-character classifier.

    Args:
        features: input tensor fed directly into the GRU layer. The layer is
            declared with input_shape=(params["maxlen"], params["vocab_size"]),
            so a (batch, maxlen, vocab_size) tensor is expected.
        targets: one-hot labels, one row per example, with the same width as
            the logits (params["vocab_size"]). Only read when mode is not
            INFER, so it may be None for prediction.
        mode: a learn.ModeKeys value (TRAIN, EVAL or INFER).
        params: dict providing "maxlen", "vocab_size" and "learning_rate"
            ("learning_rate" is only read in TRAIN mode).

    Returns:
        A ModelFnOps carrying:
          * predictions: {"preds": softmax probabilities over the vocabulary}
          * loss: softmax cross-entropy (None in INFER mode)
          * train_op: RMSProp update op (None outside TRAIN mode)
          * eval_metric_ops: {"accuracy": ...} (empty dict in INFER mode)
    """
    # 1. Configure the model via TensorFlow operations.
    # The recurrent layer consumes the whole input sequence and emits only its
    # final state (return_sequences=False); a dense layer turns that state
    # into one logit per vocabulary entry.
    gru_state = GRU(128, input_shape=(params["maxlen"], params["vocab_size"]),
                    return_sequences=False)(features)
    preds = Dense(params["vocab_size"])(gru_state)
    preds_softmax = Activation("softmax")(preds)

    # Defaults used when the corresponding op is not needed for this mode.
    loss = None
    train_op = None
    metrics = {}

    # 2. Define the loss and evaluation metrics (TRAIN and EVAL modes only).
    # Both read `targets`, which is not available when predicting, so building
    # them unconditionally would break graph construction in INFER mode.
    if mode != learn.ModeKeys.INFER:
        # The loss takes the raw logits; softmax is applied inside the op.
        loss = tf.losses.softmax_cross_entropy(
            onehot_labels=targets, logits=preds)

        metrics = {
            "accuracy": tf.metrics.accuracy(
                labels=tf.argmax(input=targets, axis=1),
                predictions=tf.argmax(input=preds_softmax, axis=1))
        }

    # 3. Define the training operation/optimizer (TRAIN mode only).
    if mode == learn.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=params["learning_rate"],
            optimizer="RMSProp",
        )

    # 4. Generate predictions: probabilities rather than logits, so callers
    # can rank or sample characters directly.
    predictions_dict = {
      "preds": preds_softmax
    }

    # 5. Return predictions/loss/train_op/eval_metric_ops in ModelFnOps object
    return model_fn_lib.ModelFnOps(
      mode=mode,
      predictions=predictions_dict,
      loss=loss,
      train_op=train_op,
      eval_metric_ops=metrics)

In [4]:
print('Getting data')

# Alternative corpus (downloaded on demand):
#path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
path = 'shakespeare.txt'
# Read the whole corpus, lower-cased to shrink the vocabulary. The context
# manager guarantees the file handle is closed once the text is in memory.
with open(path) as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character, sorted so the index assignment is
# deterministic across runs, plus lookup tables in both directions.
chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# Cut the text into overlapping windows of `maxlen` characters, taking a new
# window every `step` characters. Each window is paired with the single
# character that follows it, which is the prediction target.
maxlen = 40
step = 1
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode:
#   X[i, t, k] == 1  iff character t of window i is chars[k]
#   y[i, k]    == 1  iff the character following window i is chars[k]
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.float32)
y = np.zeros((len(sentences), len(chars)), dtype=np.float32)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

# Sanity check: show the encoding of the first window.
print(X[0])

Getting data
corpus length: 1115394
total chars: 39
nb sequences: 1115354
Vectorization...
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]


In [None]:
# ---- Hyper-parameters --------------------------------------------------
LEARNING_RATE = 0.01
BATCH_SIZE = 1
STEPS = len(sentences)   # one training step per extracted window
NUM_OUTPUTS_PRED = 500   # how many characters to generate once training is done

# Values handed to model_fn through its `params` argument.
model_params = dict(
    learning_rate=LEARNING_RATE,
    vocab_size=len(chars),
    maxlen=maxlen,
)

# ---- Build and train the Estimator -------------------------------------
nn = tf.contrib.learn.Estimator(model_fn=model_fn, params=model_params)

print('-' * 40, "Training", '-' * 40, sep='\n')
nn.fit(x=X, y=y, steps=STEPS, batch_size=BATCH_SIZE)

# ---- Generate text -----------------------------------------------------
# Seed the generator with a randomly chosen window from the corpus.
start_index = random.randint(0, len(text) - maxlen - 1)
sentence = text[start_index: start_index + maxlen]
original_sentence = generated = sentence

for i in range(NUM_OUTPUTS_PRED):
    # One-hot encode the current window as a batch containing one example.
    x = np.zeros((1, maxlen, len(chars)), dtype=np.float32)
    x[0, np.arange(len(sentence)), [char_indices[c] for c in sentence]] = 1.

    # Consume every prediction the estimator yields and keep the first one
    # (a single-example batch is submitted).
    outputs = list(nn.predict(x))
    p = outputs[0]["preds"] if outputs else None

    next_index = sample(p)
    next_char = indices_char[next_index]

    # Append the new character and slide the window one position forward.
    generated += next_char
    sentence = sentence[1:] + next_char

# ---- Report ------------------------------------------------------------
print('\n' * 10, '-' * 100)
print('HERE')
print(generated)
print(original_sentence)
print('-' * 100, '\n' * 10)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_id': 0, '_save_checkpoints_secs': 600, '_keep_checkpoint_every_n_hours': 10000, '_master': '', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f48cb050c88>, '_keep_checkpoint_max': 5, '_evaluation_master': '', '_tf_random_seed': None, '_is_chief': True, '_num_ps_replicas': 0, '_environment': 'local', '_model_dir': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_num_worker_replicas': 0, '_save_summary_steps': 100, '_task_type': None, '_save_checkpoints_steps': None}
----------------------------------------
Training
----------------------------------------
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...

  equality = a == b


INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpsnevr3y3/model.ckpt.
INFO:tensorflow:loss = 3.64558, step = 1
INFO:tensorflow:global_step/sec: 26.5339
INFO:tensorflow:loss = 4.03005, step = 101 (3.770 sec)
INFO:tensorflow:global_step/sec: 27.2854
INFO:tensorflow:loss = 2.77961, step = 201 (3.665 sec)
INFO:tensorflow:global_step/sec: 26.7679
INFO:tensorflow:loss = 3.10089, step = 301 (3.739 sec)
INFO:tensorflow:global_step/sec: 26.8403
INFO:tensorflow:loss = 5.49769, step = 401 (3.729 sec)
INFO:tensorflow:global_step/sec: 26.7837
INFO:tensorflow:loss = 3.52534, step = 501 (3.728 sec)
INFO:tensorflow:global_step/sec: 25.8704
INFO:tensorflow:loss = 1.59166, step = 601 (3.864 sec)
INFO:tensorflow:global_step/sec: 26.789
INFO:tensorflow:loss = 6.09143, step = 701 (3.734 sec)
INFO:tensorflow:global_step/sec: 26.8786
INFO:tensorflow:loss = 2.4158, step = 801 (3.720 sec)
INFO:tensorflow:global_step/sec: 27.3595
INFO:tensorflow:loss = 2.93907, s

INFO:tensorflow:loss = 3.88607, step = 8401 (3.563 sec)
INFO:tensorflow:global_step/sec: 27.2645
INFO:tensorflow:loss = 3.29669, step = 8501 (3.670 sec)
INFO:tensorflow:global_step/sec: 27.0021
INFO:tensorflow:loss = 2.18847, step = 8601 (3.702 sec)
INFO:tensorflow:global_step/sec: 26.7854
INFO:tensorflow:loss = 3.34612, step = 8701 (3.733 sec)
INFO:tensorflow:global_step/sec: 27.8074
INFO:tensorflow:loss = 1.53164, step = 8801 (3.599 sec)
INFO:tensorflow:global_step/sec: 26.6641
INFO:tensorflow:loss = 6.73189, step = 8901 (3.748 sec)
INFO:tensorflow:global_step/sec: 26.6922
INFO:tensorflow:loss = 1.76937, step = 9001 (3.751 sec)
