In [1]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf
# Turn on eager execution so tensors can be inspected with .numpy() and
# datasets can be iterated directly, as later cells in this notebook do.
tf.enable_eager_execution()

import numpy as np
import os
import time

  from ._conv import register_converters as _register_converters


In [2]:
# Text corpus the character-level model is trained on.
path_to_file = "animal_texts_.txt"

In [3]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8. The context
# manager closes the file handle as soon as the read has finished.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print('Length of text: {} characters'.format(len(text)))

# The vocabulary is the sorted set of distinct characters in the corpus.
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))

Length of text: 1778730 characters
87 unique characters


In [4]:
# Lookup tables between characters and integer ids; a character's id is its
# position in the sorted `vocab` list.
idx2char = np.array(vocab)
char2idx = dict(zip(vocab, range(len(vocab))))

# The whole corpus encoded as a single array of character ids.
text_as_int = np.array([char2idx[ch] for ch in text])

In [5]:
# Number of input characters in a single training example.
seq_length = 200

# Each example consumes seq_length + 1 characters (the inputs plus the
# one-step-shifted targets are cut from the same chunk), so that is the stride
# to divide the corpus length by.
examples_per_epoch = len(text) // (seq_length + 1)

# Stream the encoded corpus one character id at a time, then cut it into
# consecutive chunks of seq_length + 1 ids, dropping a short final chunk.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

In [6]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) training pair.
dataset = sequences.map(split_input_target)

In [7]:
BATCH_SIZE = 64
steps_per_epoch = examples_per_epoch // BATCH_SIZE

# Size of the shuffle buffer. tf.data is designed to work with possibly
# infinite sequences, so it does not shuffle the whole dataset in memory;
# it shuffles elements within a buffer of this many entries instead.
BUFFER_SIZE = 10000

# Build the pipeline from `sequences` rather than re-wrapping `dataset`, so
# re-running this cell does not stack a second shuffle/batch on top of the
# first one.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [8]:
# Number of distinct characters; build_model uses it as the embedding input
# size and as the width of the final Dense layer.
vocab_size = len(vocab)

# Width of the learned character embedding vectors.
embedding_dim = 256

# Number of units in each recurrent layer.
rnn_units = 1024

In [9]:
# Choose the recurrent layer class once; build_model instantiates it twice.
if tf.test.is_gpu_available():
  # cuDNN-backed GRU implementation for GPU runs.
  rnn = tf.keras.layers.CuDNNGRU
else:
  import functools
  # Fall back to a plain GRU on CPU so both branches build the same kind of
  # network; an LSTM here would not match checkpoints written by the GPU
  # branch. The sigmoid recurrent activation is the setting intended to
  # mirror the cuDNN kernel (see the Keras GRU/CuDNNGRU docs).
  rnn = functools.partial(
    tf.keras.layers.GRU, recurrent_activation='sigmoid')

In [10]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character model: embedding, two stateful RNN layers, dense output.

  Args:
    vocab_size: number of distinct character ids; sizes the embedding table
      and the output layer.
    embedding_dim: width of each character embedding vector.
    rnn_units: number of units in each of the two recurrent layers.
    batch_size: fixed batch dimension baked into the input shape (the
      recurrent layers are stateful, so it cannot be left open).

  Returns:
    An uncompiled tf.keras.Sequential model.
  """
  def recurrent_layer():
    # Both recurrent layers share exactly the same configuration.
    return rnn(rnn_units,
               return_sequences=True,
               recurrent_initializer='glorot_uniform',
               stateful=True)

  # Layers are created in stacking order so auto-generated names stay stable.
  embedding = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  first_rnn = recurrent_layer()
  second_rnn = recurrent_layer()
  output_layer = tf.keras.layers.Dense(vocab_size)

  return tf.keras.Sequential([embedding, first_rnn, second_rnn, output_layer])

In [11]:
# Instantiate the training-time model with the training batch size.
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE,
)

In [12]:
# Push a single batch through the untrained model as a shape sanity check.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print("{} # (batch_size, sequence_length, vocab_size)".format(
      example_batch_predictions.shape))

(64, 200, 87) # (batch_size, sequence_length, vocab_size)


In [13]:
# Show the layers, output shapes and parameter counts of the training model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           22272     
_________________________________________________________________
cu_dnngru (CuDNNGRU)         (64, None, 1024)          3938304   
_________________________________________________________________
cu_dnngru_1 (CuDNNGRU)       (64, None, 1024)          6297600   
_________________________________________________________________
dense (Dense)                (64, None, 87)            89175     
Total params: 10,347,351
Trainable params: 10,347,351
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Sample one character id per row of the first example's predictions, then
# drop the trailing sample axis and pull the ids out as a NumPy array.
# (tf.random.categorical was noted in the original cell as an alternative.)
sampled_indices = tf.squeeze(
    tf.random.multinomial(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()

In [15]:
def loss(labels, logits):
  """Sparse softmax cross-entropy between `labels` and unnormalised `logits`.

  Thin wrapper around tf.nn.sparse_softmax_cross_entropy_with_logits so it can
  be handed to model.compile as the training loss.
  """
  return tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=labels)

# Evaluate the loss on the sample batch that was run through the untrained model.
example_batch_loss  = loss(target_example_batch, example_batch_predictions)

In [16]:
# Configure training: Adam with its default settings, optimising the
# notebook's own `loss` function.
model.compile(
    optimizer = tf.train.AdamOptimizer(),
    loss = loss)

In [17]:
# Checkpoints are written under this directory; the file prefix carries an
# {epoch} placeholder so each save gets its own name.
checkpoint_dir = './text100'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback handed to model.fit; stores the weights only, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)

In [18]:
# Number of training epochs (a value of 3 was tried previously).
EPOCHS=31

In [19]:
# dataset.repeat() makes the input stream endless, so steps_per_epoch decides
# where each epoch ends; the callback writes a checkpoint during training.
history = model.fit(
    dataset.repeat(),
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    callbacks=[checkpoint_callback],
)

Epoch 1/31
Epoch 2/31
Epoch 3/31
Epoch 4/31
Epoch 5/31
Epoch 6/31
Epoch 7/31
Epoch 8/31
Epoch 9/31
Epoch 10/31
Epoch 11/31
Epoch 12/31
Epoch 13/31
Epoch 14/31
Epoch 15/31
Epoch 16/31
Epoch 17/31
Epoch 18/31
Epoch 19/31
Epoch 20/31
Epoch 21/31
Epoch 22/31
Epoch 23/31
Epoch 24/31
Epoch 25/31
Epoch 26/31
Epoch 27/31
Epoch 28/31
Epoch 29/31
Epoch 30/31
Epoch 31/31


In [20]:
# Display the path prefix of the most recent checkpoint in checkpoint_dir.
tf.train.latest_checkpoint(checkpoint_dir)

'./text100/ckpt_31'

In [21]:
# Rebuild the same architecture with batch size 1 for generation: the batch
# size is baked into the input shape of the stateful layers, so the training
# model (batch size BATCH_SIZE) cannot be reused directly.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the trained weights from the most recent checkpoint.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

# Build with an input shape of one sequence of unspecified length.
model.build(tf.TensorShape([1, None]))

In [22]:
# Show the layers, output shapes and parameter counts of the batch-size-1 model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            22272     
_________________________________________________________________
cu_dnngru_2 (CuDNNGRU)       (1, None, 1024)           3938304   
_________________________________________________________________
cu_dnngru_3 (CuDNNGRU)       (1, None, 1024)           6297600   
_________________________________________________________________
dense_1 (Dense)              (1, None, 87)             89175     
Total params: 10,347,351
Trainable params: 10,347,351
Non-trainable params: 0
_________________________________________________________________


In [23]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
  """Generate text one character at a time, seeded with `start_string`.

  Relies on the notebook-level `char2idx` and `idx2char` lookup tables.

  Args:
    model: model built with batch size 1. Its recurrent state is reset before
      generation and it is then called once per generated character.
    start_string: non-empty seed text; every character must be a key of
      `char2idx`.
    num_generate: maximum number of characters to sample.
    temperature: positive divisor applied to the predictions before sampling.
      Lower values give more predictable text, higher values more surprising
      text.

  Returns:
    `start_string` followed by the generated characters. Generation can end
    before `num_generate` characters when a stop character is sampled; the
    stop character itself is included in the result.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
    KeyError: if `start_string` contains a character missing from `char2idx`.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Character ids that end generation. They index into the sorted vocabulary.
  # NOTE(review): 0 and 12 are presumably the newline and '.' characters --
  # confirm against idx2char for this corpus.
  hard_stop_id = 0          # always stops generation
  soft_stop_id = 12         # stops generation only once the text is long enough
  min_steps_before_soft_stop = 250

  # Vectorise the seed and add a batch dimension of 1.
  input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

  # Generated characters, joined once at the end.
  text_generated = []

  # Here batch size == 1
  model.reset_states()
  predicted_id = None  # nothing sampled yet, so neither stop test can fire
  for step in range(num_generate):
    # Stop tests look at the character sampled in the previous iteration.
    if predicted_id == hard_stop_id:
      break
    if predicted_id == soft_stop_id and step > min_steps_before_soft_stop:
      break

    # Run the model and remove the batch dimension.
    predictions = tf.squeeze(model(input_eval), 0)

    # Sample the next character id from the temperature-scaled predictions;
    # only the sample for the last input position is used.
    predictions = predictions / temperature
    predicted_id = tf.multinomial(predictions, num_samples=1)[-1, 0].numpy()

    # Feed the sampled character back in as the next input; the stateful
    # layers carry the hidden state over from the previous call.
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return start_string + ''.join(text_generated)

In [24]:
# Seed strings tried earlier with this function: u"ROMEO", u"writing".
# Print num_dis independently generated samples, each seeded with "The".
num_dis = 50
for i in range(1, num_dis + 1):
    print("Description {} :".format(i))
    print(generate_text(model, start_string=u"The"))
    print()


Description 1 :
The breeding of dark size and its larger se.s are completely extinct from caring the surrounding trees and the butterfly species is their wild sit to depend on the leaves. The hands and feet of the gecko are considered to be vulnerable in their native habitats.

Description 2 :
The woodlouse is found in warmer areas such as peacocks and Florida to Puffins and even in large summer.Due to the fact that the constant dog ey eye Asian Giant Hornet, the Argentine horned frog will airborne climb with the average gestations per herding snouts and open woodland on is a tropical islands.

Description 3 :
The spider monkey mainly feeds on algae and brine shrimp diet. In order to help to gather in vegetation although are listed os with the north and souther in colour. The macaroni penguin is the most dominant predator within its environment, mainly hunting more species of fish, crabs, but the bactrian camel is considered to be an endangered species and although they are severely th

The backward is generally fossive nature of the dusk and the brown bear posen to breed. Although all the biggest threats to the cross river gorilla is one of the great apes, a group that includes often landing more yellow-eyed penguins is often considered to be either can chew until they are between 15 and 30 pack parrot including forest, savanna and scratch settlements encroach on their surroundings to defend itself against smaller sizes predators in the wild, which disappears to depend on their geographic location.

Description 24 :
The grey reef shark will therefore only be foundde infants that grow continuously the world.Klack Relationship with Humans The Proboscis Monkey was once also subved teeth with some of the world's ferocious predators that ve and despite it's pigletely from the way down to detect approaching a diption and ferocious predator.

Description 25 :
The grasshopper is a small species of seming of the gorilla food source, with a wingspan of around 7cm. Elephant's m

The brown bear has moroucl group including the plantinces of eld for thes. Most cockroach species breed, the Galapagos penguins have numerous different crocodiles tend to be found more intimidating. Litter size of a Leopard (besides the fact that they are seen as pests by hunters and farmland.

Description 47 :
The grey reef shark is the coyote set too lunching limit the rat as they have a bear very similar in both size and appearancelld eyes and over a meter long.Tang are found all around the world include the dominant predator that lived around the world. However, the Vampire Bats, it is then tasken in the herd (known as aunties).

Description 48 :
The brown bear uses to communicate with one another using a series of canine tendence to open their eyes when they are about a year old. The baby flippers of land, but the breed still inhabits a variety of plant and animal shales and other fruits and an hour ound to take a very distinctive "Koo woodplain with hunters, and despite its heigh