In [54]:
import pandas as pd
import tensorflow as tf

import numpy as np
import os
import time

In [55]:
# Load the quotes dataset from 'quotes.json' (path relative to the working
# directory) into a DataFrame.
quotes = pd.read_json('quotes.json')

In [56]:
# Display the frame. Note that the same quote is repeated on several rows,
# once per Category it is filed under.
quotes

Unnamed: 0,Quote,Author,Tags,Popularity,Category
0,"Don't cry because it's over, smile because it ...",Dr. Seuss,"[attributed-no-source, cry, crying, experience...",0.155666,life
1,"Don't cry because it's over, smile because it ...",Dr. Seuss,"[attributed-no-source, cry, crying, experience...",0.155666,happiness
2,"I'm selfish, impatient and a little insecure. ...",Marilyn Monroe,"[attributed-no-source, best, life, love, mista...",0.129122,love
3,"I'm selfish, impatient and a little insecure. ...",Marilyn Monroe,"[attributed-no-source, best, life, love, mista...",0.129122,life
4,"I'm selfish, impatient and a little insecure. ...",Marilyn Monroe,"[attributed-no-source, best, life, love, mista...",0.129122,truth
...,...,...,...,...,...
48386,"In Buddhism, they say attachment to anything o...",Jason Mraz,"[Suffering, Laugh, Stage]",0.000000,humor
48387,I love British humor. It's just so - surreal.,Beck,"[Love, British, Surreal]",0.000000,humor
48388,I've got a sense of humor. I'm a funny guy.,Daryl Hall,"[Funny, Guy]",0.000000,humor
48389,"Humor is such a wonderful thing, helping you r...",Lynda Barry,"[Time, Beautiful, Fool]",0.000000,humor


In [57]:
# Non-null row counts per Category, sorted ascending by the Quote count, to
# show how unevenly the categories are represented.
quotes.groupby(['Category']).count().sort_values(['Quote'])

Unnamed: 0_level_0,Quote,Author,Tags,Popularity
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
motivation,34,34,34,34
friendship,655,655,655,655
science,893,893,893,893
religion,943,943,943,943
funny,957,957,957,957
poetry,971,971,971,971
god,994,994,994,994
education,1005,1005,1005,1005
death,1022,1022,1022,1022
faith,1024,1024,1024,1024


In [58]:
# Flatten every quote into one training corpus string.
# str.join builds the result in a single pass instead of re-copying the
# accumulated text on every `text = text + j` step.
# NOTE(review): quotes are joined with no separator, and a quote that appears
# on several Category rows is included once per row; both kept as they were.
sentences = list(quotes['Quote'])
text = ''.join(sentences)

In [59]:
# Distinct characters of the corpus in sorted order; a character's position in
# this list is used as its integer id further down.
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

550 unique characters


In [60]:
# Two-way lookup between characters and integer ids:
#   idx2char[i] -> character, char2idx[character] -> i
idx2char = np.array(vocab)
char2idx = dict(zip(vocab, range(len(vocab))))

# Encode the whole corpus as an array of character ids.
text_as_int = np.array([char2idx[ch] for ch in text])


In [61]:
# Preview the first 20 entries of the character -> id table.
# Slicing the key list (instead of zip(..., range(20)) with a throwaway `_`)
# avoids rebinding `_`, which IPython uses to hold the last cell output.
print('{')
for char in list(char2idx)[:20]:
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')



{
  ' ' :   0,
  '!' :   1,
  '"' :   2,
  '#' :   3,
  '$' :   4,
  '%' :   5,
  '&' :   6,
  "'" :   7,
  '(' :   8,
  ')' :   9,
  '*' :  10,
  '+' :  11,
  ',' :  12,
  '-' :  13,
  '.' :  14,
  '/' :  15,
  '0' :  16,
  '1' :  17,
  '2' :  18,
  '3' :  19,
  ...
}


In [62]:
# Show how the first 13 characters from the text are mapped to integers
# (left: the raw characters, right: their ids from char2idx).
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))


"Don't cry bec" ---- characters mapped to int ---- > [36 78 77  7 83  0 66 81 88  0 65 68 66]


In [63]:
# The maximum length sentence we want for a single input in characters
seq_length = 100
# Number of whole (seq_length + 1)-character chunks the corpus contains.
examples_per_epoch = len(text)//(seq_length+1)

# Create training examples / targets
# One dataset element per character id of the encoded corpus.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Sanity check: decode the first five elements back to characters.
for i in char_dataset.take(5):
    print(idx2char[i.numpy()])

D
o
n
'
t


In [64]:
# Group the character stream into chunks of seq_length + 1 ids; the extra
# element lets each chunk be split into an input and a one-step-shifted target.
# drop_remainder=True discards a final chunk that is shorter than that.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

# Decode the first five chunks to check what they contain.
for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

"Don't cry because it's over, smile because it happened.Don't cry because it's over, smile because it "
"happened.I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at ti"
"mes hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at"
" my best.I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at ti"
"mes hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at"


In [65]:
def split_input_target(chunk):
    """Turn one chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element following input[i].
    """
    return chunk[:-1], chunk[1:]

# Apply the split to every chunk: each element becomes an (input, target) pair.
dataset = sequences.map(split_input_target)

In [66]:
# Decode the first (input, target) pair; the target is the input shifted one
# character to the left.
for input_example, target_example in  dataset.take(1):
    print ('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print ('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  "Don't cry because it's over, smile because it happened.Don't cry because it's over, smile because it"
Target data: "on't cry because it's over, smile because it happened.Don't cry because it's over, smile because it "


In [67]:
# Walk through the first five timesteps of the example above: at each step the
# model receives one character id and is expected to output the next one.
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step    0
  input: 36 ('D')
  expected output: 78 ('o')
Step    1
  input: 78 ('o')
  expected output: 77 ('n')
Step    2
  input: 77 ('n')
  expected output: 7 ("'")
Step    3
  input: 7 ("'")
  expected output: 83 ('t')
Step    4
  input: 83 ('t')
  expected output: 0 (' ')


In [68]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Build the pipeline from `sequences` rather than re-batching `dataset` in
# place, so re-running this cell does not stack a second batch dimension.
# drop_remainder=True keeps every batch at exactly BATCH_SIZE, which the
# stateful model below requires.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int32, tf.int32)>

In [69]:
# Length of the vocabulary in chars
# (used both as the embedding input size and as the width of the output layer)
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [70]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character model: Embedding -> stateful GRU -> Dense.

  Args:
    vocab_size: Number of distinct character ids; sets the embedding input
      size and the width of the final Dense layer.
    embedding_dim: Size of each character embedding vector.
    rnn_units: Number of GRU units.
    batch_size: Fixed batch size baked into the input shape (the GRU is
      stateful); the sequence length is left unspecified.

  Returns:
    An uncompiled tf.keras.Sequential model. The Dense layer has no
    activation, so it outputs one raw score per vocabulary entry per timestep.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None])
  recurrent = tf.keras.layers.GRU(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  projection = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, projection])

In [71]:
# Training model: the batch dimension is fixed to BATCH_SIZE.
model = build_model(
  vocab_size = len(vocab),
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

In [72]:
# Run a single batch through the untrained model to confirm the output shape.
# The batch and its predictions are kept for the sampling and loss checks below.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 550) # (batch_size, sequence_length, vocab_size)


In [73]:
# Layer-by-layer overview of the training model (output shapes, parameter counts).
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (64, None, 256)           140800    
_________________________________________________________________
gru_2 (GRU)                  (64, None, 1024)          3938304   
_________________________________________________________________
dense_2 (Dense)              (64, None, 550)           563750    
Total params: 4,642,854
Trainable params: 4,642,854
Non-trainable params: 0
_________________________________________________________________


In [74]:
# Draw one character id per timestep from the first sequence of the batch,
# then drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()

In [75]:
# Decode the first input sequence and the ids sampled for it. The model has
# not been trained yet, so the sampled characters are expected to be noise.
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))

Input: 
 'n art history. I was especially interested in exploring this idea of the ecstatic impulse in an arti'

Next Char Predictions: 
 'έীẙ`βпÎдաخ¿टζdшγ¸^Vוăم.ˈwÉΛ−̵पț”βăą*Мй♡ूł1ئ8μ─BخСشրـ-ẙԱe2άÂЗК্ხ♡टԱłزBؤЛ¾ص&ড়յkʼςोК¾ض♡»нрtῳţიyНьó€JIӜʺ'


In [76]:
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    from_logits=True because the model's last layer applies no softmax.
    """
    per_step_loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)
    return per_step_loss

# Loss of the untrained model on the example batch. With near-uniform
# predictions over 550 characters the mean should be close to ln(550) ~= 6.31.
example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 550)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       6.3083973


In [77]:
# Configure training: Adam optimizer with the cross-entropy `loss` defined above.
model.compile(optimizer='adam', loss=loss)

In [78]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
# ({epoch} is a placeholder that is filled in with the epoch number)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# save_weights_only=True: store just the weights, not the whole model, so they
# can later be loaded into a model rebuilt with a different batch size.
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [79]:
# Number of passes over the training dataset.
EPOCHS=10

In [80]:
# Train the model; the callback writes a weights checkpoint during training.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [81]:
# Path prefix of the most recent checkpoint found in checkpoint_dir.
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints\\ckpt_10'

In [82]:
# Rebuild the same architecture with batch_size=1 for text generation: the
# training model's batch size is fixed at BATCH_SIZE, so it cannot be fed a
# single sequence. NOTE: this rebinds `model`; the training model is no
# longer reachable under that name.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the trained weights from the most recent checkpoint.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1, None]))

In [83]:
# Same layers and parameter counts as the training model, now with batch size 1.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (1, None, 256)            140800    
_________________________________________________________________
gru_3 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_3 (Dense)              (1, None, 550)            563750    
Total params: 4,642,854
Trainable params: 4,642,854
Non-trainable params: 0
_________________________________________________________________


In [84]:
def generate_text(model = model, start_string = ' ', num_generate=1000, more_sentences = 0,
                  temperature=0.4):
    """Sample text from the character model, one character at a time.

    Generation starts from `start_string` and stops as soon as
    `more_sentences + 1` full stops ('.') have been produced, or after
    `num_generate` characters, whichever comes first.

    Args:
        model: Stateful Keras model that accepts a batch of one sequence. The
            default is the global `model` as it existed when this cell was
            executed, so re-run this cell after rebuilding the model.
        start_string: Non-empty prompt; every character must be a key of
            `char2idx`.
        num_generate: Upper bound on the number of characters to sample.
        more_sentences: Number of extra sentences wanted after the first one.
        temperature: Positive number the logits are divided by before
            sampling. Lower values give more predictable text, higher values
            more surprising text.

    Returns:
        `start_string` followed by the generated characters. This is always a
        string, even when no full stop was produced within `num_generate`
        characters.

    Raises:
        ValueError: If `start_string` is empty or `temperature` is not positive.
        KeyError: If `start_string` contains a character outside the vocabulary.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Converting our start string to numbers (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Characters sampled so far
    text_generated = []

    # Starts at -1 so that more_sentences == 0 stops on the first full stop.
    count_sentences = -1

    # Here batch size == 1
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Sample from the temperature-scaled categorical distribution; only the
        # sample for the last timestep is kept.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

        # We pass the predicted character as the next input to the model
        # along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        predicted_char = idx2char[predicted_id]
        text_generated.append(predicted_char)

        if predicted_char == '.':
            count_sentences += 1
            if count_sentences == more_sentences:
                break

    # Reached either the sentence target or the num_generate budget. Returning
    # here (not only from inside the loop) means callers never receive None.
    return start_string + ''.join(text_generated)

In [85]:
# Unprompted sample using all defaults (start_string ' ', up to 1000 characters).
print(generate_text())

 quite actually students and hands to the present moment as a poem created by patience.


In [92]:
print(generate_text(model, start_string="Life ", num_generate=1000))

Life is a bad which is in the darkness of the world.


In [93]:
print(generate_text(model, start_string="Mysterious ", num_generate=1000))

Mysterious Children are like water.


In [94]:
print(generate_text(model, start_string="Happiness ", num_generate=1000))

Happiness is the most professional thing about the present and every part of the beauty in the world.


In [95]:
print(generate_text(model, start_string="Death ", num_generate=1000))

Death is the best which we shall be able to see the world with a bullet back in the shadows of her life as the student that we can be found in the world.


In [97]:
print(generate_text(model, start_string="Sea ", num_generate=1000))

Sea in the world who has no precious thing to achieve their own consciousness in the desert of our lives and straight at the same time we lose anything that we can get in the world.


In [101]:
print(generate_text(model, start_string="Hope ", num_generate=1000))

Hope is the most powerful men who read them when they are in love.


In [106]:
print(generate_text(model, start_string="Feed ", num_generate=1000))

Feed become what we do for the world with me.


In [109]:
print(generate_text(model, start_string="Forgive ", num_generate=1000))

Forgive everything that will destroy you and desire to experience life and strange and self-destruction.
