# Chuck Tingle Title Generator

This notebook will generate Chuck Tingle titles using Tensorflow and RNNs. We will give the model an initial character or set of characters, and the model will automatically create a novel title.


Inspired by:
https://www.tensorflow.org/tutorials/text/text_generation

In [1]:
# load packages.
import tensorflow as tf
tf.enable_eager_execution()

import pandas as pd
import numpy as np
import os
import time


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Preprocessing

In [2]:
# Load file containing all book titles.
# These titles have been compiled from Chuck Tingles wiki page
book_titles = pd.read_csv('BookList.txt', error_bad_lines=False)
book_titles.head

<bound method NDFrame.head of                                                  title              date
0            My Billionaire Triceratops Craves Gay Ass   December 9 2014
1                         Pounded by President Bigfoot  December 10 2014
2                       Taken by the Gay Unicorn Biker  December 11 2014
3                 Gay T-Rex Law Firm: Executive Boner     January 9 2015
4                          Space Raptor Butt Invasion    January 11 2015
..                                                 ...               ...
193  Not Pounded By The Physical Manifestation Of S...      June 19 2020
194  Bisexual Mothman Mailman Makes A Special Deliv...      June 27 2020
195    Helicopter Man Pounds Dinosaur Billionaire Ass       July 17 2015
196        Buttageddon: The Final Days of Pounding Ass    August 17 2015
197  Trans Wizard Harriet Porber And The Bad Boy Pa...      June 12 2020

[198 rows x 2 columns]>

In [3]:
# get mega string of all book titles
text = ''.join(book_titles['title'].values)

# get unique set of characters
vocab = sorted(set(text))

In [4]:
# Map characters to indices
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

# convert chars to the indicies 
text_as_int = np.array([char2idx[c] for c in text])


# Build Data Generator

In [5]:
ave_num_char = np.mean([len(i) for i in book_titles['title'].values])

print("The average number of char in a title is {:.2f}".format(ave_num_char))

The average number of char in a title is 70.51


In [6]:
# The maximum length title we want for a single input in characters
seq_length = 70
examples_per_epoch = len(text)//(seq_length+1)

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(5):
    print(idx2char[i.numpy()])

Instructions for updating:
Colocations handled automatically by placer.
M
y
 
B
i


In [7]:
# convert the mega string, into a sequence of characters with the desired size
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

# since there aren't that many titles, we make sure the sequence can repeat
sequences = sequences.repeat() 

for item in sequences.take(5):
      print(repr(''.join(idx2char[item.numpy()])))


'My Billionaire Triceratops Craves Gay AssPounded by President BigfootTa'
'ken by the Gay Unicorn BikerGay T-Rex Law Firm: Executive Boner Space R'
'aptor Butt Invasion My Ass Is Haunted by the Gay Unicorn Colonel Pounde'
"d by the Gay Unicorn Football Squad I'm Gay for My Living Billionaire J"
'et Plane Bigfoot Sommelier Butt Tasting Unicorn Butt Cops: Beach Patrol'


In [8]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

# Train Model

In [9]:
# build model

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                        batch_input_shape=[batch_size, None]))
    model.add(tf.keras.layers.GRU(rnn_units,
                        return_sequences=True,
                        stateful=True,
                        recurrent_initializer='glorot_uniform'))
    model.add(tf.keras.layers.Dense(vocab_size))
    return model


In [10]:
BATCH_SIZE = 64
BUFFER_SIZE = 100
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)


model = build_model(
  vocab_size = len(vocab),
  embedding_dim = 256,
  rnn_units = 512,
  batch_size = 64)
model.summary() 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16896     
_________________________________________________________________
gru (GRU)                    (64, None, 512)           1181184   
_________________________________________________________________
dense (Dense)                (64, None, 66)            33858     
Total params: 1,231,938
Trainable params: 1,231,938
Non-trainable params: 0
_________________________________________________________________


In [11]:
for input_example_batch, target_example_batch in dataset.take(1):
    print('ok')
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

model.summary() 
 #%%
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

sampled_indices


def loss(labels, logits):
      return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())


model.compile(optimizer='adam', loss=loss)

# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)



ok
(64, 70, 66) # (batch_size, sequence_length, vocab_size)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16896     
_________________________________________________________________
gru (GRU)                    (64, None, 512)           1181184   
_________________________________________________________________
dense (Dense)                (64, None, 66)            33858     
Total params: 1,231,938
Trainable params: 1,231,938
Non-trainable params: 0
_________________________________________________________________
Prediction shape:  (64, 70, 66)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.1894555


In [28]:
history = model.fit(dataset, epochs=50,
                    steps_per_epoch = 100,
                    callbacks=[checkpoint_callback])


Epoch 1/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 2/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 3/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 4/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 5/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 6/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 7/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 8/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 9/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 10/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 11/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 12/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 13/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 14/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 15/50

Consider using a TensorFlow optimizer from `

Epoch 17/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 18/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 19/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 20/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 21/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 22/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 23/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 24/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 25/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 26/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 27/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 28/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 29/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 30/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 31/50

Consider using a TensorFlow optimiz


Consider using a TensorFlow optimizer from `tf.train`.
Epoch 33/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 34/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 35/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 36/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 37/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 38/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 39/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 40/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 41/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 42/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 43/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 44/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 45/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 46/50

Consider using a TensorFlow optimizer from `tf.

Epoch 48/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 49/50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 50/50

Consider using a TensorFlow optimizer from `tf.train`.


# Predictions

In [29]:
tf.train.latest_checkpoint(checkpoint_dir)

model_pred = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model_pred.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model_pred.build(tf.TensorShape([1, None]))

model_pred.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            16896     
_________________________________________________________________
gru_2 (GRU)                  (1, None, 512)            1181184   
_________________________________________________________________
dense_2 (Dense)              (1, None, 66)             33858     
Total params: 1,231,938
Trainable params: 1,231,938
Non-trainable params: 0
_________________________________________________________________


In [30]:

def generate_text(model, start_string):
    # Evaluation step (generating text using the learned model)

    # Number of characters to generate
    num_generate = 30

    # Converting our start string to numbers (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Empty string to store our results
    text_generated = []

    # Low temperatures results in more predictable text.
    # Higher temperatures results in more surprising text.
    # Experiment to find the best setting.
    temperature = 1.0

    # Here batch size == 1
    model.reset_states()
    for i in range(num_generate):
        predictions = model(input_eval)
        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # using a categorical distribution to predict the character returned by the model
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

        # We pass the predicted character as the next input to the model
        # along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return (start_string + ''.join(text_generated))


In [37]:
start_string = 'What'

# print generated title
print(generate_text(model_pred, start_string=start_string))

What I’m Peebutt Go: Pounded by 'E
