# Family Guy Script Generation

## Get the Data


In [15]:
import helper

data_dir = './data/family-guy-20.txt'
text = helper.load_data(data_dir)

## Explore the Data
Play around with `view_sentence_range` to view different parts of the data.

In [16]:
view_sentence_range = (0, 10)

import numpy as np

print('Dataset Stats')
print('Roughly the number of unique words: {}'.format(len({word: None for word in text.split()})))
scenes = text.split('\n\n')
print('Number of scenes: {}'.format(len(scenes)))
sentence_count_scene = [scene.count('\n') for scene in scenes]
print('Average number of sentences in each scene: {}'.format(np.average(sentence_count_scene)))

sentences = [sentence for scene in scenes for sentence in scene.split('\n')]
print('Number of lines: {}'.format(len(sentences)))
word_count_sentence = [len(sentence.split()) for sentence in sentences]
print('Average number of words in each line: {}'.format(np.average(word_count_sentence)))

print()
print('The sentences {} to {}:'.format(*view_sentence_range))
print('\n'.join(text.split('\n')[view_sentence_range[0]:view_sentence_range[1]]))

Dataset Stats
Roughly the number of unique words: 10799
Number of scenes: 688
Average number of sentences in each scene: 5.867732558139535
Number of lines: 4725
Average number of words in each line: 12.873015873015873

The sentences 0 to 10:
LOIS: Smoking. How does a boy like that go so wrong?
PETER: They live in a crummy neighborhood.
BRIAN: The Bradys?
PETER: Yeah. They got robbers, thugs, drug dealers. You name it.
GUY: You folks want some pancakes? 
PETER: No, thanks. See, that's the worst we got is Jemima's Witnesses.

MEG: Mom, my lips are too thin. Can I please get collagen injections? 
LOIS: Meg, you don't need to change the way you look. You know, most of the world's problems stem from poor self-image.



## Implement Functions

In [17]:
import numpy as np
import problem_unittests as tests

from collections import Counter

def create_lookup_tables(text):
   
    words = set(text)   
    
    int_to_vocab = {i: word for i, word in enumerate(words)}
    
    vocab_to_int = {word: i for i, word in enumerate(words)}
  
    return vocab_to_int, int_to_vocab

def token_lookup():
    """
    Generate a dict to turn punctuation into a token.
    :return: Tokenize dictionary where the key is the punctuation and the value is the token
    """
    punct_to_token = {'.': '<PERIOD>',
                      ',': '<COMMA>',
                      '"': '<QUOTATION>',
                      ';': '<SEMICOLON>',
                      '!': '<EXCLAMATION>',
                      '?': '<QUESTION>',
                      '(': '<LEFTP>',
                      ')': '<RIGHTP>',
                      '[': '<LEFTB>',
                      ']': '<RIGHTB>',
                      '--': '<DASH>',
                      '\n': '<RETURN>',}
    
    return punct_to_token

def get_tensors(loaded_graph):

    InputTensor = loaded_graph.get_tensor_by_name("input:0")
    InitialStateTensor = loaded_graph.get_tensor_by_name("initial_state:0")
    FinalStateTensor = loaded_graph.get_tensor_by_name("final_state:0")
    ProbsTensor = loaded_graph.get_tensor_by_name("probs:0")
    
    return InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor

def pick_word(probabilities, int_to_vocab):
    
    index = np.argmax(probabilities)

    word = int_to_vocab[index]
    
    return word

## Preprocess all the data and save it

In [18]:
# Preprocess Training, Validation, and Testing Data
helper.preprocess_and_save_data(data_dir, token_lookup, create_lookup_tables)

import helper

int_text, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()


## Build the Neural Network

### Setup Tensorflow

In [19]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer'
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.2.1
Default GPU Device: /gpu:0


### Define Functions


In [23]:
def get_inputs():

    Input = tf.placeholder(tf.int32, [None, None], name='input')
    Targets = tf.placeholder(tf.int32, [None, None], name='targets')
    LearningRate = tf.placeholder(tf.float32, name='learningrate')

    return Input, Targets, LearningRate

def get_init_cell(batch_size, rnn_size):

    # Set the size of the LSTMs
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size) 
    cell = tf.contrib.rnn.MultiRNNCell([lstm]*1)

    initial_state = cell.zero_state(batch_size, tf.float32)

    initial_state = tf.identity(initial_state, name="initial_state")

    return cell, initial_state

def get_embed(input_data, vocab_size, embed_dim):
 
    embedding = tf.Variable(tf.random_uniform((vocab_size, embed_dim), -1, 1))
    embed = tf.nn.embedding_lookup(embedding, input_data)

    return embed

def build_rnn(cell, inputs):

    outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    
    final_state = tf.identity(final_state, name="final_state")
        
    return outputs, final_state

def build_nn(cell, rnn_size, input_data, vocab_size, embed_dim):
 
    embedding_layer = get_embed(input_data, vocab_size, embed_dim)
    
    rnn_layer, final_state = build_rnn(cell, embedding_layer)
    
    logits = tf.contrib.layers.fully_connected(inputs=rnn_layer, num_outputs=vocab_size, activation_fn=None)
    
    return logits, final_state

def get_batches(int_text, batch_size, seq_length):
 
    # Turn the input text into an np array.
    int_text = np.array(int_text)
    
    # Get the number of characters per batch and number of batches we can make
    characters_per_batch = batch_size * seq_length
    n_batches = len(int_text)//characters_per_batch
    
    # Keep only enough characters to make full batches
    int_text = int_text[:n_batches * characters_per_batch]
    
    # Reshape into n_seqs rows
    int_text = int_text.reshape((batch_size, -1))
     
    # Create the empty list to start adding batches to.
    batches = []
    
    for n in range(0, int_text.shape[1], seq_length):
        # The features
        x = int_text[:, n:n+seq_length]
        
        # The targets, shifted by one
        rolled = np.roll(int_text, -1)
        y = rolled[:, n:n+seq_length]        
        
        z = np.array([x, y])
        batches.append(z)
        
    # Turn batches into a numpy array.
    batches = np.array(batches)        
        
    return batches


## Neural Network Training
### Hyperparameters


In [24]:
# Number of Epochs
num_epochs = 600
# Batch Size
batch_size = 256
# RNN Size
rnn_size = 256
# Embedding Dimension Size
embed_dim = 256
# Sequence Length
seq_length = 15
# Learning Rate
learning_rate = 0.001
# Show stats for every n number of batches
show_every_n_batches = 10

save_dir = './save'

### Build the Graph

In [25]:
from tensorflow.contrib import seq2seq

train_graph = tf.Graph()
with train_graph.as_default():
    vocab_size = len(int_to_vocab)
    input_text, targets, lr = get_inputs()
    input_data_shape = tf.shape(input_text)
    cell, initial_state = get_init_cell(input_data_shape[0], rnn_size)
    logits, final_state = build_nn(cell, rnn_size, input_text, vocab_size, embed_dim)

    # Probabilities for generating words
    probs = tf.nn.softmax(logits, name='probs')

    # Loss function
    cost = seq2seq.sequence_loss(
        logits,
        targets,
        tf.ones([input_data_shape[0], input_data_shape[1]]))

    # Optimizer
    optimizer = tf.train.AdamOptimizer(lr)

    # Gradient Clipping
    gradients = optimizer.compute_gradients(cost)
    capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
    train_op = optimizer.apply_gradients(capped_gradients)

### Train

In [26]:

batches = get_batches(int_text, batch_size, seq_length)

with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    
    # Run for 1000 epochs with learning rate equal to 0.001
    for epoch_i in range(num_epochs):
        state = sess.run(initial_state, {input_text: batches[0][0]})

        for batch_i, (x, y) in enumerate(batches):
            feed = {
                input_text: x,
                targets: y,
                initial_state: state,
                lr: 0.001}
            train_loss, state, _ = sess.run([cost, final_state, train_op], feed)

            # Show every <show_every_n_batches> batches
            if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:
                print('Epoch {:>3} Batch {:>4}/{}   train_loss = {:.3f}'.format(
                    epoch_i,
                    batch_i,
                    len(batches),
                    train_loss))    

    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_dir)
    print('Model Trained and Saved')

Epoch   0 Batch    0/21   train_loss = 8.784
Epoch   0 Batch   10/21   train_loss = 7.579
Epoch   0 Batch   20/21   train_loss = 6.271
Epoch   1 Batch    9/21   train_loss = 6.167
Epoch   1 Batch   19/21   train_loss = 6.110
Epoch   2 Batch    8/21   train_loss = 5.938
Epoch   2 Batch   18/21   train_loss = 5.923
Epoch   3 Batch    7/21   train_loss = 5.783
Epoch   3 Batch   17/21   train_loss = 5.723
Epoch   4 Batch    6/21   train_loss = 5.635
Epoch   4 Batch   16/21   train_loss = 5.590
Epoch   5 Batch    5/21   train_loss = 5.511
Epoch   5 Batch   15/21   train_loss = 5.514
Epoch   6 Batch    4/21   train_loss = 5.455
Epoch   6 Batch   14/21   train_loss = 5.349
Epoch   7 Batch    3/21   train_loss = 5.293
Epoch   7 Batch   13/21   train_loss = 5.318
Epoch   8 Batch    2/21   train_loss = 5.294
Epoch   8 Batch   12/21   train_loss = 5.287
Epoch   9 Batch    1/21   train_loss = 5.029
Epoch   9 Batch   11/21   train_loss = 5.090
Epoch  10 Batch    0/21   train_loss = 5.021
Epoch  10 

Epoch  87 Batch    3/21   train_loss = 2.582
Epoch  87 Batch   13/21   train_loss = 2.491
Epoch  88 Batch    2/21   train_loss = 2.510
Epoch  88 Batch   12/21   train_loss = 2.471
Epoch  89 Batch    1/21   train_loss = 2.516
Epoch  89 Batch   11/21   train_loss = 2.501
Epoch  90 Batch    0/21   train_loss = 2.499
Epoch  90 Batch   10/21   train_loss = 2.482
Epoch  90 Batch   20/21   train_loss = 2.457
Epoch  91 Batch    9/21   train_loss = 2.452
Epoch  91 Batch   19/21   train_loss = 2.459
Epoch  92 Batch    8/21   train_loss = 2.505
Epoch  92 Batch   18/21   train_loss = 2.395
Epoch  93 Batch    7/21   train_loss = 2.399
Epoch  93 Batch   17/21   train_loss = 2.440
Epoch  94 Batch    6/21   train_loss = 2.397
Epoch  94 Batch   16/21   train_loss = 2.419
Epoch  95 Batch    5/21   train_loss = 2.456
Epoch  95 Batch   15/21   train_loss = 2.371
Epoch  96 Batch    4/21   train_loss = 2.320
Epoch  96 Batch   14/21   train_loss = 2.354
Epoch  97 Batch    3/21   train_loss = 2.404
Epoch  97 

Epoch 174 Batch    6/21   train_loss = 1.290
Epoch 174 Batch   16/21   train_loss = 1.259
Epoch 175 Batch    5/21   train_loss = 1.330
Epoch 175 Batch   15/21   train_loss = 1.252
Epoch 176 Batch    4/21   train_loss = 1.229
Epoch 176 Batch   14/21   train_loss = 1.242
Epoch 177 Batch    3/21   train_loss = 1.284
Epoch 177 Batch   13/21   train_loss = 1.225
Epoch 178 Batch    2/21   train_loss = 1.200
Epoch 178 Batch   12/21   train_loss = 1.165
Epoch 179 Batch    1/21   train_loss = 1.253
Epoch 179 Batch   11/21   train_loss = 1.221
Epoch 180 Batch    0/21   train_loss = 1.191
Epoch 180 Batch   10/21   train_loss = 1.215
Epoch 180 Batch   20/21   train_loss = 1.210
Epoch 181 Batch    9/21   train_loss = 1.199
Epoch 181 Batch   19/21   train_loss = 1.166
Epoch 182 Batch    8/21   train_loss = 1.233
Epoch 182 Batch   18/21   train_loss = 1.168
Epoch 183 Batch    7/21   train_loss = 1.171
Epoch 183 Batch   17/21   train_loss = 1.198
Epoch 184 Batch    6/21   train_loss = 1.182
Epoch 184 

Epoch 261 Batch    9/21   train_loss = 0.658
Epoch 261 Batch   19/21   train_loss = 0.618
Epoch 262 Batch    8/21   train_loss = 0.669
Epoch 262 Batch   18/21   train_loss = 0.644
Epoch 263 Batch    7/21   train_loss = 0.623
Epoch 263 Batch   17/21   train_loss = 0.640
Epoch 264 Batch    6/21   train_loss = 0.617
Epoch 264 Batch   16/21   train_loss = 0.574
Epoch 265 Batch    5/21   train_loss = 0.619
Epoch 265 Batch   15/21   train_loss = 0.596
Epoch 266 Batch    4/21   train_loss = 0.588
Epoch 266 Batch   14/21   train_loss = 0.592
Epoch 267 Batch    3/21   train_loss = 0.603
Epoch 267 Batch   13/21   train_loss = 0.604
Epoch 268 Batch    2/21   train_loss = 0.579
Epoch 268 Batch   12/21   train_loss = 0.549
Epoch 269 Batch    1/21   train_loss = 0.599
Epoch 269 Batch   11/21   train_loss = 0.584
Epoch 270 Batch    0/21   train_loss = 0.554
Epoch 270 Batch   10/21   train_loss = 0.592
Epoch 270 Batch   20/21   train_loss = 0.590
Epoch 271 Batch    9/21   train_loss = 0.584
Epoch 271 

Epoch 348 Batch   12/21   train_loss = 0.382
Epoch 349 Batch    1/21   train_loss = 0.403
Epoch 349 Batch   11/21   train_loss = 0.396
Epoch 350 Batch    0/21   train_loss = 0.370
Epoch 350 Batch   10/21   train_loss = 0.418
Epoch 350 Batch   20/21   train_loss = 0.435
Epoch 351 Batch    9/21   train_loss = 0.443
Epoch 351 Batch   19/21   train_loss = 0.437
Epoch 352 Batch    8/21   train_loss = 0.468
Epoch 352 Batch   18/21   train_loss = 0.460
Epoch 353 Batch    7/21   train_loss = 0.426
Epoch 353 Batch   17/21   train_loss = 0.446
Epoch 354 Batch    6/21   train_loss = 0.400
Epoch 354 Batch   16/21   train_loss = 0.365
Epoch 355 Batch    5/21   train_loss = 0.404
Epoch 355 Batch   15/21   train_loss = 0.387
Epoch 356 Batch    4/21   train_loss = 0.363
Epoch 356 Batch   14/21   train_loss = 0.366
Epoch 357 Batch    3/21   train_loss = 0.382
Epoch 357 Batch   13/21   train_loss = 0.390
Epoch 358 Batch    2/21   train_loss = 0.369
Epoch 358 Batch   12/21   train_loss = 0.354
Epoch 359 

Epoch 435 Batch   15/21   train_loss = 0.262
Epoch 436 Batch    4/21   train_loss = 0.245
Epoch 436 Batch   14/21   train_loss = 0.250
Epoch 437 Batch    3/21   train_loss = 0.266
Epoch 437 Batch   13/21   train_loss = 0.269
Epoch 438 Batch    2/21   train_loss = 0.259
Epoch 438 Batch   12/21   train_loss = 0.253
Epoch 439 Batch    1/21   train_loss = 0.258
Epoch 439 Batch   11/21   train_loss = 0.267
Epoch 440 Batch    0/21   train_loss = 0.242
Epoch 440 Batch   10/21   train_loss = 0.275
Epoch 440 Batch   20/21   train_loss = 0.268
Epoch 441 Batch    9/21   train_loss = 0.260
Epoch 441 Batch   19/21   train_loss = 0.248
Epoch 442 Batch    8/21   train_loss = 0.276
Epoch 442 Batch   18/21   train_loss = 0.271
Epoch 443 Batch    7/21   train_loss = 0.251
Epoch 443 Batch   17/21   train_loss = 0.286
Epoch 444 Batch    6/21   train_loss = 0.247
Epoch 444 Batch   16/21   train_loss = 0.230
Epoch 445 Batch    5/21   train_loss = 0.258
Epoch 445 Batch   15/21   train_loss = 0.258
Epoch 446 

Epoch 522 Batch   18/21   train_loss = 0.280
Epoch 523 Batch    7/21   train_loss = 0.264
Epoch 523 Batch   17/21   train_loss = 0.293
Epoch 524 Batch    6/21   train_loss = 0.254
Epoch 524 Batch   16/21   train_loss = 0.243
Epoch 525 Batch    5/21   train_loss = 0.281
Epoch 525 Batch   15/21   train_loss = 0.285
Epoch 526 Batch    4/21   train_loss = 0.284
Epoch 526 Batch   14/21   train_loss = 0.287
Epoch 527 Batch    3/21   train_loss = 0.302
Epoch 527 Batch   13/21   train_loss = 0.308
Epoch 528 Batch    2/21   train_loss = 0.301
Epoch 528 Batch   12/21   train_loss = 0.284
Epoch 529 Batch    1/21   train_loss = 0.284
Epoch 529 Batch   11/21   train_loss = 0.280
Epoch 530 Batch    0/21   train_loss = 0.253
Epoch 530 Batch   10/21   train_loss = 0.290
Epoch 530 Batch   20/21   train_loss = 0.285
Epoch 531 Batch    9/21   train_loss = 0.278
Epoch 531 Batch   19/21   train_loss = 0.275
Epoch 532 Batch    8/21   train_loss = 0.300
Epoch 532 Batch   18/21   train_loss = 0.306
Epoch 533 

## Generate TV Script

In [29]:
helper.save_params((seq_length, save_dir))

import tensorflow as tf
import numpy as np
import helper
import problem_unittests as tests

_, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()
seq_length, load_dir = helper.load_params()

gen_length = 2000

prime_word = 'peter'

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_dir + '.meta')
    loader.restore(sess, load_dir)

    # Get Tensors from loaded model
    input_text, initial_state, final_state, probs = get_tensors(loaded_graph)

    # Sentences generation setup
    gen_sentences = [prime_word + ':']
    prev_state = sess.run(initial_state, {input_text: np.array([[1]])})

    # Generate sentences
    for n in range(gen_length):

        # Dynamic Input
        dyn_input = [[vocab_to_int[word] for word in gen_sentences[-seq_length:]]]

        dyn_seq_length = len(dyn_input[0])

        # Get Prediction
        probabilities, prev_state = sess.run(
            [probs, final_state],
            {input_text: dyn_input, initial_state: prev_state})

        pred_word = pick_word(probabilities[0][dyn_seq_length-1], int_to_vocab)

        gen_sentences.append(pred_word)
    
    # Remove tokens
    tv_script = ' '.join(gen_sentences)
    for key, token in token_dict.items():
        ending = ' ' if key in ['\n', '(', '"'] else ''
        tv_script = tv_script.replace(' ' + token.lower(), key)
    tv_script = tv_script.replace('\n ', '\n')
    tv_script = tv_script.replace('( ', '(')
        
    print(tv_script)

INFO:tensorflow:Restoring parameters from ./save
peter: i was gonna call them. but my favorite episode of different strokes was on. tomorrow.
lois: yeah. i have no son, i know? i've fooled people before. you're fired!
peter: yeah! i wouldn't have to the right. it's the griffins.
lois: stop it.
brian: oh, god, that was good. i just had a peter griffin production. hey, i can't leave more important, and that's final. see, i'm peter griffin.
peter: yeah, it's okay. i was just a formality, since i happen to be just like when we can start with someone else being worshipped like him.
brian: peter, brian.
brian: justice. what the hell?
brian: you don't know what love's like.
stewie: oh, you! stay![ dramatic instrumental music]

peter:[ nervous laughter]

lois: we didn't measure!
peter: wait, here, you guys.
chris: oh, dad, you did my whole day's work have a job, peter?
peter: we're being chased by ghosts!

death: peter, what the hell is that? i'm not! you all right?
lois: peter, what are you d