In [14]:
%load_ext autoreload
%aimport -tf
%aimport data
%aimport model
%autoreload 1

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [87]:
import tensorflow as tf
import pandas as pd
import numpy as np
import data
import model
import nltk

In [88]:
# Enable INFO-level TensorFlow logging; this is what produces the
# "INFO:tensorflow:..." lines in the training cell's output.
tf.logging.set_verbosity(tf.logging.INFO)

In [89]:
# --- Data preprocessing (all three are forwarded to data.load_reddit_data) ---
DATA_TEST_SIZE = 5000        # passed as test_size
VOCAB_MIN_FREQUENCY = 50     # passed as min_frequency
MAX_DOCUMENT_LENGTH = 20     # passed as max_document_length

# --- Model architecture ---
EMBEDDING_DIM = 128          # embedding_dim given to the language-model builder
RNN_CELL_SIZE = 256          # size argument of the GRU cell in simple_rnn

# --- Training loop ---
TRAIN_EVAL_EVERY = 1000      # every_n_steps for both training monitors
TRAIN_BATCH_SIZE = 16        # batch_size for the train/dev input functions

In [90]:
# Load the Reddit dataset using the preprocessing constants configured above.
ds = data.load_reddit_data(
    test_size=DATA_TEST_SIZE,
    min_frequency=VOCAB_MIN_FREQUENCY,
    max_document_length=MAX_DOCUMENT_LENGTH,
)

In [95]:
# Print a summary of the loaded dataset (per the recorded output: vocabulary
# size and the train/dev array shapes).
data.print_dataset_stats(ds)

Vocabulary Size: 10803
Train Data Shape: (385367, 20)
Dev Data Shape (5000, 20)
Vocabulary size 10803


In [91]:
# Input functions: train_input_fn is later handed to estimator.fit,
# dev_input_fn to the validation monitor.
train_input_fn, dev_input_fn = data.create_train_dev_input_fns(
    ds,
    batch_size=TRAIN_BATCH_SIZE,
)

In [92]:
def simple_rnn(sequences, sequence_lengths):
    """Run a single-layer GRU over `sequences`.

    Args:
        sequences: tensor that is split along axis 1 into a list of per-step
            inputs (presumably (batch, time, features) -- confirm with the
            caller in `model`).
        sequence_lengths: forwarded unchanged as `sequence_length` to
            `tf.nn.rnn`.

    Returns:
        Whatever `tf.nn.rnn` returns for a GRU cell of size RNN_CELL_SIZE
        (NOTE(review): expected to be an (outputs, final_state) pair --
        confirm against the installed TensorFlow version).
    """
    gru_cell = tf.nn.rnn_cell.GRUCell(RNN_CELL_SIZE)
    # tf.nn.rnn takes a Python list of inputs, so unpack the tensor on axis 1.
    per_step_inputs = tf.unpack(sequences, axis=1)
    rnn_result = tf.nn.rnn(
        gru_cell,
        per_step_inputs,
        sequence_length=sequence_lengths,
        dtype=tf.float32)
    return rnn_result

# Build the estimator model_fn, plugging in simple_rnn as the recurrent layer
# and sizing the vocabulary from the loaded dataset.
model_fn = model.create_language_model_rnn(
    rnn_fn=simple_rnn,
    embedding_dim=EMBEDDING_DIM,
    vocab_size=len(ds.vocab.vocabulary_),
)

In [100]:
# Sentence-sampling monitor: fires every TRAIN_EVAL_EVERY steps.
sample_mon = model.SentenceSampleMonitor(
    vocab=ds.vocab,
    first_n_steps=-1,
    every_n_steps=TRAIN_EVAL_EVERY,
)

# Development-set loss monitor, on the same schedule.
dev_monitor = tf.contrib.learn.monitors.ValidationMonitor(
    every_n_steps=TRAIN_EVAL_EVERY,
    input_fn=dev_input_fn,
)

# The checkpoint directory is derived from the main hyperparameters, so each
# configuration gets its own directory under ./checkpoints.
model_name = "maxlen_{}_rnn{}_embed_{}".format(
    MAX_DOCUMENT_LENGTH,
    RNN_CELL_SIZE,
    EMBEDDING_DIM,
)
estimator = tf.contrib.learn.Estimator(
    model_dir="./checkpoints/{}".format(model_name),
    model_fn=model_fn,
)

In [None]:
# Start training with both monitors attached. No step limit is passed
# (steps=None), so this cell is expected to be interrupted manually.
estimator.fit(
    input_fn=train_input_fn,
    monitors=[sample_mon, dev_monitor],
    steps=None,
)

INFO:tensorflow:Create CheckpointSaver
INFO:tensorflow:Restored model from ./checkpoints/maxlen_20_rnn256_embed_128/model.ckpt-900-?????-of-00001
INFO:tensorflow:Step 901: loss = 6.37077
INFO:tensorflow:Saving checkpoints for 901 into ./checkpoints/maxlen_20_rnn256_embed_128/model.ckpt.


Sampling from model at ./checkpoints/maxlen_20_rnn256_embed_128/model.ckpt-901-?????-of-00001
SENTENCE_START
SENTENCE_START i
SENTENCE_START i are
SENTENCE_START i are ;
SENTENCE_START i are ; <UNK>
SENTENCE_START i are ; <UNK> drugs
SENTENCE_START i are ; <UNK> drugs would
SENTENCE_START i are ; <UNK> drugs would miles
SENTENCE_START i are ; <UNK> drugs would miles ,




SENTENCE_START i are ; <UNK> drugs would miles , against
[0.11155516, 0.0082614403, 0.012146911, 0.058830481, 9.1312548e-05, 0.0028278122, 2.9213594e-05, 0.019151369, 0.00021820559, 0.0071246373]


INFO:tensorflow:Restored model from ./checkpoints/maxlen_20_rnn256_embed_128/model.ckpt-901-?????-of-00001
INFO:tensorflow:Eval steps [0,inf) for training step 901.
INFO:tensorflow:Results after 10 steps (0.192 sec/batch): loss = 5.98163.
INFO:tensorflow:Results after 20 steps (0.184 sec/batch): loss = 6.28958.
