## Imports

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# tensorflow
import tensorflow as tf
print('Tested with TensorFlow 1.2.0')
print ('Your TensorFlow version:', tf.__version__) # tested with v1.2
from tensorflow.contrib.learn.python.learn.estimators import constants
from tensorflow.contrib.learn.python.learn.estimators.dynamic_rnn_estimator import PredictionType

# Estimators
from tensorflow.contrib import learn

# Model builder
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib


# Input function
from tensorflow.python.estimator.inputs import numpy_io

# data and data preprocessing
from tensorflow.contrib.keras.python.keras.datasets import imdb 
from tensorflow.contrib.keras.python.keras.preprocessing import sequence

# Run an experiment
from tensorflow.contrib.learn.python.learn import learn_runner

# numpy
import numpy as np

# Enable TensorFlow logs
tf.logging.set_verbosity(tf.logging.INFO)

Tested with TensorFlow 1.2.0
Your TensorFlow version: 1.2.0


## Helpers

In [4]:
# word -> integer id table returned by the IMDB dataset helper
word_to_index = imdb.get_word_index()
# inverse table: integer id -> word
index_to_word = {idx: word for word, idx in word_to_index.items()}
# one count per entry of word_to_index
num_words = len(word_to_index)

# turn a sequence into a sentence
def get_sentence(seq, index_from=3):
    """Decode a (padded) sequence of IMDB word ids into a readable string.

    The ids produced by `imdb.load_data` are not the raw ids stored in
    `imdb.get_word_index()`: with the loader's default arguments every word id
    is shifted up by `index_from` (3), id 1 marks the start of a review and
    id 2 stands for an out-of-vocabulary word. Looking the ids up directly in
    `index_to_word` therefore yields the wrong words, so the shift is undone
    here before the lookup.

    Args:
        seq: iterable of numeric word ids (ints or floats). 0 is treated as
            padding and skipped.
        index_from: shift that was applied to the word ids when the data was
            loaded. Pass 0 to look ids up in `index_to_word` unshifted.

    Returns:
        The decoded tokens joined by single spaces. Reserved ids are rendered
        as '<start>' / '<oov>', ids without a known word as '<unk>'.
    """
    reserved = {1: '<start>', 2: '<oov>'}
    tokens = []
    for v in seq:
        idx = int(v)
        if idx == 0:  # 0 means it was just added so the sequence has maxlen ids
            continue
        word_id = idx - index_from
        if word_id >= 1:
            tokens.append(index_to_word.get(word_id, '<unk>'))
        else:
            # ids at or below the shift are markers, not words
            tokens.append(reserved.get(idx, '<unk>'))
    return ' '.join(tokens)

# turn a sentence into a sequence
def gen_sequence(sentence, index_from=3):
    """Encode a sentence as an array of IMDB word ids.

    Args:
        sentence: either a string (split on whitespace into words) or an
            iterable of words. Iterating a plain string would yield single
            characters, so strings are split first.
        index_from: shift added to every id so the result uses the same
            numbering as the sequences returned by `imdb.load_data` with its
            default arguments. Pass 0 to get the raw `word_to_index` ids.

    Returns:
        1-D float32 numpy array with one id per word.

    Raises:
        KeyError: if a word is not present in `word_to_index`.
    """
    # NOTE(review): words are looked up as given (no lower-casing or
    # punctuation stripping) -- confirm this matches the vocabulary's format.
    words = sentence.split() if hasattr(sentence, 'split') else sentence
    ids = [word_to_index[word] + index_from for word in words]
    return np.asarray(ids, dtype=np.float32)

# report how many entries the word index holds
print('there are {} words in the files'.format(num_words))
#for i in range(1, 100):
#    print(i, index_to_word[i])
#print(index_to_word[49], index_to_word[212], index_to_word[242], index_to_word[136])

there are 88584 words in the files


## Visualizing data

In [5]:
# Disabled cell: the snippet below is wrapped in a string literal, so none of
# it executes; being the cell's only expression, the string itself is echoed
# as the cell output. If enabled it would print the first line of one negative
# and one positive review read from local files under data/train/.
'''
# ------------------- negative
print('-' * 30)
print('Example of a negative review')
print('-' * 30)

x = open('data/train/neg/0_3.txt')
r = x.readline()
print(r)

# ------------------ positive
print()
print('-' * 30)
print('Example of a positive review')
print('-' * 30)

x = open('data/train/pos/0_9.txt')
r = x.readline()
print(r)
'''

"\n# ------------------- negative\nprint('-' * 30)\nprint('Example of a negative review')\nprint('-' * 30)\n\nx = open('data/train/neg/0_3.txt')\nr = x.readline()\nprint(r)\n\n# ------------------ positive\nprint()\nprint('-' * 30)\nprint('Example of a positive review')\nprint('-' * 30)\n\nx = open('data/train/pos/0_9.txt')\nr = x.readline()\nprint(r)\n"

In [6]:
print('Loading data')

# vocabulary size handed to the loader
num_words = 10000
# let's make things faster: keep `limit` reviews per split, `maxlen` ids each
limit = 3200
maxlen = 80

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# features: bring every review to maxlen ids, keep the first `limit` rows,
# then cast to float32
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)[:limit].astype('float32')
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)[:limit].astype('float32')

# labels: same subset, stored as int32
y_train = y_train[:limit].astype('int32')
y_test = y_test[:limit].astype('int32')

# Disabled snippet: kept inside a string literal so it does not run (the
# string is only echoed as the cell output). If enabled it would build
# (limit, 2) one-hot float32 arrays from y_train / y_test.
'''
# y to onehot
y_train_one_hot = np.zeros((limit, 2), dtype=np.float32)
for i in range(limit):
    y_train_one_hot[i][y_train[i]] = 1

y_test_one_hot = np.zeros((limit, 2), dtype=np.float32)
for i in range(limit):
    y_test_one_hot[i][y_test[i]] = 1

print(x_train.shape)
#print(y_train)
#print(y_train_one_hot)
'''

Loading data


'\n# y to onehot\ny_train_one_hot = np.zeros((limit, 2), dtype=np.float32)\nfor i in range(limit):\n    y_train_one_hot[i][y_train[i]] = 1\n\ny_test_one_hot = np.zeros((limit, 2), dtype=np.float32)\nfor i in range(limit):\n    y_test_one_hot[i][y_test[i]] = 1\n\nprint(x_train.shape)\n#print(y_train)\n#print(y_train_one_hot)\n'

In [10]:
# parameters
LEARNING_RATE = 0.01  # learning_rate handed to the estimator
BATCH_SIZE = 64  # batch size used for both the train and the test input fn
STEPS = 10000  # train_steps of the Experiment

# Input functions
# Disabled alternative built on numpy_io.numpy_input_fn; it sits inside a
# string literal, so it is never executed.
'''
x_train_dict = {'x': x_train }

train_input_fn = numpy_io.numpy_input_fn(
          x_train_dict, y_train, batch_size=BATCH_SIZE, 
           shuffle=True, num_epochs=None, 
            queue_capacity=1000, num_threads=4)

x_test_dict = {'x': x_test }
	
test_input_fn = numpy_io.numpy_input_fn(
          x_test_dict, y_test, batch_size=BATCH_SIZE, shuffle=False, num_epochs=1)
'''

# Input Function
def get_input_fn(labels, targets, batch_size, epochs=None, shuffle=True):
    """Build an input_fn that feeds in-memory numpy arrays to an estimator.

    The previous implementation accepted `epochs` but never used it: its
    queue kept re-enqueueing the whole dataset, so the input never ended and
    was always shuffled. This version delegates to `numpy_input_fn`, which
    honours both settings.

    Args:
        labels: numpy array with the model inputs; exposed to the model as
            feature 'x'. (The parameter name is kept for compatibility -- it
            holds the features, not the classes.)
        targets: numpy array with one target per row of `labels`.
        batch_size: number of examples per batch.
        epochs: number of passes over the data; None repeats indefinitely.
        shuffle: whether examples are shuffled. Use False together with
            epochs=1 when the output order must match the input order
            (e.g. for prediction).

    Returns:
        A zero-argument function returning `(features_dict, target_tensor)`,
        where `features_dict` has the single key 'x'.
    """
    return numpy_io.numpy_input_fn(
        {'x': labels},
        targets,
        batch_size=batch_size,
        num_epochs=epochs,
        shuffle=shuffle,
        queue_capacity=1000,
        # several reader threads would interleave batches, which only is
        # acceptable when the order is being shuffled anyway
        num_threads=4 if shuffle else 1)

# input pipelines for training and evaluation, both batched with BATCH_SIZE
train_input_fn = get_input_fn(x_train, y_train, BATCH_SIZE)
test_input_fn = get_input_fn(x_test, y_test, BATCH_SIZE)

# create estimator
# feature 'x' is declared as a real-valued column and is the only sequence input
xc = tf.contrib.layers.real_valued_column("x")
estimator = tf.contrib.learn.DynamicRnnEstimator(
    # task: one class (out of two) predicted per sequence
    problem_type=constants.ProblemType.CLASSIFICATION,
    prediction_type=PredictionType.SINGLE_VALUE,
    num_classes=2,
    # inputs
    sequence_feature_columns=[xc],
    context_feature_columns=None,
    # network
    cell_type='lstm',
    num_units=[256, 128],
    # training
    optimizer='Adam',
    learning_rate=LEARNING_RATE)

# create experiment
def generate_experiment_fn():
    """Return the experiment factory that is handed to learn_runner.

    Returns:
        A function (output_dir) -> Experiment. The Experiment wraps the
        module-level `estimator`, trains it with `train_input_fn` for STEPS
        steps and evaluates it with `test_input_fn`. `output_dir` is accepted
        because the factory is called with it, but it is not used.
    """
    def _experiment_fn(output_dir):
        experiment = tf.contrib.learn.Experiment(
            estimator,
            train_input_fn=train_input_fn,
            eval_input_fn=test_input_fn,
            train_steps=STEPS)
        return experiment

    return _experiment_fn

# run experiment
# NOTE(review): output_dir is handed to _experiment_fn, which ignores it; the
# log output shows checkpoints going to the estimator's own temporary model
# directory rather than to /tmp/sentiment_analysis.
learn_runner.run(generate_experiment_fn(), output_dir='/tmp/sentiment_analysis')

Instructions for updating:
This file will be removed after the deprecation date.Please switch to third_party/tensorflow/contrib/learn/python/learn/estimators/head.py
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fb33b7e2710>, '_task_type': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_model_dir': '/tmp/tmpotsth6mx', '_tf_random_seed': None, '_master': '', '_evaluation_master': '', '_keep_checkpoint_max': 5, '_save_checkpoints_secs': 600, '_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 0, '_task_id': 0, '_is_chief': True, '_num_ps_replicas': 0, '_session_config': None, '_save_checkpoints_steps': None, '_save_summary_steps': 100, '_environment': 'local'}
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpotsth6mx/model.ckpt.
INFO:tensorflow:loss = 0.721445, step = 1
INFO:tensorflow:Starting evaluation at 2017-06-20-23:11:03
INFO:tensorflow:Restoring parameters from /tmp/tmpotsth6mx/model.ckpt-1
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/10

INFO:tensorflow:Evaluation [35/100]
INFO:tensorflow:Evaluation [36/100]
INFO:tensorflow:Evaluation [37/100]
INFO:tensorflow:Evaluation [38/100]
INFO:tensorflow:Evaluation [39/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [41/100]
INFO:tensorflow:Evaluation [42/100]
INFO:tensorflow:Evaluation [43/100]
INFO:tensorflow:Evaluation [44/100]
INFO:tensorflow:Evaluation [45/100]
INFO:tensorflow:Evaluation [46/100]
INFO:tensorflow:Evaluation [47/100]
INFO:tensorflow:Evaluation [48/100]
INFO:tensorflow:Evaluation [49/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [51/100]
INFO:tensorflow:Evaluation [52/100]
INFO:tensorflow:Evaluation [53/100]
INFO:tensorflow:Evaluation [54/100]
INFO:tensorflow:Evaluation [55/100]
INFO:tensorflow:Evaluation [56/100]
INFO:tensorflow:Evaluation [57/100]
INFO:tensorflow:Evaluation [58/100]
INFO:tensorflow:Evaluation [59/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [61/100]
INFO:tensorflow:Evaluation [

INFO:tensorflow:Validation (step 3769): loss = 0.693164, accuracy = 0.508281, global_step = 3769
INFO:tensorflow:global_step/sec: 1.77159
INFO:tensorflow:loss = 0.674979, step = 3801 (56.446 sec)
INFO:tensorflow:global_step/sec: 2.44544
INFO:tensorflow:loss = 0.697432, step = 3901 (40.894 sec)
INFO:tensorflow:global_step/sec: 2.4007
INFO:tensorflow:loss = 0.680676, step = 4001 (41.653 sec)
INFO:tensorflow:global_step/sec: 2.45829
INFO:tensorflow:loss = 0.702, step = 4101 (40.680 sec)
INFO:tensorflow:global_step/sec: 2.45709
INFO:tensorflow:loss = 0.691294, step = 4201 (40.698 sec)
INFO:tensorflow:global_step/sec: 2.44354
INFO:tensorflow:loss = 0.690869, step = 4301 (40.924 sec)
INFO:tensorflow:global_step/sec: 2.46825
INFO:tensorflow:loss = 0.683967, step = 4401 (40.515 sec)
INFO:tensorflow:global_step/sec: 2.45343
INFO:tensorflow:loss = 0.68796, step = 4501 (40.758 sec)
INFO:tensorflow:global_step/sec: 2.43052
INFO:tensorflow:loss = 0.681233, step = 4601 (41.144 sec)
INFO:tensorflow:g

KeyboardInterrupt: 

In [None]:
# generate predictions
# test_input_fn shuffles its examples and never signals end-of-input, so its
# predictions can neither be collected with list() nor matched back to rows of
# x_test. Predict from a single, ordered pass over x_test instead.
predict_input_fn = numpy_io.numpy_input_fn(
    {'x': x_test}, batch_size=BATCH_SIZE, num_epochs=1, shuffle=False)
preds = list(estimator.predict(input_fn=predict_input_fn))

# number of outputs we want to see the prediction
NUM_EVAL = 10


def check_prediction(x, y, p, index):
    """Print the predicted class, the target and the decoded review.

    Args:
        x: array of encoded reviews, indexed by example.
        y: array of targets, indexed by example.
        p: list of per-example prediction dicts holding a 'classes' entry.
        index: position of the example to show.
    """
    print('prediction:', p[index]['classes'])
    print('target:', y[index])
    print('sentence:', get_sentence(x[index]))


for i in range(NUM_EVAL):
    # sample only among examples that actually have a prediction
    index = np.random.randint(len(preds))
    print('test:', index)
    print('-' * 30)
    print(np.asarray(x_test[index], dtype=np.int32))
    check_prediction(x_test, y_test, preds, index)
    print()