# Neural Bag of Words for Amazon Reviews

In [1]:
from __future__ import division
import os, sys, re, json, time, datetime, shutil
import itertools, collections
from importlib import reload
from IPython.display import display, HTML

# NLTK for NLP utils and corpora
import nltk

# NumPy and TensorFlow
import numpy as np
import pandas as pd
import tensorflow as tf
assert(tf.__version__.startswith("1.8"))

# This creates a symbolic link on python in tmp directory

# Helper libraries from the w266_common package
from w266_common import utils, vocabulary, tf_embed_viz, treeviz
from w266_common import patched_numpy_io

# Code for this assignment

import models

import nltk
from nltk.tokenize import word_tokenize

## Word Vectors

In [2]:
from pathlib import Path
import os.path
# Locate the pretrained GloVe artifacts under the current user's home directory.
home_dir = str(Path.home())
words_path = os.path.join(home_dir, '.kaggle/wordvectors/pretrained_glove/wordsList.npy')
vectors_path = os.path.join(home_dir, '.kaggle/wordvectors/pretrained_glove/wordVectors.npy')

# The vocabulary is loaded as a NumPy array and converted to a Python list;
# each entry is then decoded from UTF-8 bytes into a regular str.
wordsList = [raw_word.decode('UTF-8') for raw_word in np.load(words_path).tolist()]

# Embedding matrix that accompanies the vocabulary
# (presumably one row per entry of wordsList -- TODO confirm).
wordVectors = np.load(vectors_path)

## Load Reviews

In [3]:
# Columns removed immediately after loading; they are not referenced later in this notebook.
unused_columns = ['Id', 'ProductId', 'UserId', 'ProfileName', 'HelpfulnessNumerator', 'HelpfulnessDenominator', 'Time', 'Summary']
reviews_csv = '~/.kaggle/datasets/snap/amazon-fine-food-reviews/Reviews.csv'

# Read the raw reviews and discard the unused columns in one step.
review_df = pd.read_csv(reviews_csv, encoding='utf8').drop(unused_columns, axis=1)

In [4]:
# Reduce size for development
#numReviews = 100
#review_df = review_df.loc[0:numReviews-1]

Remove reviews where the score is 3.  This is an ambiguous case for binary classification.

In [5]:
# Keep only clearly negative (< 3) or positive (> 3) reviews, then renumber the
# rows 0..n-1 so the row label can be used as an array position later on.
# drop=True discards the old labels instead of inserting them as an extra
# 'index' column; that also keeps this cell idempotent (re-running the in-place
# version keeps adding 'index' / 'level_0' columns and eventually raises).
review_df = review_df[review_df.Score != 3].reset_index(drop=True)

## Tokenize

In [6]:
# Lower-case every review text and split it into tokens with NLTK's word_tokenize.
review_df['Tokens'] = [word_tokenize(review_text.lower()) for review_text in review_df['Text']]

## Train, Dev, Test data

In [7]:
# Train: 60%, Dev: 10%, Test: 30%
train_percent = 0.6
dev_percent = 0.1
test_percent = 0.3

# Row boundaries of each split in the dataframe.
# The bounds are consumed as half-open slices [lower:upper) by Split(), so the
# upper bound itself is never included. Each split therefore starts exactly
# where the previous one ends, and the test split ends at len(review_df).
# (Adding 1 to the lower bounds, or stopping at len - 1, silently drops a row
# at every boundary.)
train_lower_index = 0
train_upper_index = train_lower_index + round(len(review_df)*train_percent)
dev_lower_index   = train_upper_index
dev_upper_index   = dev_lower_index + round(len(review_df)*dev_percent)
test_lower_index  = dev_upper_index
test_upper_index  = len(review_df)

In [8]:
# Maximum number of tokens encoded per review: it sets the width of the
# word_ids matrix, and the encoding loop stops reading a review once this many
# tokens have been stored.
maxSeqLength = 267  # Determined by EDA

In [9]:
# Pre-allocate the encoded dataset, one entry per review:
#   word_ids        -- token ids, zero-initialised, maxSeqLength columns wide
#   word_ids_ns     -- number of token ids actually stored for the review
#   word_ids_labels -- binary sentiment label
num_reviews = len(review_df)
word_ids = np.zeros((num_reviews, maxSeqLength), dtype=np.int32)
word_ids_ns = np.zeros(num_reviews, dtype=np.int32)
word_ids_labels = np.zeros(num_reviews, dtype=np.int32)

In [10]:
# Build the word -> id lookup once. list.index() scans the whole vocabulary for
# every token, which is prohibitively slow on the full dataset; a dict gives the
# same answer in constant time. setdefault keeps the FIRST position of a word,
# matching what list.index() returns if the vocabulary contains duplicates.
word_to_id = {}
for vocab_index, vocab_word in enumerate(wordsList):
    word_to_id.setdefault(vocab_word, vocab_index)

# Id assigned to words missing from the vocabulary (value kept from the original
# code; presumably the last row of the embedding table -- TODO confirm).
unknown_word_id = 399999

# Encode each review as a fixed-width row of token ids plus its length and label.
# enumerate() supplies the positional row number, so the array writes stay
# correct even if the dataframe's index labels are not 0..n-1.
for sentence_index, (tokens, score) in enumerate(zip(review_df['Tokens'], review_df['Score'])):

    # Reviews longer than maxSeqLength are truncated; shorter ones keep the
    # zero padding already present in word_ids.
    kept_tokens = tokens[:maxSeqLength]

    for word_index, word in enumerate(kept_tokens):
        word_ids[sentence_index, word_index] = word_to_id.get(word, unknown_word_id)

    # Number of token ids actually stored for this review.
    word_ids_ns[sentence_index] = len(kept_tokens)

    # Binary sentiment label: 1 for scores above 3, 0 for scores below 3.
    if score > 3:
        word_ids_labels[sentence_index] = 1
    elif score < 3:
        word_ids_labels[sentence_index] = 0

In [11]:
def Split(lower_idx, upper_idx):
    """Return the (ids, lengths, labels) slices for rows lower_idx up to, but not including, upper_idx.

    The three results are slices of the module-level arrays word_ids,
    word_ids_ns and word_ids_labels, in that order.
    """
    rows = slice(lower_idx, upper_idx)
    ids = word_ids[rows]
    lengths = word_ids_ns[rows]
    labels = word_ids_labels[rows]
    return ids, lengths, labels

In [12]:
# Row ranges of the train / dev / test partitions, in that order.
split_bounds = [
    (train_lower_index, train_upper_index),
    (dev_lower_index, dev_upper_index),
    (test_lower_index, test_upper_index),
]

# Each Split() call yields (ids, lengths, labels) for one partition.
(train_x, train_ns, train_y), (dev_x, dev_ns, dev_y), (test_x, test_ns, test_y) = [
    Split(lower, upper) for lower, upper in split_bounds
]

## Model Parameters

In [13]:
import models; reload(models)

<module 'models' from '/home/matt/w266_final_project/NBOW/models.py'>

In [14]:
# Hyperparameters handed to models.classifier_model_fn through the Estimator's
# `params` argument.
model_params = {
    "V": len(wordsList),       # vocabulary size
    "embed_dim": 50,
    "hidden_dims": [25],
    "num_classes": 2,          # 2 for binary classifier
    "encoder_type": 'bow',
    "lr": 0.1,
    "optimizer": 'adagrad',
    "beta": 0.01,
    "dropout_rate": 0.1,
}

## Training Parameters

In [15]:
# Training schedule: mini-batch size, total number of epochs, and how many
# epochs to train between two evaluations on the dev set.
train_params = {
    "batch_size": 32,
    "total_epochs": 20,
    "eval_every": 2,
}

In [16]:
# One checkpoint directory per run, named after the current date and minute.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M")
checkpoint_dir = "/tmp/tf_bow_sst_" + run_stamp

# Start from a clean slate if a directory with the same stamp already exists.
stale_run_exists = os.path.isdir(checkpoint_dir)
if stale_run_exists:
    shutil.rmtree(checkpoint_dir)
#ds.vocab.write_projector_config(checkpoint_dir, "Encoder/Embedding_Layer/W_embed")

In [17]:
# Wrap the classifier's model_fn in an Estimator that writes its checkpoints
# and summaries to checkpoint_dir.
estimator_kwargs = dict(model_fn=models.classifier_model_fn,
                        params=model_params,
                        model_dir=checkpoint_dir)
model = tf.estimator.Estimator(**estimator_kwargs)

# Tell the reader how to follow the run in TensorBoard.
tensorboard_hints = (
    "\nTo view training (once it starts), run:\n",
    "    tensorboard --logdir='{:s}' --port 6006".format(checkpoint_dir),
    "\nThen in your browser, open: http://localhost:6006\n",
)
for hint in tensorboard_hints:
    print(hint)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': 'worker', '_session_config': None, '_log_step_count_steps': 100, '_task_id': 0, '_service': None, '_save_summary_steps': 100, '_model_dir': '/tmp/tf_bow_sst_20180717-1728', '_evaluation_master': '', '_save_checkpoints_steps': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f109e4fb940>, '_save_checkpoints_secs': 600, '_is_chief': True, '_keep_checkpoint_every_n_hours': 10000, '_keep_checkpoint_max': 5, '_master': '', '_global_id_in_cluster': 0, '_num_ps_replicas': 0, '_tf_random_seed': None, '_train_distribute': None, '_num_worker_replicas': 1}

To view training (once it starts), run:

    tensorboard --logdir='/tmp/tf_bow_sst_20180717-1728' --port 6006

Then in your browser, open: http://localhost:6006



In [18]:
# Feature dicts: "ids" carries the token-id matrix and "ns" the per-review
# token counts.
train_features = {"ids": train_x, "ns": train_ns}
dev_features = {"ids": dev_x, "ns": dev_ns}

# Training input: shuffled with a fixed seed; one call iterates over the data
# for `eval_every` epochs.
train_input_fn = patched_numpy_io.numpy_input_fn(
    x=train_features,
    y=train_y,
    batch_size=train_params['batch_size'],
    num_epochs=train_params['eval_every'],
    shuffle=True,
    seed=42)

# Dev input: a single ordered pass over the dev split.
dev_input_fn = patched_numpy_io.numpy_input_fn(
    x=dev_features,
    y=dev_y,
    batch_size=128,
    num_epochs=1,
    shuffle=False)

## Train

In [19]:
# Alternate between training and scoring on the dev set until the requested
# total number of epochs has been covered.
num_rounds = train_params['total_epochs'] // train_params['eval_every']
for _round in range(num_rounds):
    model.train(input_fn=train_input_fn)
    model.evaluate(input_fn=dev_input_fn, name="dev")

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tf_bow_sst_20180717-1728/model.ckpt.
INFO:tensorflow:step = 1, loss = 1.0592134
INFO:tensorflow:global_step/sec: 101.091
INFO:tensorflow:step = 101, loss = 0.7864195 (0.990 sec)
INFO:tensorflow:global_step/sec: 121.674
INFO:tensorflow:step = 201, loss = 0.9445193 (0.823 sec)
INFO:tensorflow:global_step/sec: 112.952
INFO:tensorflow:step = 301, loss = 0.5846317 (0.884 sec)
INFO:tensorflow:global_step/sec: 115.383
INFO:tensorflow:step = 401, loss = 0.5908887 (0.868 sec)
INFO:tensorflow:global_step/sec: 114.112
INFO:tensorflow:step = 501, loss = 0.59412897 (0.875 sec)
INFO:tensorflow:global_step/sec: 113.806
INFO:tensorflow:step = 601, loss = 0.33580065 (0.880 sec)
INFO:tensorflow:global_step/sec: 116.825
I

INFO:tensorflow:global_step/sec: 113.799
INFO:tensorflow:step = 8001, loss = 0.24766192 (0.880 sec)
INFO:tensorflow:global_step/sec: 117.411
INFO:tensorflow:step = 8101, loss = 0.45663005 (0.851 sec)
INFO:tensorflow:global_step/sec: 117.95
INFO:tensorflow:step = 8201, loss = 0.3395608 (0.847 sec)
INFO:tensorflow:global_step/sec: 120.413
INFO:tensorflow:step = 8301, loss = 0.24136609 (0.831 sec)
INFO:tensorflow:global_step/sec: 121.128
INFO:tensorflow:step = 8401, loss = 0.29774275 (0.826 sec)
INFO:tensorflow:global_step/sec: 119.162
INFO:tensorflow:step = 8501, loss = 0.3203095 (0.841 sec)
INFO:tensorflow:global_step/sec: 118.053
INFO:tensorflow:step = 8601, loss = 0.4368946 (0.846 sec)
INFO:tensorflow:global_step/sec: 118.242
INFO:tensorflow:step = 8701, loss = 0.22556734 (0.847 sec)
INFO:tensorflow:global_step/sec: 114.133
INFO:tensorflow:step = 8801, loss = 0.33870816 (0.873 sec)
INFO:tensorflow:global_step/sec: 117.425
INFO:tensorflow:step = 8901, loss = 0.24375242 (0.852 sec)
INFO

INFO:tensorflow:global_step/sec: 118.786
INFO:tensorflow:step = 16201, loss = 0.34974927 (0.842 sec)
INFO:tensorflow:global_step/sec: 119.755
INFO:tensorflow:step = 16301, loss = 0.2175719 (0.836 sec)
INFO:tensorflow:global_step/sec: 116.593
INFO:tensorflow:step = 16401, loss = 0.51471573 (0.858 sec)
INFO:tensorflow:global_step/sec: 120.267
INFO:tensorflow:step = 16501, loss = 0.15434343 (0.836 sec)
INFO:tensorflow:global_step/sec: 117.267
INFO:tensorflow:step = 16601, loss = 0.1998584 (0.847 sec)
INFO:tensorflow:global_step/sec: 119.4
INFO:tensorflow:step = 16701, loss = 0.47875792 (0.837 sec)
INFO:tensorflow:global_step/sec: 119.434
INFO:tensorflow:step = 16801, loss = 0.2791302 (0.838 sec)
INFO:tensorflow:global_step/sec: 116.169
INFO:tensorflow:step = 16901, loss = 0.25055173 (0.860 sec)
INFO:tensorflow:global_step/sec: 116.543
INFO:tensorflow:step = 17001, loss = 0.13407105 (0.859 sec)
INFO:tensorflow:global_step/sec: 118.469
INFO:tensorflow:step = 17101, loss = 0.3642052 (0.844 s

INFO:tensorflow:step = 23219, loss = 0.460176 (0.923 sec)
INFO:tensorflow:global_step/sec: 114.177
INFO:tensorflow:step = 23319, loss = 0.14848271 (0.875 sec)
INFO:tensorflow:global_step/sec: 114.145
INFO:tensorflow:step = 23419, loss = 0.34020025 (0.879 sec)
INFO:tensorflow:global_step/sec: 113.64
INFO:tensorflow:step = 23519, loss = 0.20910297 (0.876 sec)
INFO:tensorflow:global_step/sec: 116.641
INFO:tensorflow:step = 23619, loss = 0.30172288 (0.858 sec)
INFO:tensorflow:global_step/sec: 111.175
INFO:tensorflow:step = 23719, loss = 0.25215933 (0.898 sec)
INFO:tensorflow:global_step/sec: 114.149
INFO:tensorflow:step = 23819, loss = 0.16695473 (0.876 sec)
INFO:tensorflow:global_step/sec: 115.296
INFO:tensorflow:step = 23919, loss = 0.15160641 (0.877 sec)
INFO:tensorflow:global_step/sec: 113.316
INFO:tensorflow:step = 24019, loss = 0.33834547 (0.873 sec)
INFO:tensorflow:global_step/sec: 118.655
INFO:tensorflow:step = 24119, loss = 0.28974828 (0.842 sec)
INFO:tensorflow:global_step/sec: 1

INFO:tensorflow:global_step/sec: 114.791
INFO:tensorflow:step = 31419, loss = 0.3115425 (0.871 sec)
INFO:tensorflow:global_step/sec: 113.542
INFO:tensorflow:step = 31519, loss = 0.20962685 (0.883 sec)
INFO:tensorflow:global_step/sec: 114.852
INFO:tensorflow:step = 31619, loss = 0.3751669 (0.869 sec)
INFO:tensorflow:global_step/sec: 118.469
INFO:tensorflow:step = 31719, loss = 0.25038916 (0.846 sec)
INFO:tensorflow:global_step/sec: 115.021
INFO:tensorflow:step = 31819, loss = 0.22831964 (0.868 sec)
INFO:tensorflow:global_step/sec: 116.985
INFO:tensorflow:step = 31919, loss = 0.12482907 (0.854 sec)
INFO:tensorflow:global_step/sec: 113.525
INFO:tensorflow:step = 32019, loss = 0.36151224 (0.881 sec)
INFO:tensorflow:global_step/sec: 119.497
INFO:tensorflow:step = 32119, loss = 0.2893338 (0.837 sec)
INFO:tensorflow:global_step/sec: 117.35
INFO:tensorflow:step = 32219, loss = 0.2553883 (0.854 sec)
INFO:tensorflow:global_step/sec: 116.741
INFO:tensorflow:step = 32319, loss = 0.24517801 (0.858 

INFO:tensorflow:Loss for final step: 0.26313317.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-18-00:34:01
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20180717-1728/model.ckpt-39436
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-18-00:34:06
INFO:tensorflow:Saving dict for global step 39436: accuracy = 0.93503356, cross_entropy_loss = 0.17446001, global_step = 39436, loss = 0.32280108
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20180717-1728/model.ckpt-39436
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 39437 into /tmp/tf_bow_sst_20180717-1728

INFO:tensorflow:global_step/sec: 112.149
INFO:tensorflow:step = 46637, loss = 0.22669405 (0.891 sec)
INFO:tensorflow:global_step/sec: 117.556
INFO:tensorflow:step = 46737, loss = 0.32862803 (0.851 sec)
INFO:tensorflow:global_step/sec: 117.714
INFO:tensorflow:step = 46837, loss = 0.15009983 (0.849 sec)
INFO:tensorflow:global_step/sec: 116.682
INFO:tensorflow:step = 46937, loss = 0.13657883 (0.858 sec)
INFO:tensorflow:global_step/sec: 114.184
INFO:tensorflow:step = 47037, loss = 0.14819229 (0.875 sec)
INFO:tensorflow:global_step/sec: 113.896
INFO:tensorflow:step = 47137, loss = 0.16800684 (0.878 sec)
INFO:tensorflow:global_step/sec: 120.743
INFO:tensorflow:step = 47237, loss = 0.20824507 (0.828 sec)
INFO:tensorflow:global_step/sec: 116.299
INFO:tensorflow:step = 47337, loss = 0.26943922 (0.862 sec)
INFO:tensorflow:global_step/sec: 118.356
INFO:tensorflow:step = 47437, loss = 0.21039796 (0.842 sec)
INFO:tensorflow:global_step/sec: 118.728
INFO:tensorflow:step = 47537, loss = 0.37959173 (0

INFO:tensorflow:global_step/sec: 116.675
INFO:tensorflow:step = 54837, loss = 0.15435576 (0.859 sec)
INFO:tensorflow:global_step/sec: 115.947
INFO:tensorflow:step = 54937, loss = 0.108597554 (0.866 sec)
INFO:tensorflow:global_step/sec: 117.64
INFO:tensorflow:step = 55037, loss = 0.19234207 (0.844 sec)
INFO:tensorflow:global_step/sec: 118.702
INFO:tensorflow:step = 55137, loss = 0.15777501 (0.845 sec)
INFO:tensorflow:global_step/sec: 114.993
INFO:tensorflow:step = 55237, loss = 0.34052595 (0.867 sec)
INFO:tensorflow:global_step/sec: 115.416
INFO:tensorflow:step = 55337, loss = 0.28047508 (0.867 sec)
INFO:tensorflow:global_step/sec: 121.355
INFO:tensorflow:step = 55437, loss = 0.21695095 (0.823 sec)
INFO:tensorflow:global_step/sec: 120.701
INFO:tensorflow:step = 55537, loss = 0.23865783 (0.829 sec)
INFO:tensorflow:global_step/sec: 120.857
INFO:tensorflow:step = 55637, loss = 0.35669646 (0.827 sec)
INFO:tensorflow:global_step/sec: 120.864
INFO:tensorflow:step = 55737, loss = 0.20231867 (0

INFO:tensorflow:global_step/sec: 113.061
INFO:tensorflow:step = 61855, loss = 0.13612011 (0.882 sec)
INFO:tensorflow:global_step/sec: 116.855
INFO:tensorflow:step = 61955, loss = 0.25198054 (0.856 sec)
INFO:tensorflow:global_step/sec: 117.668
INFO:tensorflow:step = 62055, loss = 0.33457175 (0.850 sec)
INFO:tensorflow:global_step/sec: 114.54
INFO:tensorflow:step = 62155, loss = 0.3546299 (0.872 sec)
INFO:tensorflow:global_step/sec: 118.94
INFO:tensorflow:step = 62255, loss = 0.38113987 (0.841 sec)
INFO:tensorflow:global_step/sec: 117.993
INFO:tensorflow:step = 62355, loss = 0.2098257 (0.848 sec)
INFO:tensorflow:global_step/sec: 114.593
INFO:tensorflow:step = 62455, loss = 0.19401225 (0.873 sec)
INFO:tensorflow:global_step/sec: 114.503
INFO:tensorflow:step = 62555, loss = 0.28583804 (0.876 sec)
INFO:tensorflow:global_step/sec: 112.437
INFO:tensorflow:step = 62655, loss = 0.31123775 (0.887 sec)
INFO:tensorflow:global_step/sec: 117.092
INFO:tensorflow:step = 62755, loss = 0.15680048 (0.855

INFO:tensorflow:global_step/sec: 117.635
INFO:tensorflow:step = 70055, loss = 0.15954965 (0.850 sec)
INFO:tensorflow:global_step/sec: 114.662
INFO:tensorflow:step = 70155, loss = 0.22629984 (0.872 sec)
INFO:tensorflow:global_step/sec: 113.808
INFO:tensorflow:step = 70255, loss = 0.13830137 (0.879 sec)
INFO:tensorflow:global_step/sec: 115.819
INFO:tensorflow:step = 70355, loss = 0.29853916 (0.864 sec)
INFO:tensorflow:global_step/sec: 117.108
INFO:tensorflow:step = 70455, loss = 0.2643779 (0.853 sec)
INFO:tensorflow:global_step/sec: 115.984
INFO:tensorflow:step = 70555, loss = 0.18139143 (0.862 sec)
INFO:tensorflow:global_step/sec: 116.836
INFO:tensorflow:step = 70655, loss = 0.17089583 (0.856 sec)
INFO:tensorflow:global_step/sec: 117.521
INFO:tensorflow:step = 70755, loss = 0.15438694 (0.852 sec)
INFO:tensorflow:global_step/sec: 116.791
INFO:tensorflow:step = 70855, loss = 0.30113053 (0.855 sec)
INFO:tensorflow:global_step/sec: 114.249
INFO:tensorflow:step = 70955, loss = 0.19823533 (0.

INFO:tensorflow:global_step/sec: 113.667
INFO:tensorflow:step = 78255, loss = 0.14832531 (0.882 sec)
INFO:tensorflow:global_step/sec: 114.87
INFO:tensorflow:step = 78355, loss = 0.27842355 (0.869 sec)
INFO:tensorflow:global_step/sec: 116.436
INFO:tensorflow:step = 78455, loss = 0.19139415 (0.859 sec)
INFO:tensorflow:global_step/sec: 115.185
INFO:tensorflow:step = 78555, loss = 0.47362393 (0.868 sec)
INFO:tensorflow:global_step/sec: 116.719
INFO:tensorflow:step = 78655, loss = 0.24458091 (0.856 sec)
INFO:tensorflow:global_step/sec: 115.781
INFO:tensorflow:step = 78755, loss = 0.21925274 (0.864 sec)
INFO:tensorflow:global_step/sec: 114.692
INFO:tensorflow:step = 78855, loss = 0.23460793 (0.872 sec)
INFO:tensorflow:Saving checkpoints for 78872 into /tmp/tf_bow_sst_20180717-1728/model.ckpt.
INFO:tensorflow:Loss for final step: 0.2416167.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-18-00:39:56
INFO:tensorflow:Graph 

INFO:tensorflow:global_step/sec: 117.801
INFO:tensorflow:step = 85273, loss = 0.20746118 (0.849 sec)
INFO:tensorflow:global_step/sec: 110.273
INFO:tensorflow:step = 85373, loss = 0.10915595 (0.908 sec)
INFO:tensorflow:global_step/sec: 111.488
INFO:tensorflow:step = 85473, loss = 0.16073528 (0.895 sec)
INFO:tensorflow:global_step/sec: 115.541
INFO:tensorflow:step = 85573, loss = 0.12781113 (0.865 sec)
INFO:tensorflow:global_step/sec: 112.904
INFO:tensorflow:step = 85673, loss = 0.28289187 (0.888 sec)
INFO:tensorflow:global_step/sec: 116.471
INFO:tensorflow:step = 85773, loss = 0.25676167 (0.857 sec)
INFO:tensorflow:global_step/sec: 117.424
INFO:tensorflow:step = 85873, loss = 0.24378109 (0.852 sec)
INFO:tensorflow:global_step/sec: 115.522
INFO:tensorflow:step = 85973, loss = 0.20769319 (0.866 sec)
INFO:tensorflow:global_step/sec: 116.474
INFO:tensorflow:step = 86073, loss = 0.21451601 (0.861 sec)
INFO:tensorflow:global_step/sec: 113.781
INFO:tensorflow:step = 86173, loss = 0.31701422 (0

INFO:tensorflow:step = 93373, loss = 0.26204702 (0.885 sec)
INFO:tensorflow:global_step/sec: 112.65
INFO:tensorflow:step = 93473, loss = 0.14028166 (0.887 sec)
INFO:tensorflow:global_step/sec: 116.264
INFO:tensorflow:step = 93573, loss = 0.25273848 (0.861 sec)
INFO:tensorflow:global_step/sec: 118.323
INFO:tensorflow:step = 93673, loss = 0.17683381 (0.844 sec)
INFO:tensorflow:global_step/sec: 115.899
INFO:tensorflow:step = 93773, loss = 0.204577 (0.864 sec)
INFO:tensorflow:global_step/sec: 110.726
INFO:tensorflow:step = 93873, loss = 0.38102037 (0.903 sec)
INFO:tensorflow:global_step/sec: 115.578
INFO:tensorflow:step = 93973, loss = 0.33564323 (0.866 sec)
INFO:tensorflow:global_step/sec: 116.009
INFO:tensorflow:step = 94073, loss = 0.24509598 (0.861 sec)
INFO:tensorflow:global_step/sec: 109.551
INFO:tensorflow:step = 94173, loss = 0.2979564 (0.915 sec)
INFO:tensorflow:global_step/sec: 110.766
INFO:tensorflow:step = 94273, loss = 0.1461757 (0.900 sec)
INFO:tensorflow:global_step/sec: 114

INFO:tensorflow:step = 100391, loss = 0.18468538 (0.877 sec)
INFO:tensorflow:global_step/sec: 116.046
INFO:tensorflow:step = 100491, loss = 0.31315148 (0.862 sec)
INFO:tensorflow:global_step/sec: 117.98
INFO:tensorflow:step = 100591, loss = 0.28519577 (0.848 sec)
INFO:tensorflow:global_step/sec: 117.169
INFO:tensorflow:step = 100691, loss = 0.15996778 (0.854 sec)
INFO:tensorflow:global_step/sec: 117.63
INFO:tensorflow:step = 100791, loss = 0.19768414 (0.855 sec)
INFO:tensorflow:global_step/sec: 116.364
INFO:tensorflow:step = 100891, loss = 0.25821415 (0.854 sec)
INFO:tensorflow:global_step/sec: 116.332
INFO:tensorflow:step = 100991, loss = 0.13278422 (0.860 sec)
INFO:tensorflow:global_step/sec: 116.477
INFO:tensorflow:step = 101091, loss = 0.122453935 (0.862 sec)
INFO:tensorflow:global_step/sec: 112.399
INFO:tensorflow:step = 101191, loss = 0.19924715 (0.885 sec)
INFO:tensorflow:global_step/sec: 115.587
INFO:tensorflow:step = 101291, loss = 0.1336789 (0.865 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 118.808
INFO:tensorflow:step = 108491, loss = 0.38297498 (0.842 sec)
INFO:tensorflow:global_step/sec: 115.081
INFO:tensorflow:step = 108591, loss = 0.22188331 (0.869 sec)
INFO:tensorflow:global_step/sec: 117.044
INFO:tensorflow:step = 108691, loss = 0.26959497 (0.854 sec)
INFO:tensorflow:global_step/sec: 115.179
INFO:tensorflow:step = 108791, loss = 0.18990093 (0.868 sec)
INFO:tensorflow:global_step/sec: 119.791
INFO:tensorflow:step = 108891, loss = 0.27379626 (0.835 sec)
INFO:tensorflow:global_step/sec: 117.674
INFO:tensorflow:step = 108991, loss = 0.20840596 (0.850 sec)
INFO:tensorflow:global_step/sec: 112.918
INFO:tensorflow:step = 109091, loss = 0.24605975 (0.886 sec)
INFO:tensorflow:global_step/sec: 112.888
INFO:tensorflow:step = 109191, loss = 0.34242928 (0.886 sec)
INFO:tensorflow:global_step/sec: 115.821
INFO:tensorflow:step = 109291, loss = 0.30864036 (0.863 sec)
INFO:tensorflow:global_step/sec: 117.677
INFO:tensorflow:step = 109391, loss = 0.1

INFO:tensorflow:global_step/sec: 113.294
INFO:tensorflow:step = 116591, loss = 0.26717088 (0.888 sec)
INFO:tensorflow:global_step/sec: 111.674
INFO:tensorflow:step = 116691, loss = 0.21075322 (0.890 sec)
INFO:tensorflow:global_step/sec: 108.99
INFO:tensorflow:step = 116791, loss = 0.19294518 (0.918 sec)
INFO:tensorflow:global_step/sec: 114.066
INFO:tensorflow:step = 116891, loss = 0.19277513 (0.877 sec)
INFO:tensorflow:global_step/sec: 118.302
INFO:tensorflow:step = 116991, loss = 0.20132235 (0.845 sec)
INFO:tensorflow:global_step/sec: 113.056
INFO:tensorflow:step = 117091, loss = 0.31040034 (0.885 sec)
INFO:tensorflow:global_step/sec: 114.722
INFO:tensorflow:step = 117191, loss = 0.24701527 (0.872 sec)
INFO:tensorflow:global_step/sec: 116.968
INFO:tensorflow:step = 117291, loss = 0.25833306 (0.856 sec)
INFO:tensorflow:global_step/sec: 116.969
INFO:tensorflow:step = 117391, loss = 0.3928954 (0.856 sec)
INFO:tensorflow:global_step/sec: 118.189
INFO:tensorflow:step = 117491, loss = 0.146

INFO:tensorflow:step = 123509, loss = 0.30930772 (0.863 sec)
INFO:tensorflow:global_step/sec: 113.399
INFO:tensorflow:step = 123609, loss = 0.39356256 (0.885 sec)
INFO:tensorflow:global_step/sec: 115.576
INFO:tensorflow:step = 123709, loss = 0.19174539 (0.865 sec)
INFO:tensorflow:global_step/sec: 113.956
INFO:tensorflow:step = 123809, loss = 0.1617193 (0.876 sec)
INFO:tensorflow:global_step/sec: 114.461
INFO:tensorflow:step = 123909, loss = 0.25344256 (0.873 sec)
INFO:tensorflow:global_step/sec: 114.737
INFO:tensorflow:step = 124009, loss = 0.16522034 (0.872 sec)
INFO:tensorflow:global_step/sec: 114.868
INFO:tensorflow:step = 124109, loss = 0.25374728 (0.870 sec)
INFO:tensorflow:global_step/sec: 115.044
INFO:tensorflow:step = 124209, loss = 0.24623667 (0.869 sec)
INFO:tensorflow:global_step/sec: 109.613
INFO:tensorflow:step = 124309, loss = 0.21109098 (0.912 sec)
INFO:tensorflow:global_step/sec: 115.089
INFO:tensorflow:step = 124409, loss = 0.4085729 (0.869 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 114.257
INFO:tensorflow:step = 131609, loss = 0.2402772 (0.876 sec)
INFO:tensorflow:global_step/sec: 114.578
INFO:tensorflow:step = 131709, loss = 0.27607983 (0.872 sec)
INFO:tensorflow:global_step/sec: 114.844
INFO:tensorflow:step = 131809, loss = 0.27376363 (0.872 sec)
INFO:tensorflow:global_step/sec: 114.954
INFO:tensorflow:step = 131909, loss = 0.44951198 (0.869 sec)
INFO:tensorflow:global_step/sec: 117.412
INFO:tensorflow:step = 132009, loss = 0.1692172 (0.851 sec)
INFO:tensorflow:global_step/sec: 114.086
INFO:tensorflow:step = 132109, loss = 0.1563043 (0.877 sec)
INFO:tensorflow:global_step/sec: 113.142
INFO:tensorflow:step = 132209, loss = 0.24547479 (0.885 sec)
INFO:tensorflow:global_step/sec: 109.425
INFO:tensorflow:step = 132309, loss = 0.2571242 (0.913 sec)
INFO:tensorflow:global_step/sec: 111.091
INFO:tensorflow:step = 132409, loss = 0.17196381 (0.900 sec)
INFO:tensorflow:global_step/sec: 111.766
INFO:tensorflow:step = 132509, loss = 0.20281

INFO:tensorflow:step = 138527, loss = 0.3005681 (0.877 sec)
INFO:tensorflow:global_step/sec: 116.07
INFO:tensorflow:step = 138627, loss = 0.1630491 (0.867 sec)
INFO:tensorflow:global_step/sec: 111.985
INFO:tensorflow:step = 138727, loss = 0.23823053 (0.887 sec)
INFO:tensorflow:global_step/sec: 113.088
INFO:tensorflow:step = 138827, loss = 0.11999336 (0.885 sec)
INFO:tensorflow:global_step/sec: 115.148
INFO:tensorflow:step = 138927, loss = 0.30345592 (0.868 sec)
INFO:tensorflow:global_step/sec: 117.906
INFO:tensorflow:step = 139027, loss = 0.15515238 (0.849 sec)
INFO:tensorflow:global_step/sec: 118.793
INFO:tensorflow:step = 139127, loss = 0.16330321 (0.842 sec)
INFO:tensorflow:global_step/sec: 117.117
INFO:tensorflow:step = 139227, loss = 0.30014342 (0.853 sec)
INFO:tensorflow:global_step/sec: 117.16
INFO:tensorflow:step = 139327, loss = 0.27053463 (0.853 sec)
INFO:tensorflow:global_step/sec: 115.179
INFO:tensorflow:step = 139427, loss = 0.16885123 (0.868 sec)
INFO:tensorflow:global_st

INFO:tensorflow:global_step/sec: 117.271
INFO:tensorflow:step = 146627, loss = 0.26008657 (0.854 sec)
INFO:tensorflow:global_step/sec: 114.358
INFO:tensorflow:step = 146727, loss = 0.16128749 (0.873 sec)
INFO:tensorflow:global_step/sec: 114.326
INFO:tensorflow:step = 146827, loss = 0.19933632 (0.875 sec)
INFO:tensorflow:global_step/sec: 118.18
INFO:tensorflow:step = 146927, loss = 0.19304647 (0.849 sec)
INFO:tensorflow:global_step/sec: 118.908
INFO:tensorflow:step = 147027, loss = 0.24445182 (0.838 sec)
INFO:tensorflow:global_step/sec: 118.711
INFO:tensorflow:step = 147127, loss = 0.23646483 (0.842 sec)
INFO:tensorflow:global_step/sec: 115.66
INFO:tensorflow:step = 147227, loss = 0.25488418 (0.865 sec)
INFO:tensorflow:global_step/sec: 114.042
INFO:tensorflow:step = 147327, loss = 0.18567139 (0.877 sec)
INFO:tensorflow:global_step/sec: 114.634
INFO:tensorflow:step = 147427, loss = 0.18811674 (0.874 sec)
INFO:tensorflow:global_step/sec: 118.446
INFO:tensorflow:step = 147527, loss = 0.161

INFO:tensorflow:global_step/sec: 116.224
INFO:tensorflow:step = 154727, loss = 0.4287698 (0.861 sec)
INFO:tensorflow:global_step/sec: 119.617
INFO:tensorflow:step = 154827, loss = 0.2238568 (0.836 sec)
INFO:tensorflow:global_step/sec: 115.252
INFO:tensorflow:step = 154927, loss = 0.1837048 (0.868 sec)
INFO:tensorflow:global_step/sec: 119.33
INFO:tensorflow:step = 155027, loss = 0.13022543 (0.838 sec)
INFO:tensorflow:global_step/sec: 119.211
INFO:tensorflow:step = 155127, loss = 0.20793054 (0.840 sec)
INFO:tensorflow:global_step/sec: 118.643
INFO:tensorflow:step = 155227, loss = 0.13945541 (0.841 sec)
INFO:tensorflow:global_step/sec: 117.258
INFO:tensorflow:step = 155327, loss = 0.1998972 (0.853 sec)
INFO:tensorflow:global_step/sec: 119.56
INFO:tensorflow:step = 155427, loss = 0.27436674 (0.838 sec)
INFO:tensorflow:global_step/sec: 116.546
INFO:tensorflow:step = 155527, loss = 0.17355052 (0.859 sec)
INFO:tensorflow:global_step/sec: 120.347
INFO:tensorflow:step = 155627, loss = 0.2075281

INFO:tensorflow:step = 161645, loss = 0.30801946 (0.833 sec)
INFO:tensorflow:global_step/sec: 123.16
INFO:tensorflow:step = 161745, loss = 0.20645265 (0.812 sec)
INFO:tensorflow:global_step/sec: 118.74
INFO:tensorflow:step = 161845, loss = 0.16553363 (0.842 sec)
INFO:tensorflow:global_step/sec: 115.304
INFO:tensorflow:step = 161945, loss = 0.1601947 (0.867 sec)
INFO:tensorflow:global_step/sec: 119.844
INFO:tensorflow:step = 162045, loss = 0.3355581 (0.835 sec)
INFO:tensorflow:global_step/sec: 117.521
INFO:tensorflow:step = 162145, loss = 0.26059365 (0.852 sec)
INFO:tensorflow:global_step/sec: 116.415
INFO:tensorflow:step = 162245, loss = 0.24255134 (0.858 sec)
INFO:tensorflow:global_step/sec: 116.778
INFO:tensorflow:step = 162345, loss = 0.2330941 (0.860 sec)
INFO:tensorflow:global_step/sec: 117.408
INFO:tensorflow:step = 162445, loss = 0.16208252 (0.848 sec)
INFO:tensorflow:global_step/sec: 116.12
INFO:tensorflow:step = 162545, loss = 0.14673892 (0.861 sec)
INFO:tensorflow:global_step

INFO:tensorflow:global_step/sec: 116.677
INFO:tensorflow:step = 169745, loss = 0.24503757 (0.857 sec)
INFO:tensorflow:global_step/sec: 120.088
INFO:tensorflow:step = 169845, loss = 0.18952528 (0.832 sec)
INFO:tensorflow:global_step/sec: 117.639
INFO:tensorflow:step = 169945, loss = 0.1363526 (0.850 sec)
INFO:tensorflow:global_step/sec: 117.965
INFO:tensorflow:step = 170045, loss = 0.31273025 (0.849 sec)
INFO:tensorflow:global_step/sec: 115.627
INFO:tensorflow:step = 170145, loss = 0.28737253 (0.863 sec)
INFO:tensorflow:global_step/sec: 119.613
INFO:tensorflow:step = 170245, loss = 0.23928882 (0.838 sec)
INFO:tensorflow:global_step/sec: 117.478
INFO:tensorflow:step = 170345, loss = 0.21002708 (0.849 sec)
INFO:tensorflow:global_step/sec: 118.419
INFO:tensorflow:step = 170445, loss = 0.17124546 (0.844 sec)
INFO:tensorflow:global_step/sec: 117.974
INFO:tensorflow:step = 170545, loss = 0.16955736 (0.848 sec)
INFO:tensorflow:global_step/sec: 117.875
INFO:tensorflow:step = 170645, loss = 0.28

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20180717-1728/model.ckpt-177462
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-18-00:54:51
INFO:tensorflow:Saving dict for global step 177462: accuracy = 0.9391605, cross_entropy_loss = 0.17173782, global_step = 177462, loss = 0.31597835
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20180717-1728/model.ckpt-177462
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 177463 into /tmp/tf_bow_sst_20180717-1728/model.ckpt.
INFO:tensorflow:step = 177463, loss = 0.41262054
INFO:tensorflow:global_step/sec: 101.203
INFO:tensorflow:step = 177563, loss = 0.236345 (0.984 sec)
INFO:tensorflow

INFO:tensorflow:global_step/sec: 115.678
INFO:tensorflow:step = 184763, loss = 0.29160124 (0.865 sec)
INFO:tensorflow:global_step/sec: 115.499
INFO:tensorflow:step = 184863, loss = 0.14080857 (0.865 sec)
INFO:tensorflow:global_step/sec: 116.018
INFO:tensorflow:step = 184963, loss = 0.13628009 (0.862 sec)
INFO:tensorflow:global_step/sec: 116.141
INFO:tensorflow:step = 185063, loss = 0.14507467 (0.861 sec)
INFO:tensorflow:global_step/sec: 114.566
INFO:tensorflow:step = 185163, loss = 0.14570624 (0.873 sec)
INFO:tensorflow:global_step/sec: 110.27
INFO:tensorflow:step = 185263, loss = 0.18058497 (0.907 sec)
INFO:tensorflow:global_step/sec: 114.068
INFO:tensorflow:step = 185363, loss = 0.2696957 (0.876 sec)
INFO:tensorflow:global_step/sec: 113.133
INFO:tensorflow:step = 185463, loss = 0.20768288 (0.884 sec)
INFO:tensorflow:global_step/sec: 109.349
INFO:tensorflow:step = 185563, loss = 0.32573012 (0.917 sec)
INFO:tensorflow:global_step/sec: 112.091
INFO:tensorflow:step = 185663, loss = 0.263

INFO:tensorflow:global_step/sec: 113.289
INFO:tensorflow:step = 192863, loss = 0.13772458 (0.883 sec)
INFO:tensorflow:global_step/sec: 109.449
INFO:tensorflow:step = 192963, loss = 0.11144298 (0.914 sec)
INFO:tensorflow:global_step/sec: 109.181
INFO:tensorflow:step = 193063, loss = 0.1829312 (0.916 sec)
INFO:tensorflow:global_step/sec: 112.959
INFO:tensorflow:step = 193163, loss = 0.16049416 (0.885 sec)
INFO:tensorflow:global_step/sec: 113.609
INFO:tensorflow:step = 193263, loss = 0.33183205 (0.881 sec)
INFO:tensorflow:global_step/sec: 113.696
INFO:tensorflow:step = 193363, loss = 0.28532064 (0.878 sec)
INFO:tensorflow:global_step/sec: 111.18
INFO:tensorflow:step = 193463, loss = 0.20312637 (0.900 sec)
INFO:tensorflow:global_step/sec: 115.866
INFO:tensorflow:step = 193563, loss = 0.199377 (0.864 sec)
INFO:tensorflow:global_step/sec: 118.046
INFO:tensorflow:step = 193663, loss = 0.34694678 (0.849 sec)
INFO:tensorflow:global_step/sec: 113.367
INFO:tensorflow:step = 193763, loss = 0.19462

## Test

In [20]:
# Single ordered pass over the held-out test split.
test_features = {"ids": test_x, "ns": test_ns}
test_input_fn = patched_numpy_io.numpy_input_fn(
    x=test_features,
    y=test_y,
    batch_size=128,
    num_epochs=1,
    shuffle=False)

# Evaluate the trained model and report its accuracy.
eval_metrics = model.evaluate(input_fn=test_input_fn, name="test")
test_accuracy = eval_metrics['accuracy']
print("Accuracy on test set: {:.02%}".format(test_accuracy))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-18-00:57:50
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tf_bow_sst_20180717-1728/model.ckpt-197180
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-18-00:58:03
INFO:tensorflow:Saving dict for global step 197180: accuracy = 0.9365166, cross_entropy_loss = 0.18041915, global_step = 197180, loss = 0.32567444
Accuracy on test set: 93.65%
