In [1]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Predicting Movie Review Sentiment with BERT on TF Hub

If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.

Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing TensorFlow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMo and GloVe. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.

Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in Tensorflow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!

In [1]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime




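In addition to the standard libraries we imported above, we'll need BERT's Python package (`bert-tensorflow`). If `import bert` fails in the cell above, uncomment and run the install command below, then re-run the imports.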

In [None]:
#!pip install bert-tensorflow

Below, we'll set an output directory location to store our model output and checkpoints. This can be a local directory, in which case you'd set OUTPUT_DIR to the name of the directory you'd like to create. If you're running this code in Google's hosted Colab, the directory won't persist after the Colab session ends.

Alternatively, if you're a GCP user, you can store output in a GCP bucket. To do that, set a directory name in OUTPUT_DIR and the name of the GCP bucket in the BUCKET field.

Set DO_DELETE to rewrite the OUTPUT_DIR if it exists. Otherwise, Tensorflow will load existing model checkpoints from that directory (if they exist).

In [2]:
# Set the output directory for saving model files.
# Optionally, set a GCP bucket location.

OUTPUT_DIR = 'OUTPUT_DIR_NAME'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
  # Rewrite OUTPUT_DIR as a gs:// path and authenticate this Colab session.
  OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
  from google.colab import auth
  auth.authenticate_user()

# Only delete when the directory is actually there. Any other failure
# (permissions, bad bucket, ...) now surfaces instead of being swallowed,
# so stale checkpoints are never silently reused when DO_DELETE is set.
if DO_DELETE and tf.gfile.Exists(OUTPUT_DIR):
  tf.gfile.DeleteRecursively(OUTPUT_DIR)
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: OUTPUT_DIR_NAME *****


# Data

First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).

In [3]:
from tensorflow import keras
import os
import re

# Load all review files from a directory into a DataFrame.
def load_directory_data(directory):
  """Read every review file in `directory` into a DataFrame.

  File names are expected to look like `<digits>_<digits>.txt`; the second
  group of digits is captured from the name and stored as a string.
  Entries whose names do not start with that pattern are skipped.

  Args:
    directory: Path of a directory containing the review text files.

  Returns:
    A pandas DataFrame with a "sentence" column (file contents) and a
    "sentiment" column (digits captured from the file name, as strings).
  """
  # Raw string: "\d" in a normal string literal is an invalid escape sequence.
  name_pattern = re.compile(r"\d+_(\d+)\.txt")
  sentences = []
  sentiments = []
  # Sort so row order does not depend on the filesystem's listing order.
  for file_name in sorted(os.listdir(directory)):
    name_match = name_pattern.match(file_name)
    if name_match is None:
      # Stray entries (e.g. hidden files) would otherwise crash on `.group`.
      continue
    with tf.gfile.GFile(os.path.join(directory, file_name), "r") as f:
      sentences.append(f.read())
    sentiments.append(name_match.group(1))
  return pd.DataFrame.from_dict({"sentence": sentences, "sentiment": sentiments})

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory, random_state=None):
  """Load the "pos" and "neg" sub-directories into one shuffled DataFrame.

  Args:
    directory: Path containing "pos" and "neg" sub-directories of reviews.
    random_state: Optional seed forwarded to `DataFrame.sample` so the
      shuffle can be repeated. The default (None) keeps the shuffle unseeded.

  Returns:
    A DataFrame holding the rows from both sub-directories with an added
    "polarity" column (1 for "pos", 0 for "neg"), shuffled and re-indexed
    from 0.
  """
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  combined = pd.concat([pos_df, neg_df])
  # frac=1 draws every row once, i.e. a full shuffle.
  shuffled = combined.sample(frac=1, random_state=random_state)
  return shuffled.reset_index(drop=True)

# Fetch the dataset archive and build the train/test DataFrames.
def download_and_load_datasets(force_download=False):
  """Download the IMDB review archive and load its train and test splits.

  The archive is fetched and extracted with `tf.keras.utils.get_file`; the
  "aclImdb/train" and "aclImdb/test" directories next to the returned
  archive path are then loaded with `load_dataset`.

  Args:
    force_download: Accepted for API compatibility; this function does not
      currently read it.

  Returns:
    A `(train_df, test_df)` tuple.
  """
  archive_path = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz",
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
      extract=True)

  dataset_root = os.path.join(os.path.dirname(archive_path), "aclImdb")
  train_df = load_dataset(os.path.join(dataset_root, "train"))
  test_df = load_dataset(os.path.join(dataset_root, "test"))
  return train_df, test_df


In [5]:
# Download the IMDB archive (if needed) and load the shuffled train/test DataFrames.
train, test = download_and_load_datasets()

Downloading data from http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz


To keep training fast, we'll take a sample of 5000 train and test examples, respectively.

In [6]:
# Down-sample each split to keep fine-tuning fast. A fixed seed makes the
# draw repeatable for a given input frame instead of changing on every run.
SAMPLE_SIZE = 5000
SAMPLE_SEED = 42
train = train.sample(SAMPLE_SIZE, random_state=SAMPLE_SEED)
test = test.sample(SAMPLE_SIZE, random_state=SAMPLE_SEED)

In [7]:
# Inspect the column names of the sampled training DataFrame.
train.columns

Index(['sentence', 'sentiment', 'polarity'], dtype='object')

Our input data is the 'sentence' column and our label is the 'polarity' column (0 for negative and 1 for positive).

In [8]:
# Name of the DataFrame column holding the review text to classify.
DATA_COLUMN = 'sentence'
# Name of the DataFrame column holding the label (0 = negative, 1 = positive).
LABEL_COLUMN = 'polarity'
# label_list is the list of all possible labels, e.g. [True, False], [0, 1] or ['dog', 'cat'].
label_list = [0, 1]

# Data Preprocessing
We'll need to transform our data into a format BERT understands. This involves two steps. First, we create  `InputExample`'s using the constructor provided in the BERT library.

- `text_a` is the text we want to classify, which in this case is the `sentence` column (`DATA_COLUMN`) in our DataFrame.
- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.
- `label` is the label for our example (here `0` or `1`, taken from the `polarity` column).

In [9]:
# Wrap every row in the InputExample class from BERT's run_classifier code.
def _row_to_input_example(row):
  """Build a single-text InputExample from one DataFrame row."""
  return bert.run_classifier.InputExample(
      guid=None,  # Globally unique ID for bookkeeping, unused in this example
      text_a=row[DATA_COLUMN],
      text_b=None,  # No second text: this is single-sentence classification
      label=row[LABEL_COLUMN])

# axis=1 applies the helper row by row, yielding one InputExample per review.
train_InputExamples = train.apply(_row_to_input_example, axis=1)

test_InputExamples = test.apply(_row_to_input_example, axis=1)

Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):


1. Lowercase our text (if we're using a BERT lowercase model)
2. Tokenize it (i.e. "sally says hi" -> ["sally", "says", "hi"])
3. Break words into WordPieces (i.e. "calling" -> ["call", "##ing"])
4. Map our words to indexes using a vocab file that BERT provides
5. Add special "CLS" and "SEP" tokens (see the [readme](https://github.com/google-research/bert))
6. Append "index" and "segment" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))

Happily, we don't have to worry about most of these details.




To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:

In [10]:
# TF Hub handle of an uncased (all lowercase) version of BERT.
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Build a FullTokenizer from the vocab file and casing flag in the Hub module."""
  # Use a throwaway graph so loading the module here does not touch the default graph.
  with tf.Graph().as_default():
    module = hub.Module(BERT_MODEL_HUB)
    info = module(signature="tokenization_info", as_dict=True)
    fetches = [info["vocab_file"], info["do_lower_case"]]
    with tf.Session() as session:
      # Evaluate both tensors to get concrete values for the tokenizer.
      vocab_path, lower_case = session.run(fetches)

  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_path, do_lower_case=lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Great--we just learned that the BERT model we're using expects lowercase data (that's what is stored in tokenization_info["do_lower_case"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:

In [11]:
# Try the tokenizer on a sample sentence to see the lowercasing and word-piece splits.
tokenizer.tokenize("This here's an example of using the BERT tokenizer")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer']

Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands.

In [12]:
# Upper bound on sequence length: sequences will be at most 128 tokens long.
MAX_SEQ_LENGTH = 128

def _examples_to_features(examples):
  """Convert InputExamples into BERT InputFeatures with the shared settings."""
  return bert.run_classifier.convert_examples_to_features(
      examples, label_list, MAX_SEQ_LENGTH, tokenizer)

# Convert the train set first, then the test set.
train_features = _examples_to_features(train_InputExamples)
test_features = _examples_to_features(test_InputExamples)







INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i ' ve read most of the comments here . i came to the conclusion that almost everybody agrees that 9 / 11 is a shocking piece of history . there are a few who think that the added narrative is weak and < br / > < br / > i agree that the narrative is weak and unnecessary . about two brothers finding each other back after the disaster and the cliff hang ##er about tony . but i don ' t think narration is unnecessary . like i lot of theorists i think that our own lives are narration ##s . we are living and making our own autobiography . so if we tell about our lives this is always in the form [SEP]


INFO:tensorflow:tokens: [CLS] i ' ve read most of the comments here . i came to the conclusion that almost everybody agrees that 9 / 11 is a shocking piece of history . there are a few who think that the added narrative is weak and < br / > < br / > i agree that the narrative is weak and unnecessary . about two brothers finding each other back after the disaster and the cliff hang ##er about tony . but i don ' t think narration is unnecessary . like i lot of theorists i think that our own lives are narration ##s . we are living and making our own autobiography . so if we tell about our lives this is always in the form [SEP]


INFO:tensorflow:input_ids: 101 1045 1005 2310 3191 2087 1997 1996 7928 2182 1012 1045 2234 2000 1996 7091 2008 2471 7955 10217 2008 1023 1013 2340 2003 1037 16880 3538 1997 2381 1012 2045 2024 1037 2261 2040 2228 2008 1996 2794 7984 2003 5410 1998 1026 7987 1013 1028 1026 7987 1013 1028 1045 5993 2008 1996 7984 2003 5410 1998 14203 1012 2055 2048 3428 4531 2169 2060 2067 2044 1996 7071 1998 1996 7656 6865 2121 2055 4116 1012 2021 1045 2123 1005 1056 2228 21283 2003 14203 1012 2066 1045 2843 1997 28442 1045 2228 2008 2256 2219 3268 2024 21283 2015 1012 2057 2024 2542 1998 2437 2256 2219 10828 1012 2061 2065 2057 2425 2055 2256 3268 2023 2003 2467 1999 1996 2433 102


INFO:tensorflow:input_ids: 101 1045 1005 2310 3191 2087 1997 1996 7928 2182 1012 1045 2234 2000 1996 7091 2008 2471 7955 10217 2008 1023 1013 2340 2003 1037 16880 3538 1997 2381 1012 2045 2024 1037 2261 2040 2228 2008 1996 2794 7984 2003 5410 1998 1026 7987 1013 1028 1026 7987 1013 1028 1045 5993 2008 1996 7984 2003 5410 1998 14203 1012 2055 2048 3428 4531 2169 2060 2067 2044 1996 7071 1998 1996 7656 6865 2121 2055 4116 1012 2021 1045 2123 1005 1056 2228 21283 2003 14203 1012 2066 1045 2843 1997 28442 1045 2228 2008 2256 2219 3268 2024 21283 2015 1012 2057 2024 2542 1998 2437 2256 2219 10828 1012 2061 2065 2057 2425 2055 2256 3268 2023 2003 2467 1999 1996 2433 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this is simply a classic film where the human voices coming from the animals are really what they ' re thoughts are . i don ' t know whether my video copy has a scene missing but it never shows how the dogs got out of the pit . it also shows an animals survival instinct and tracking abilities . put humans in the same position ant the helicopters would be out . for once an original film is improved by a remake as the voice - over for the first has been removed . only the use of animals can work in a film of this kind because using people would have had to spice out the story by turning it into murder , proving [SEP]


INFO:tensorflow:tokens: [CLS] this is simply a classic film where the human voices coming from the animals are really what they ' re thoughts are . i don ' t know whether my video copy has a scene missing but it never shows how the dogs got out of the pit . it also shows an animals survival instinct and tracking abilities . put humans in the same position ant the helicopters would be out . for once an original film is improved by a remake as the voice - over for the first has been removed . only the use of animals can work in a film of this kind because using people would have had to spice out the story by turning it into murder , proving [SEP]


INFO:tensorflow:input_ids: 101 2023 2003 3432 1037 4438 2143 2073 1996 2529 5755 2746 2013 1996 4176 2024 2428 2054 2027 1005 2128 4301 2024 1012 1045 2123 1005 1056 2113 3251 2026 2678 6100 2038 1037 3496 4394 2021 2009 2196 3065 2129 1996 6077 2288 2041 1997 1996 6770 1012 2009 2036 3065 2019 4176 7691 12753 1998 9651 7590 1012 2404 4286 1999 1996 2168 2597 14405 1996 12400 2052 2022 2041 1012 2005 2320 2019 2434 2143 2003 5301 2011 1037 12661 2004 1996 2376 1011 2058 2005 1996 2034 2038 2042 3718 1012 2069 1996 2224 1997 4176 2064 2147 1999 1037 2143 1997 2023 2785 2138 2478 2111 2052 2031 2018 2000 17688 2041 1996 2466 2011 3810 2009 2046 4028 1010 13946 102


INFO:tensorflow:input_ids: 101 2023 2003 3432 1037 4438 2143 2073 1996 2529 5755 2746 2013 1996 4176 2024 2428 2054 2027 1005 2128 4301 2024 1012 1045 2123 1005 1056 2113 3251 2026 2678 6100 2038 1037 3496 4394 2021 2009 2196 3065 2129 1996 6077 2288 2041 1997 1996 6770 1012 2009 2036 3065 2019 4176 7691 12753 1998 9651 7590 1012 2404 4286 1999 1996 2168 2597 14405 1996 12400 2052 2022 2041 1012 2005 2320 2019 2434 2143 2003 5301 2011 1037 12661 2004 1996 2376 1011 2058 2005 1996 2034 2038 2042 3718 1012 2069 1996 2224 1997 4176 2064 2147 1999 1037 2143 1997 2023 2785 2138 2478 2111 2052 2031 2018 2000 17688 2041 1996 2466 2011 3810 2009 2046 4028 1010 13946 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i couldn ' t believe my eyes when i watched nuremberg yesterday on dutch television . it starts very slowly , the backgrounds of the nuremberg trials become clear step by step , the germans have a funny english accent , but then , suddenly , in the last few minutes of the first part of the series , the audience gets to see the most shocking , horrific footage i have ever seen . < br / > < br / > it is important that people get to see such footage ( although i absolutely don ' t agree with people stating that there is no minimum age at which children can be exposed to this kind of material ) , but in this [SEP]


INFO:tensorflow:tokens: [CLS] i couldn ' t believe my eyes when i watched nuremberg yesterday on dutch television . it starts very slowly , the backgrounds of the nuremberg trials become clear step by step , the germans have a funny english accent , but then , suddenly , in the last few minutes of the first part of the series , the audience gets to see the most shocking , horrific footage i have ever seen . < br / > < br / > it is important that people get to see such footage ( although i absolutely don ' t agree with people stating that there is no minimum age at which children can be exposed to this kind of material ) , but in this [SEP]


INFO:tensorflow:input_ids: 101 1045 2481 1005 1056 2903 2026 2159 2043 1045 3427 19346 7483 2006 3803 2547 1012 2009 4627 2200 3254 1010 1996 15406 1997 1996 19346 7012 2468 3154 3357 2011 3357 1010 1996 7074 2031 1037 6057 2394 9669 1010 2021 2059 1010 3402 1010 1999 1996 2197 2261 2781 1997 1996 2034 2112 1997 1996 2186 1010 1996 4378 4152 2000 2156 1996 2087 16880 1010 23512 8333 1045 2031 2412 2464 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2003 2590 2008 2111 2131 2000 2156 2107 8333 1006 2348 1045 7078 2123 1005 1056 5993 2007 2111 5517 2008 2045 2003 2053 6263 2287 2012 2029 2336 2064 2022 6086 2000 2023 2785 1997 3430 1007 1010 2021 1999 2023 102


INFO:tensorflow:input_ids: 101 1045 2481 1005 1056 2903 2026 2159 2043 1045 3427 19346 7483 2006 3803 2547 1012 2009 4627 2200 3254 1010 1996 15406 1997 1996 19346 7012 2468 3154 3357 2011 3357 1010 1996 7074 2031 1037 6057 2394 9669 1010 2021 2059 1010 3402 1010 1999 1996 2197 2261 2781 1997 1996 2034 2112 1997 1996 2186 1010 1996 4378 4152 2000 2156 1996 2087 16880 1010 23512 8333 1045 2031 2412 2464 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2003 2590 2008 2111 2131 2000 2156 2107 8333 1006 2348 1045 7078 2123 1005 1056 5993 2007 2111 5517 2008 2045 2003 2053 6263 2287 2012 2029 2336 2064 2022 6086 2000 2023 2785 1997 3430 1007 1010 2021 1999 2023 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i was just a bit young for this one , but i had to see it . there ' s some excellent music , which many folks have mentioned , but no one seems to notice a very rare appearance by " angel " , a now mostly ignored but once quite popular musical outfit . wearing their trademark white outfits , they grind through " 20th century foxes " , and apparently all try to cr ##am into the camera ' s field of vision . keyboardist gregg gi ##uf ##fr ##ia remains the bands highlight , and has apparently never gotten much of a hair ##cut , ever ! cher ##ie currie ( ex - runaway ##s singer ) begins a brief , but [SEP]


INFO:tensorflow:tokens: [CLS] i was just a bit young for this one , but i had to see it . there ' s some excellent music , which many folks have mentioned , but no one seems to notice a very rare appearance by " angel " , a now mostly ignored but once quite popular musical outfit . wearing their trademark white outfits , they grind through " 20th century foxes " , and apparently all try to cr ##am into the camera ' s field of vision . keyboardist gregg gi ##uf ##fr ##ia remains the bands highlight , and has apparently never gotten much of a hair ##cut , ever ! cher ##ie currie ( ex - runaway ##s singer ) begins a brief , but [SEP]


INFO:tensorflow:input_ids: 101 1045 2001 2074 1037 2978 2402 2005 2023 2028 1010 2021 1045 2018 2000 2156 2009 1012 2045 1005 1055 2070 6581 2189 1010 2029 2116 12455 2031 3855 1010 2021 2053 2028 3849 2000 5060 1037 2200 4678 3311 2011 1000 4850 1000 1010 1037 2085 3262 6439 2021 2320 3243 2759 3315 11018 1012 4147 2037 11749 2317 22054 1010 2027 23088 2083 1000 3983 2301 24623 1000 1010 1998 4593 2035 3046 2000 13675 3286 2046 1996 4950 1005 1055 2492 1997 4432 1012 20173 18281 21025 16093 19699 2401 3464 1996 4996 12944 1010 1998 2038 4593 2196 5407 2172 1997 1037 2606 12690 1010 2412 999 24188 2666 20667 1006 4654 1011 19050 2015 3220 1007 4269 1037 4766 1010 2021 102


INFO:tensorflow:input_ids: 101 1045 2001 2074 1037 2978 2402 2005 2023 2028 1010 2021 1045 2018 2000 2156 2009 1012 2045 1005 1055 2070 6581 2189 1010 2029 2116 12455 2031 3855 1010 2021 2053 2028 3849 2000 5060 1037 2200 4678 3311 2011 1000 4850 1000 1010 1037 2085 3262 6439 2021 2320 3243 2759 3315 11018 1012 4147 2037 11749 2317 22054 1010 2027 23088 2083 1000 3983 2301 24623 1000 1010 1998 4593 2035 3046 2000 13675 3286 2046 1996 4950 1005 1055 2492 1997 4432 1012 20173 18281 21025 16093 19699 2401 3464 1996 4996 12944 1010 1998 2038 4593 2196 5407 2172 1997 1037 2606 12690 1010 2412 999 24188 2666 20667 1006 4654 1011 19050 2015 3220 1007 4269 1037 4766 1010 2021 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] the late , great robert bloc ##h ( author of psycho , for those of you who weren ' t paying attention ) scripted this tale of terror and it was absolutely one of the scar ##iest movies i ever saw as a kid . ( i had to walk miles just to see a movie , and it was usually dark when i emerged from the theater ; seeing a horror movie was always un ##ner ##ving , but particularly so when it was as well - executed as this one . ) when i had the opportunity to see this one several years ago on video ##ta ##pe ( which should always be a last resort ) , i was surprised at how well [SEP]


INFO:tensorflow:tokens: [CLS] the late , great robert bloc ##h ( author of psycho , for those of you who weren ' t paying attention ) scripted this tale of terror and it was absolutely one of the scar ##iest movies i ever saw as a kid . ( i had to walk miles just to see a movie , and it was usually dark when i emerged from the theater ; seeing a horror movie was always un ##ner ##ving , but particularly so when it was as well - executed as this one . ) when i had the opportunity to see this one several years ago on video ##ta ##pe ( which should always be a last resort ) , i was surprised at how well [SEP]


INFO:tensorflow:input_ids: 101 1996 2397 1010 2307 2728 15984 2232 1006 3166 1997 18224 1010 2005 2216 1997 2017 2040 4694 1005 1056 7079 3086 1007 22892 2023 6925 1997 7404 1998 2009 2001 7078 2028 1997 1996 11228 10458 5691 1045 2412 2387 2004 1037 4845 1012 1006 1045 2018 2000 3328 2661 2074 2000 2156 1037 3185 1010 1998 2009 2001 2788 2601 2043 1045 6003 2013 1996 4258 1025 3773 1037 5469 3185 2001 2467 4895 3678 6455 1010 2021 3391 2061 2043 2009 2001 2004 2092 1011 6472 2004 2023 2028 1012 1007 2043 1045 2018 1996 4495 2000 2156 2023 2028 2195 2086 3283 2006 2678 2696 5051 1006 2029 2323 2467 2022 1037 2197 7001 1007 1010 1045 2001 4527 2012 2129 2092 102


INFO:tensorflow:input_ids: 101 1996 2397 1010 2307 2728 15984 2232 1006 3166 1997 18224 1010 2005 2216 1997 2017 2040 4694 1005 1056 7079 3086 1007 22892 2023 6925 1997 7404 1998 2009 2001 7078 2028 1997 1996 11228 10458 5691 1045 2412 2387 2004 1037 4845 1012 1006 1045 2018 2000 3328 2661 2074 2000 2156 1037 3185 1010 1998 2009 2001 2788 2601 2043 1045 6003 2013 1996 4258 1025 3773 1037 5469 3185 2001 2467 4895 3678 6455 1010 2021 3391 2061 2043 2009 2001 2004 2092 1011 6472 2004 2023 2028 1012 1007 2043 1045 2018 1996 4495 2000 2156 2023 2028 2195 2086 3283 2006 2678 2696 5051 1006 2029 2323 2467 2022 1037 2197 7001 1007 1010 1045 2001 4527 2012 2129 2092 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:Writing example 0 of 5000


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] when i saw this movie for the first time i didn ' t believe my own eyes . in front of me there was a great - and well done - parody of valentin ##o . . . see stan laurel bull ##fight that way is like to see an excellent fence ##r in action ! it ' s a very good parody , rich of ideas , with a clever and charming stan . . . old and good like whiskey . ( or the boo ##ze - up after that ) [SEP]


INFO:tensorflow:tokens: [CLS] when i saw this movie for the first time i didn ' t believe my own eyes . in front of me there was a great - and well done - parody of valentin ##o . . . see stan laurel bull ##fight that way is like to see an excellent fence ##r in action ! it ' s a very good parody , rich of ideas , with a clever and charming stan . . . old and good like whiskey . ( or the boo ##ze - up after that ) [SEP]


INFO:tensorflow:input_ids: 101 2043 1045 2387 2023 3185 2005 1996 2034 2051 1045 2134 1005 1056 2903 2026 2219 2159 1012 1999 2392 1997 2033 2045 2001 1037 2307 1011 1998 2092 2589 1011 12354 1997 24632 2080 1012 1012 1012 2156 9761 11893 7087 20450 2008 2126 2003 2066 2000 2156 2019 6581 8638 2099 1999 2895 999 2009 1005 1055 1037 2200 2204 12354 1010 4138 1997 4784 1010 2007 1037 12266 1998 11951 9761 1012 1012 1012 2214 1998 2204 2066 13803 1012 1006 2030 1996 22017 4371 1011 2039 2044 2008 1007 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2043 1045 2387 2023 3185 2005 1996 2034 2051 1045 2134 1005 1056 2903 2026 2219 2159 1012 1999 2392 1997 2033 2045 2001 1037 2307 1011 1998 2092 2589 1011 12354 1997 24632 2080 1012 1012 1012 2156 9761 11893 7087 20450 2008 2126 2003 2066 2000 2156 2019 6581 8638 2099 1999 2895 999 2009 1005 1055 1037 2200 2204 12354 1010 4138 1997 4784 1010 2007 1037 12266 1998 11951 9761 1012 1012 1012 2214 1998 2204 2066 13803 1012 1006 2030 1996 22017 4371 1011 2039 2044 2008 1007 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] alan rick ##man & emma thompson give good performances with southern / new orleans accents in this detective flick . it ' s worth seeing for their scenes - and rick ##man ' s scene with hal ho ##lb ##rook . these three actors mann ##age to entertain us no matter what the movie , it seems . the plot for the movie shows potential , but one gets the impression in watching the film that it was not pulled off as well as it could have been . the fact that it is cl ##uttered by a rather un ##int ##eres ##ting sub ##pl ##ot and mostly un ##int ##eres ##ting kidnap ##pers really mud ##dles things . the movie is worth a view - [SEP]


INFO:tensorflow:tokens: [CLS] alan rick ##man & emma thompson give good performances with southern / new orleans accents in this detective flick . it ' s worth seeing for their scenes - and rick ##man ' s scene with hal ho ##lb ##rook . these three actors mann ##age to entertain us no matter what the movie , it seems . the plot for the movie shows potential , but one gets the impression in watching the film that it was not pulled off as well as it could have been . the fact that it is cl ##uttered by a rather un ##int ##eres ##ting sub ##pl ##ot and mostly un ##int ##eres ##ting kidnap ##pers really mud ##dles things . the movie is worth a view - [SEP]


INFO:tensorflow:input_ids: 101 5070 6174 2386 1004 5616 5953 2507 2204 4616 2007 2670 1013 2047 5979 24947 1999 2023 6317 17312 1012 2009 1005 1055 4276 3773 2005 2037 5019 1011 1998 6174 2386 1005 1055 3496 2007 11085 7570 20850 25399 1012 2122 2093 5889 10856 4270 2000 20432 2149 2053 3043 2054 1996 3185 1010 2009 3849 1012 1996 5436 2005 1996 3185 3065 4022 1010 2021 2028 4152 1996 8605 1999 3666 1996 2143 2008 2009 2001 2025 2766 2125 2004 2092 2004 2009 2071 2031 2042 1012 1996 2755 2008 2009 2003 18856 23128 2011 1037 2738 4895 18447 18702 3436 4942 24759 4140 1998 3262 4895 18447 18702 3436 22590 7347 2428 8494 27822 2477 1012 1996 3185 2003 4276 1037 3193 1011 102


INFO:tensorflow:input_ids: 101 5070 6174 2386 1004 5616 5953 2507 2204 4616 2007 2670 1013 2047 5979 24947 1999 2023 6317 17312 1012 2009 1005 1055 4276 3773 2005 2037 5019 1011 1998 6174 2386 1005 1055 3496 2007 11085 7570 20850 25399 1012 2122 2093 5889 10856 4270 2000 20432 2149 2053 3043 2054 1996 3185 1010 2009 3849 1012 1996 5436 2005 1996 3185 3065 4022 1010 2021 2028 4152 1996 8605 1999 3666 1996 2143 2008 2009 2001 2025 2766 2125 2004 2092 2004 2009 2071 2031 2042 1012 1996 2755 2008 2009 2003 18856 23128 2011 1037 2738 4895 18447 18702 3436 4942 24759 4140 1998 3262 4895 18447 18702 3436 22590 7347 2428 8494 27822 2477 1012 1996 3185 2003 4276 1037 3193 1011 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] " del ##usion " is what you experience when you watch this flick and then believe you saw something worth ##while . this flick , which tells of a trio of semi - psycho travelers who are up to no good somewhere in the ca desert , is amateur ##ish and just plain stupid . the film suffers from an awful story , a lou ##sy screenplay , and some terrible direction just to mention a few of the deficit ##s . if the flick has anything at all going for it , it ' s b - movie diva rubin ' s even performance . don ' t waste your time on this turkey . ( d ) [SEP]


INFO:tensorflow:tokens: [CLS] " del ##usion " is what you experience when you watch this flick and then believe you saw something worth ##while . this flick , which tells of a trio of semi - psycho travelers who are up to no good somewhere in the ca desert , is amateur ##ish and just plain stupid . the film suffers from an awful story , a lou ##sy screenplay , and some terrible direction just to mention a few of the deficit ##s . if the flick has anything at all going for it , it ' s b - movie diva rubin ' s even performance . don ' t waste your time on this turkey . ( d ) [SEP]


INFO:tensorflow:input_ids: 101 1000 3972 14499 1000 2003 2054 2017 3325 2043 2017 3422 2023 17312 1998 2059 2903 2017 2387 2242 4276 19927 1012 2023 17312 1010 2029 4136 1997 1037 7146 1997 4100 1011 18224 15183 2040 2024 2039 2000 2053 2204 4873 1999 1996 6187 5532 1010 2003 5515 4509 1998 2074 5810 5236 1012 1996 2143 17567 2013 2019 9643 2466 1010 1037 10223 6508 9000 1010 1998 2070 6659 3257 2074 2000 5254 1037 2261 1997 1996 15074 2015 1012 2065 1996 17312 2038 2505 2012 2035 2183 2005 2009 1010 2009 1005 1055 1038 1011 3185 25992 20524 1005 1055 2130 2836 1012 2123 1005 1056 5949 2115 2051 2006 2023 4977 1012 1006 1040 1007 102 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1000 3972 14499 1000 2003 2054 2017 3325 2043 2017 3422 2023 17312 1998 2059 2903 2017 2387 2242 4276 19927 1012 2023 17312 1010 2029 4136 1997 1037 7146 1997 4100 1011 18224 15183 2040 2024 2039 2000 2053 2204 4873 1999 1996 6187 5532 1010 2003 5515 4509 1998 2074 5810 5236 1012 1996 2143 17567 2013 2019 9643 2466 1010 1037 10223 6508 9000 1010 1998 2070 6659 3257 2074 2000 5254 1037 2261 1997 1996 15074 2015 1012 2065 1996 17312 2038 2505 2012 2035 2183 2005 2009 1010 2009 1005 1055 1038 1011 3185 25992 20524 1005 1055 2130 2836 1012 2123 1005 1056 5949 2115 2051 2006 2023 4977 1012 1006 1040 1007 102 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] this movie is pathetic in every way possible . bad acting , horrible script ( was there one ? ) , terrible editing , lou ##sy cinematography , cheap humor . just plain horrible . < br / > < br / > i had seen ' the wish ##master ' a couple weeks before this movie and i thought it was a dead - ring ##er for worst movie of the year . then , i saw ' the pest ' and suddenly ' the wish ##master ' didn ' t seem so bad at all . < br / > < br / > bad bad bad . ex ##cr ##uc ##iating ##ly bad . [SEP]


INFO:tensorflow:tokens: [CLS] this movie is pathetic in every way possible . bad acting , horrible script ( was there one ? ) , terrible editing , lou ##sy cinematography , cheap humor . just plain horrible . < br / > < br / > i had seen ' the wish ##master ' a couple weeks before this movie and i thought it was a dead - ring ##er for worst movie of the year . then , i saw ' the pest ' and suddenly ' the wish ##master ' didn ' t seem so bad at all . < br / > < br / > bad bad bad . ex ##cr ##uc ##iating ##ly bad . [SEP]


INFO:tensorflow:input_ids: 101 2023 3185 2003 17203 1999 2296 2126 2825 1012 2919 3772 1010 9202 5896 1006 2001 2045 2028 1029 1007 1010 6659 9260 1010 10223 6508 16434 1010 10036 8562 1012 2074 5810 9202 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2018 2464 1005 1996 4299 8706 1005 1037 3232 3134 2077 2023 3185 1998 1045 2245 2009 2001 1037 2757 1011 3614 2121 2005 5409 3185 1997 1996 2095 1012 2059 1010 1045 2387 1005 1996 20739 1005 1998 3402 1005 1996 4299 8706 1005 2134 1005 1056 4025 2061 2919 2012 2035 1012 1026 7987 1013 1028 1026 7987 1013 1028 2919 2919 2919 1012 4654 26775 14194 15370 2135 2919 1012 102 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2023 3185 2003 17203 1999 2296 2126 2825 1012 2919 3772 1010 9202 5896 1006 2001 2045 2028 1029 1007 1010 6659 9260 1010 10223 6508 16434 1010 10036 8562 1012 2074 5810 9202 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 2018 2464 1005 1996 4299 8706 1005 1037 3232 3134 2077 2023 3185 1998 1045 2245 2009 2001 1037 2757 1011 3614 2121 2005 5409 3185 1997 1996 2095 1012 2059 1010 1045 2387 1005 1996 20739 1005 1998 3402 1005 1996 4299 8706 1005 2134 1005 1056 4025 2061 2919 2012 2035 1012 1026 7987 1013 1028 1026 7987 1013 1028 2919 2919 2919 1012 4654 26775 14194 15370 2135 2919 1012 102 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] the 1970 ' s saw a rise and fall of what we have come to know as " blacks ##pl ##oit ##ation " films . the term is a reference to kind of broad catch - all , rather than a true genre of film . in short , any comedy , drama , adventure , western or urban cops & robbers shoot - em - up , that are so constructed and so cast as to appeal to the large urban black population of the mid 20th century . that indeed could embrace the wide ##st type of films , as long as the had a slant toward the inner - city black population . < br / > < br / > it appears [SEP]


INFO:tensorflow:tokens: [CLS] the 1970 ' s saw a rise and fall of what we have come to know as " blacks ##pl ##oit ##ation " films . the term is a reference to kind of broad catch - all , rather than a true genre of film . in short , any comedy , drama , adventure , western or urban cops & robbers shoot - em - up , that are so constructed and so cast as to appeal to the large urban black population of the mid 20th century . that indeed could embrace the wide ##st type of films , as long as the had a slant toward the inner - city black population . < br / > < br / > it appears [SEP]


INFO:tensorflow:input_ids: 101 1996 3359 1005 1055 2387 1037 4125 1998 2991 1997 2054 2057 2031 2272 2000 2113 2004 1000 10823 24759 28100 3370 1000 3152 1012 1996 2744 2003 1037 4431 2000 2785 1997 5041 4608 1011 2035 1010 2738 2084 1037 2995 6907 1997 2143 1012 1999 2460 1010 2151 4038 1010 3689 1010 6172 1010 2530 2030 3923 10558 1004 28019 5607 1011 7861 1011 2039 1010 2008 2024 2061 3833 1998 2061 3459 2004 2000 5574 2000 1996 2312 3923 2304 2313 1997 1996 3054 3983 2301 1012 2008 5262 2071 9979 1996 2898 3367 2828 1997 3152 1010 2004 2146 2004 1996 2018 1037 27474 2646 1996 5110 1011 2103 2304 2313 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 3544 102


INFO:tensorflow:input_ids: 101 1996 3359 1005 1055 2387 1037 4125 1998 2991 1997 2054 2057 2031 2272 2000 2113 2004 1000 10823 24759 28100 3370 1000 3152 1012 1996 2744 2003 1037 4431 2000 2785 1997 5041 4608 1011 2035 1010 2738 2084 1037 2995 6907 1997 2143 1012 1999 2460 1010 2151 4038 1010 3689 1010 6172 1010 2530 2030 3923 10558 1004 28019 5607 1011 7861 1011 2039 1010 2008 2024 2061 3833 1998 2061 3459 2004 2000 5574 2000 1996 2312 3923 2304 2313 1997 1996 3054 3983 2301 1012 2008 5262 2071 9979 1996 2898 3367 2828 1997 3152 1010 2004 2146 2004 1996 2018 1037 27474 2646 1996 5110 1011 2103 2304 2313 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 3544 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


#Creating a model

Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning).

In [13]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Builds the classification graph: the BERT TF Hub module plus one dense layer.

  Args:
    is_predicting: True when the graph is built for prediction. In that case
      dropout is skipped and no loss is computed.
    input_ids: passed to the hub module as `input_ids`.
    input_mask: passed to the hub module as `input_mask`.
    segment_ids: passed to the hub module as `segment_ids`.
    labels: integer class ids; one-hot encoded with depth `num_labels`.
    num_labels: number of output classes (rows of the new weight matrix).

  Returns:
    `(predicted_labels, log_probs)` when `is_predicting` is true, otherwise
    `(loss, predicted_labels, log_probs)`. `log_probs` is the log-softmax of
    the logits, i.e. log-probabilities rather than probabilities.
  """

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own classification layer to fine-tune on the sentiment data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting, but it must not run at prediction
    # time or the predictions become random. Only `is_predicting` is known
    # here, so both training and evaluation still go through dropout.
    # `rate=0.1` is the non-deprecated spelling of `keep_prob=0.9`.
    if not is_predicting:
      output_layer = tf.nn.dropout(output_layer, rate=0.1)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # tf.squeeze drops size-1 dimensions, so a batch holding a single example
    # yields a scalar label here rather than a length-1 vector.
    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the log-probabilities.
    if is_predicting:
      return (predicted_labels, log_probs)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    # If we're train/eval, compute cross-entropy between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [14]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns a `model_fn` closure for `tf.estimator.Estimator`.

  Args:
    num_labels: number of classes, forwarded to `create_model`.
    learning_rate: forwarded to `bert.optimization.create_optimizer`.
    num_train_steps: total training steps, forwarded to the optimizer.
    num_warmup_steps: warmup steps, forwarded to the optimizer.

  Returns:
    A `model_fn(features, labels, mode, params)` that returns an
    `EstimatorSpec` for TRAIN, EVAL or PREDICT mode.
  """
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Builds the `EstimatorSpec` for the given mode.

    Label ids are read from `features["label_ids"]`; the `labels` and
    `params` arguments are not used.
    """

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    # PREDICT: no loss, no optimizer, no metrics.
    if is_predicting:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          # NOTE: despite the key name these are log-probabilities (the
          # log_softmax output of create_model). The key and value are left
          # unchanged so existing consumers keep working.
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # TRAIN and EVAL share the loss computation.
    (loss, predicted_labels, log_probs) = create_model(
      is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

    if mode == tf.estimator.ModeKeys.TRAIN:
      # Build the optimizer only when training; the evaluation graph does not
      # need optimizer ops or their variables.
      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
      return tf.estimator.EstimatorSpec(mode=mode,
        loss=loss,
        train_op=train_op)

    # Calculate evaluation metrics (EVAL only).
    def metric_fn(label_ids, predicted_labels):
      """Returns the dict of streaming metrics reported by `evaluate`."""
      accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
      f1_score = tf.contrib.metrics.f1_score(
          label_ids,
          predicted_labels)
      # NOTE(review): AUC is computed from hard 0/1 predictions rather than
      # class scores. Kept as-is so reported numbers stay comparable.
      auc = tf.metrics.auc(
          label_ids,
          predicted_labels)
      recall = tf.metrics.recall(
          label_ids,
          predicted_labels)
      precision = tf.metrics.precision(
          label_ids,
          predicted_labels)
      true_pos = tf.metrics.true_positives(
          label_ids,
          predicted_labels)
      true_neg = tf.metrics.true_negatives(
          label_ids,
          predicted_labels)
      false_pos = tf.metrics.false_positives(
          label_ids,
          predicted_labels)
      false_neg = tf.metrics.false_negatives(
          label_ids,
          predicted_labels)
      return {
          "eval_accuracy": accuracy,
          "f1_score": f1_score,
          "auc": auc,
          "precision": precision,
          "recall": recall,
          "true_positives": true_pos,
          "true_negatives": true_neg,
          "false_positives": false_pos,
          "false_negatives": false_neg
      }

    eval_metrics = metric_fn(label_ids, predicted_labels)
    return tf.estimator.EstimatorSpec(mode=mode,
      loss=loss,
      eval_metric_ops=eval_metrics)

  # Return the actual model function in the closure
  return model_fn


In [15]:
# Training hyperparameters and checkpoint/summary settings.
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Estimator run settings: how often (in steps) to write checkpoints and summaries.
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [16]:
# Total training steps: (number of training examples / batch size) steps per
# epoch, times the number of epochs, truncated to an int.
# Warmup covers the first WARMUP_PROPORTION of those steps.
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [17]:
# Specify the output directory and how often (in steps) to save summaries
# and checkpoints.
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [18]:
# Build the model function from the label count and the optimizer schedule
# computed above.
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

# Wrap it in an Estimator configured by run_config.
# model_fn itself ignores `params`; batch_size is presumably read by the
# input functions built with run_classifier.input_fn_builder -- TODO confirm.
estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_train_distribute': None, '_num_worker_replicas': 1, '_save_checkpoints_steps': 500, '_protocol': None, '_evaluation_master': '', '_save_summary_steps': 100, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f473fa9ee10>, '_keep_checkpoint_every_n_hours': 10000, '_task_type': 'worker', '_model_dir': 'OUTPUT_DIR_NAME', '_experimental_distribute': None, '_global_id_in_cluster': 0, '_experimental_max_worker_delay_secs': None, '_eval_distribute': None, '_tf_random_seed': None, '_save_checkpoints_secs': None, '_service': None, '_log_step_count_steps': 100, '_task_id': 0, '_num_ps_replicas': 0, '_device_fn': None, '_session_creation_timeout_secs': 7200, '_keep_checkpoint_max': 5, '_is_chief': True, '_master': '', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
}


INFO:tensorflow:Using config: {'_train_distribute': None, '_num_worker_replicas': 1, '_save_checkpoints_steps': 500, '_protocol': None, '_evaluation_master': '', '_save_summary_steps': 100, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f473fa9ee10>, '_keep_checkpoint_every_n_hours': 10000, '_task_type': 'worker', '_model_dir': 'OUTPUT_DIR_NAME', '_experimental_distribute': None, '_global_id_in_cluster': 0, '_experimental_max_worker_delay_secs': None, '_eval_distribute': None, '_tf_random_seed': None, '_save_checkpoints_secs': None, '_service': None, '_log_step_count_steps': 100, '_task_id': 0, '_num_ps_replicas': 0, '_device_fn': None, '_session_creation_timeout_secs': 7200, '_keep_checkpoint_max': 5, '_is_chief': True, '_master': '', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
}


Next we use BERT's input function builder to turn our training feature set (`train_features`) into an input function (`input_fn`) that feeds the data to the Estimator. This is a pretty standard design pattern for working with Tensorflow [Estimators](https://www.tensorflow.org/guide/estimators).

In [19]:
# Create an input function for training.
# drop_remainder only needs to be True when running on TPUs; this notebook
# builds its optimizer with use_tpu=False, so the final partial batch is kept.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.

In [21]:
# Train up to num_train_steps and report the wall-clock duration.
print("Beginning Training!")
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:loss = 0.70553577, step = 0


INFO:tensorflow:loss = 0.70553577, step = 0


INFO:tensorflow:global_step/sec: 0.55486


INFO:tensorflow:global_step/sec: 0.55486


INFO:tensorflow:loss = 0.34923077, step = 100 (180.227 sec)


INFO:tensorflow:loss = 0.34923077, step = 100 (180.227 sec)


INFO:tensorflow:global_step/sec: 0.632288


INFO:tensorflow:global_step/sec: 0.632288


INFO:tensorflow:loss = 0.047224686, step = 200 (158.156 sec)


INFO:tensorflow:loss = 0.047224686, step = 200 (158.156 sec)


INFO:tensorflow:global_step/sec: 0.631984


INFO:tensorflow:global_step/sec: 0.631984


INFO:tensorflow:loss = 0.193818, step = 300 (158.232 sec)


INFO:tensorflow:loss = 0.193818, step = 300 (158.232 sec)


INFO:tensorflow:global_step/sec: 0.630026


INFO:tensorflow:global_step/sec: 0.630026


INFO:tensorflow:loss = 0.23259044, step = 400 (158.723 sec)


INFO:tensorflow:loss = 0.23259044, step = 400 (158.723 sec)


INFO:tensorflow:Saving checkpoints for 468 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:Saving checkpoints for 468 into OUTPUT_DIR_NAME/model.ckpt.


INFO:tensorflow:Loss for final step: 0.0029134806.


INFO:tensorflow:Loss for final step: 0.0029134806.


Training took time  0:13:46.817874


Now let's use our test data to see how well our model did:

In [22]:
# Input function over the held-out test features. is_training=False marks
# this as an evaluation input; drop_remainder=False keeps the final partial
# batch so every test example is scored.
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [23]:
# Evaluate on the test set with no step cap (steps=None); the returned dict
# holds the loss plus the metrics defined in model_fn's metric_fn.
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-12-04T00:43:50Z


INFO:tensorflow:Starting evaluation at 2019-12-04T00:43:50Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from OUTPUT_DIR_NAME/model.ckpt-468


INFO:tensorflow:Restoring parameters from OUTPUT_DIR_NAME/model.ckpt-468


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2019-12-04-00:45:22


INFO:tensorflow:Finished evaluation at 2019-12-04-00:45:22


INFO:tensorflow:Saving dict for global step 468: auc = 0.87062633, eval_accuracy = 0.8706, f1_score = 0.87205845, false_negatives = 290.0, false_positives = 357.0, global_step = 468, loss = 0.49833378, precision = 0.8606557, recall = 0.88376755, true_negatives = 2148.0, true_positives = 2205.0


INFO:tensorflow:Saving dict for global step 468: auc = 0.87062633, eval_accuracy = 0.8706, f1_score = 0.87205845, false_negatives = 290.0, false_positives = 357.0, global_step = 468, loss = 0.49833378, precision = 0.8606557, recall = 0.88376755, true_negatives = 2148.0, true_positives = 2205.0


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 468: OUTPUT_DIR_NAME/model.ckpt-468


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 468: OUTPUT_DIR_NAME/model.ckpt-468


{'auc': 0.87062633,
 'eval_accuracy': 0.8706,
 'f1_score': 0.87205845,
 'false_negatives': 290.0,
 'false_positives': 357.0,
 'global_step': 468,
 'loss': 0.49833378,
 'precision': 0.8606557,
 'recall': 0.88376755,
 'true_negatives': 2148.0,
 'true_positives': 2205.0}

Now let's write code to make predictions on new sentences:

In [24]:
def getPrediction(in_sentences): # keep in mind in_sentences is usually an array of strings
  """Classify each sentence as "Negative" or "Positive" with the trained estimator.

  Args:
    in_sentences: iterable of strings to classify.

  Returns:
    A list with one `(sentence, probabilities, label_name)` tuple per input
    sentence, in input order. `probabilities` is that sentence's slice of the
    estimator's 'probabilities' output with a leading axis of length 1 kept
    (so a one-sentence call returns the whole batch array, as before), and
    `label_name` is "Negative" or "Positive". Empty input returns [].
  """
  labels = ["Negative", "Positive"]
  sentences = list(in_sentences)
  if not sentences:
    return []  # nothing to score; avoid building an input pipeline over zero examples
  # guid="" is a placeholder id and label=0 is a dummy label (the real label is unknown here)
  input_examples = [run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0) for sentence in sentences]
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  # yield_single_examples=False makes predict() yield one dict per *batch*, so each
  # dict has to be split back into per-sentence results instead of being zipped
  # directly against the sentences.
  # NOTE(review): assumes 'labels' holds one integer class index per example and
  # 'probabilities' one row per example -- confirm against model_fn.
  results = []
  position = 0
  for batch in estimator.predict(predict_input_fn, yield_single_examples=False):
    # reshape() makes a batch of one (whose label may be a 0-d array) look like any other batch
    batch_labels = batch['labels'].reshape(-1)
    batch_probabilities = batch['probabilities'].reshape(len(batch_labels), -1)
    for row, label_index in enumerate(batch_labels):
      results.append((sentences[position], batch_probabilities[row:row + 1], labels[int(label_index)]))
      position += 1
  return results

In [None]:
# Load the headline records, score each headline with getPrediction, and pickle
# the [date, prediction] pairs to 'datePredictions.pkl'.
import pickle
# NOTE(review): pickle.load can execute arbitrary code while loading; only open
# a .dat file that comes from a trusted source.
with open('DJIAOutfComplete2.dat', 'rb') as records_file:  # closed as soon as loading finishes
  x = pickle.load(records_file)
#sort into dictionary by date
#second index: 1 = date 3 = headline # dictionary currently has an index issue

#dateDictionary = {}
#for i in range(len(x)):
#  dateKey = x[i][1]
#  if dateKey in dateDictionary:
#    dateDictionary[dateKey].append(x[i][3])
#  else:
#    dateDictionary[dateKey] = []
#    dateDictionary[dateKey].append(x[i][3])
datePredictions = []
# Only show TensorFlow errors so the per-call INFO logs do not bury the progress lines.
tf.logging.set_verbosity(tf.logging.ERROR)
for i, record in enumerate(x):
  st = datetime.now()
  date = record[1]
  headline = record[3]
  datePredictions.append([date, getPrediction([headline])])
  # Report progress and the time taken by this prediction on every 10th record.
  if i % 10 == 0:
    print(str(i) + ": " + date + ", " + str(datetime.now() - st))

# The context manager closes the output file even if pickling fails part-way.
with open('datePredictions.pkl', 'wb') as f:
  pickle.dump(datePredictions, f)



0: 2018-11-09, 0:00:06.807181
10: 2018-11-09, 0:00:06.726479
20: 2018-11-09, 0:00:06.585829
30: 2018-11-09, 0:00:07.044608
50: 2018-11-09, 0:00:06.988143
60: 2018-11-09, 0:00:06.865770
70: 2018-11-09, 0:00:06.814827
80: 2018-11-09, 0:00:05.929376
90: 2018-11-10, 0:00:05.949614
100: 2018-11-10, 0:00:06.714539
110: 2018-11-10, 0:00:06.687478
120: 2018-11-10, 0:00:06.744025
130: 2018-11-11, 0:00:05.812503
140: 2018-11-11, 0:00:06.592605
150: 2018-11-11, 0:00:06.579090
160: 2018-11-11, 0:00:06.789429
180: 2018-11-11, 0:00:06.658085
190: 2018-11-12, 0:00:06.824054
200: 2018-11-12, 0:00:06.800875
210: 2018-11-12, 0:00:06.742479
220: 2018-11-12, 0:00:07.237242
230: 2018-11-12, 0:00:06.989510
240: 2018-11-13, 0:00:06.746074
250: 2018-11-13, 0:00:06.281406
260: 2018-11-13, 0:00:06.230397
270: 2018-11-13, 0:00:07.066407
280: 2018-11-13, 0:00:07.208149
300: 2018-11-13, 0:00:07.471640
310: 2018-11-13, 0:00:06.043552
320: 2018-11-14, 0:00:06.047001
330: 2018-11-14, 0:00:07.023047
340: 2018-11-14, 0

['AXP',
 '2018-11-09',
 'The White House press secretary, Sarah Huckabee Sanders, posted a clip from a contributor to Infowars, the far-right site banned by YouTube, Facebook and Twitter.',
 'Trump Administration Uses Misleading Video to Justify Barring of CNN’s Jim Acosta',
 'The Trump administration relied on a misleadingly edited video from a contributor to the conspiracy site Infowars to help justify removing the credentials of CNN’s chief White House correspondent, a striking escalation in President Trump’s broadsides against the press.']

Voila! We have a sentiment classifier!

In [None]:
#print(datePredictions)



