In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

W0524 13:03:27.313066 139773168248640 __init__.py:56] Some hub symbols are not available because TensorFlow version is less than 1.14


In [2]:
!pip install --user bert-tensorflow



In [3]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

In [4]:
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  data = {}
  data["sentence"] = []
  data["sentiment"] = []
  for file_path in os.listdir(directory):
    with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
      data["sentence"].append(f.read())
      data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  pos_df["polarity"] = 1
  neg_df["polarity"] = 0
  return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
  dataset = tf.keras.utils.get_file(
      fname="aclImdb.tar.gz", 
      origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", 
      extract=True)
  
  train_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                       "aclImdb", "train"))
  test_df = load_dataset(os.path.join(os.path.dirname(dataset), 
                                      "aclImdb", "test"))
  
  return train_df, test_df


In [5]:
train, test = download_and_load_datasets()

In [6]:
#to train fast we have taken only 5000 sample of train and test
train = train.sample(5000)
test = test.sample(5000)

In [7]:
train.columns

Index(['sentence', 'sentiment', 'polarity'], dtype='object')

In [8]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1
label_list = [0, 1]

In [9]:
"""We'll need to transform our data into a format BERT understands. This involves two steps. First, we create InputExample's using the constructor provided in the BERT library.

text_a is the text we want to classify, which in this case, is the Request field in our Dataframe.
text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task, so we can leave text_b blank.
label is the label for our example, i.e. True, False
"""


"We'll need to transform our data into a format BERT understands. This involves two steps. First, we create InputExample's using the constructor provided in the BERT library.\n\ntext_a is the text we want to classify, which in this case, is the Request field in our Dataframe.\ntext_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task, so we can leave text_b blank.\nlabel is the label for our example, i.e. True, False\n"

In [10]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

In [15]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])
      
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

Instructions for updating:
Colocations handled automatically by placer.


W0524 12:19:09.277715 139713421719360 deprecation.py:323] From /home/vaibhav/.local/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py:3632: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0524 12:19:11.085018 139713421719360 saver.py:1483] Saver not created because there are no variables in the graph to restore


In [16]:
tokenizer.tokenize("This here's an example of using the BERT tokenizer.we are just checking it")

['this',
 'here',
 "'",
 's',
 'an',
 'example',
 'of',
 'using',
 'the',
 'bert',
 'token',
 '##izer',
 '.',
 'we',
 'are',
 'just',
 'checking',
 'it']

In [17]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

INFO:tensorflow:Writing example 0 of 5000


I0524 12:19:28.476782 139713421719360 run_classifier.py:774] Writing example 0 of 5000


INFO:tensorflow:*** Example ***


I0524 12:19:28.480580 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:28.482042 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] for a movie that gets no respect there sure are a lot of memorable quotes listed for this gem . imagine a movie where joe pi ##sco ##po is actually funny ! maureen staple ##ton is a scene steal ##er . the mor ##oni character is an absolute scream . watch for alan " the skipper " hale jr . as a police sgt . [SEP]


I0524 12:19:28.483455 139713421719360 run_classifier.py:464] tokens: [CLS] for a movie that gets no respect there sure are a lot of memorable quotes listed for this gem . imagine a movie where joe pi ##sco ##po is actually funny ! maureen staple ##ton is a scene steal ##er . the mor ##oni character is an absolute scream . watch for alan " the skipper " hale jr . as a police sgt . [SEP]


INFO:tensorflow:input_ids: 101 2005 1037 3185 2008 4152 2053 4847 2045 2469 2024 1037 2843 1997 13432 16614 3205 2005 2023 17070 1012 5674 1037 3185 2073 3533 14255 9363 6873 2003 2941 6057 999 19167 18785 2669 2003 1037 3496 8954 2121 1012 1996 22822 10698 2839 2003 2019 7619 6978 1012 3422 2005 5070 1000 1996 23249 1000 13084 3781 1012 2004 1037 2610 17001 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:28.484617 139713421719360 run_classifier.py:465] input_ids: 101 2005 1037 3185 2008 4152 2053 4847 2045 2469 2024 1037 2843 1997 13432 16614 3205 2005 2023 17070 1012 5674 1037 3185 2073 3533 14255 9363 6873 2003 2941 6057 999 19167 18785 2669 2003 1037 3496 8954 2121 1012 1996 22822 10698 2839 2003 2019 7619 6978 1012 3422 2005 5070 1000 1996 23249 1000 13084 3781 1012 2004 1037 2610 17001 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:28.485648 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:28.486577 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0524 12:19:28.487485 139713421719360 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0524 12:19:28.492355 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:28.495852 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] most movies i can sit through easily , even if i do not particularly like the movie . i am the type of person who recognizes great films even if i do not like the genre . this is the first movie i could not stand to watch . cat in the hat is the worst movie i have ever seen - - and i ' ve seen a lot of movies . the acting is okay ( myers is good as the cat , it ' s just that he is really annoying ) . the silly songs the cat sings were boring and mono ##ton ##ous , even for the children in the audience . the plot drag ##s on and on , and [SEP]


I0524 12:19:28.496960 139713421719360 run_classifier.py:464] tokens: [CLS] most movies i can sit through easily , even if i do not particularly like the movie . i am the type of person who recognizes great films even if i do not like the genre . this is the first movie i could not stand to watch . cat in the hat is the worst movie i have ever seen - - and i ' ve seen a lot of movies . the acting is okay ( myers is good as the cat , it ' s just that he is really annoying ) . the silly songs the cat sings were boring and mono ##ton ##ous , even for the children in the audience . the plot drag ##s on and on , and [SEP]


INFO:tensorflow:input_ids: 101 2087 5691 1045 2064 4133 2083 4089 1010 2130 2065 1045 2079 2025 3391 2066 1996 3185 1012 1045 2572 1996 2828 1997 2711 2040 14600 2307 3152 2130 2065 1045 2079 2025 2066 1996 6907 1012 2023 2003 1996 2034 3185 1045 2071 2025 3233 2000 3422 1012 4937 1999 1996 6045 2003 1996 5409 3185 1045 2031 2412 2464 1011 1011 1998 1045 1005 2310 2464 1037 2843 1997 5691 1012 1996 3772 2003 3100 1006 13854 2003 2204 2004 1996 4937 1010 2009 1005 1055 2074 2008 2002 2003 2428 15703 1007 1012 1996 10021 2774 1996 4937 10955 2020 11771 1998 18847 2669 3560 1010 2130 2005 1996 2336 1999 1996 4378 1012 1996 5436 8011 2015 2006 1998 2006 1010 1998 102


I0524 12:19:28.497968 139713421719360 run_classifier.py:465] input_ids: 101 2087 5691 1045 2064 4133 2083 4089 1010 2130 2065 1045 2079 2025 3391 2066 1996 3185 1012 1045 2572 1996 2828 1997 2711 2040 14600 2307 3152 2130 2065 1045 2079 2025 2066 1996 6907 1012 2023 2003 1996 2034 3185 1045 2071 2025 3233 2000 3422 1012 4937 1999 1996 6045 2003 1996 5409 3185 1045 2031 2412 2464 1011 1011 1998 1045 1005 2310 2464 1037 2843 1997 5691 1012 1996 3772 2003 3100 1006 13854 2003 2204 2004 1996 4937 1010 2009 1005 1055 2074 2008 2002 2003 2428 15703 1007 1012 1996 10021 2774 1996 4937 10955 2020 11771 1998 18847 2669 3560 1010 2130 2005 1996 2336 1999 1996 4378 1012 1996 5436 8011 2015 2006 1998 2006 1010 1998 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:28.498948 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:28.499861 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0524 12:19:28.500749 139713421719360 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0524 12:19:28.503743 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:28.504667 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] i can agree with other comments that there wasn ' t an enormous amount of history discussed in the movie but it wasn ' t a documentary ! it was meant to entertain and i think it did a very good job at it . < br / > < br / > i agree with the black family . the scenes with them seemed out of place . like all of a sudden it would be thrown in but i did catch on to the story and the connection between the families later on and found it pretty good . < br / > < br / > despite it wasn ' t a re - enactment of the 60s it did bring into the [SEP]


I0524 12:19:28.505717 139713421719360 run_classifier.py:464] tokens: [CLS] i can agree with other comments that there wasn ' t an enormous amount of history discussed in the movie but it wasn ' t a documentary ! it was meant to entertain and i think it did a very good job at it . < br / > < br / > i agree with the black family . the scenes with them seemed out of place . like all of a sudden it would be thrown in but i did catch on to the story and the connection between the families later on and found it pretty good . < br / > < br / > despite it wasn ' t a re - enactment of the 60s it did bring into the [SEP]


INFO:tensorflow:input_ids: 101 1045 2064 5993 2007 2060 7928 2008 2045 2347 1005 1056 2019 8216 3815 1997 2381 6936 1999 1996 3185 2021 2009 2347 1005 1056 1037 4516 999 2009 2001 3214 2000 20432 1998 1045 2228 2009 2106 1037 2200 2204 3105 2012 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 5993 2007 1996 2304 2155 1012 1996 5019 2007 2068 2790 2041 1997 2173 1012 2066 2035 1997 1037 5573 2009 2052 2022 6908 1999 2021 1045 2106 4608 2006 2000 1996 2466 1998 1996 4434 2090 1996 2945 2101 2006 1998 2179 2009 3492 2204 1012 1026 7987 1013 1028 1026 7987 1013 1028 2750 2009 2347 1005 1056 1037 2128 1011 26465 1997 1996 20341 2009 2106 3288 2046 1996 102


I0524 12:19:28.506683 139713421719360 run_classifier.py:465] input_ids: 101 1045 2064 5993 2007 2060 7928 2008 2045 2347 1005 1056 2019 8216 3815 1997 2381 6936 1999 1996 3185 2021 2009 2347 1005 1056 1037 4516 999 2009 2001 3214 2000 20432 1998 1045 2228 2009 2106 1037 2200 2204 3105 2012 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1045 5993 2007 1996 2304 2155 1012 1996 5019 2007 2068 2790 2041 1997 2173 1012 2066 2035 1997 1037 5573 2009 2052 2022 6908 1999 2021 1045 2106 4608 2006 2000 1996 2466 1998 1996 4434 2090 1996 2945 2101 2006 1998 2179 2009 3492 2204 1012 1026 7987 1013 1028 1026 7987 1013 1028 2750 2009 2347 1005 1056 1037 2128 1011 26465 1997 1996 20341 2009 2106 3288 2046 1996 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:28.507606 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:28.508536 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0524 12:19:28.509460 139713421719360 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0524 12:19:28.514068 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:28.515782 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] hay ##ao mi ##ya ##zaki has no equal when it comes to using hand - drawn animation as a form of storytelling , yet often he is being compared to walt disney . that is just so unfair , because it becomes apparent by watching mi ##ya ##zaki ' s films that he is the superior artist . he really has a gift of thrilling both grown ##ups and children , and lap ##uta is indeed one awesome ride . < br / > < br / > but where can i begin to describe a movie so magical and breath ##taking ! mi ##ya ##zaki ' s works have never cease to ama ##ze me . lap ##uta is an adventure of a grand scale [SEP]


I0524 12:19:28.520082 139713421719360 run_classifier.py:464] tokens: [CLS] hay ##ao mi ##ya ##zaki has no equal when it comes to using hand - drawn animation as a form of storytelling , yet often he is being compared to walt disney . that is just so unfair , because it becomes apparent by watching mi ##ya ##zaki ' s films that he is the superior artist . he really has a gift of thrilling both grown ##ups and children , and lap ##uta is indeed one awesome ride . < br / > < br / > but where can i begin to describe a movie so magical and breath ##taking ! mi ##ya ##zaki ' s works have never cease to ama ##ze me . lap ##uta is an adventure of a grand scale [SEP]


INFO:tensorflow:input_ids: 101 10974 7113 2771 3148 18637 2038 2053 5020 2043 2009 3310 2000 2478 2192 1011 4567 7284 2004 1037 2433 1997 20957 1010 2664 2411 2002 2003 2108 4102 2000 10598 6373 1012 2008 2003 2074 2061 15571 1010 2138 2009 4150 6835 2011 3666 2771 3148 18637 1005 1055 3152 2008 2002 2003 1996 6020 3063 1012 2002 2428 2038 1037 5592 1997 26162 2119 4961 22264 1998 2336 1010 1998 5001 13210 2003 5262 2028 12476 4536 1012 1026 7987 1013 1028 1026 7987 1013 1028 2021 2073 2064 1045 4088 2000 6235 1037 3185 2061 8687 1998 3052 17904 999 2771 3148 18637 1005 1055 2573 2031 2196 13236 2000 25933 4371 2033 1012 5001 13210 2003 2019 6172 1997 1037 2882 4094 102


I0524 12:19:28.521435 139713421719360 run_classifier.py:465] input_ids: 101 10974 7113 2771 3148 18637 2038 2053 5020 2043 2009 3310 2000 2478 2192 1011 4567 7284 2004 1037 2433 1997 20957 1010 2664 2411 2002 2003 2108 4102 2000 10598 6373 1012 2008 2003 2074 2061 15571 1010 2138 2009 4150 6835 2011 3666 2771 3148 18637 1005 1055 3152 2008 2002 2003 1996 6020 3063 1012 2002 2428 2038 1037 5592 1997 26162 2119 4961 22264 1998 2336 1010 1998 5001 13210 2003 5262 2028 12476 4536 1012 1026 7987 1013 1028 1026 7987 1013 1028 2021 2073 2064 1045 4088 2000 6235 1037 3185 2061 8687 1998 3052 17904 999 2771 3148 18637 1005 1055 2573 2031 2196 13236 2000 25933 4371 2033 1012 5001 13210 2003 2019 6172 1997 1037 2882 4094 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:28.522395 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:28.523292 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0524 12:19:28.524134 139713421719360 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0524 12:19:28.535843 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:28.537221 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] sorry , not good . < br / > < br / > it starts out interesting , but loose ##s its way a few minutes into the movie . < br / > < br / > it does not help a lot that none of the normally great actors ( qu ##aid , glover , er ##me ##y , leto etc . ) delivers a really good performance . < br / > < br / > it might be owed to the fact that i saw a dubbed version ( german ) , but dennis qu ##aid ' s character was especially wooden and annoying , and danny glover does not really make for a bel ##ie ##vable villain . moreover , jared [SEP]


I0524 12:19:28.538138 139713421719360 run_classifier.py:464] tokens: [CLS] sorry , not good . < br / > < br / > it starts out interesting , but loose ##s its way a few minutes into the movie . < br / > < br / > it does not help a lot that none of the normally great actors ( qu ##aid , glover , er ##me ##y , leto etc . ) delivers a really good performance . < br / > < br / > it might be owed to the fact that i saw a dubbed version ( german ) , but dennis qu ##aid ' s character was especially wooden and annoying , and danny glover does not really make for a bel ##ie ##vable villain . moreover , jared [SEP]


INFO:tensorflow:input_ids: 101 3374 1010 2025 2204 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 4627 2041 5875 1010 2021 6065 2015 2049 2126 1037 2261 2781 2046 1996 3185 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2515 2025 2393 1037 2843 2008 3904 1997 1996 5373 2307 5889 1006 24209 14326 1010 20012 1010 9413 4168 2100 1010 24543 4385 1012 1007 18058 1037 2428 2204 2836 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2453 2022 12232 2000 1996 2755 2008 1045 2387 1037 9188 2544 1006 2446 1007 1010 2021 6877 24209 14326 1005 1055 2839 2001 2926 4799 1998 15703 1010 1998 6266 20012 2515 2025 2428 2191 2005 1037 19337 2666 12423 12700 1012 9308 1010 8334 102


I0524 12:19:28.540274 139713421719360 run_classifier.py:465] input_ids: 101 3374 1010 2025 2204 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 4627 2041 5875 1010 2021 6065 2015 2049 2126 1037 2261 2781 2046 1996 3185 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2515 2025 2393 1037 2843 2008 3904 1997 1996 5373 2307 5889 1006 24209 14326 1010 20012 1010 9413 4168 2100 1010 24543 4385 1012 1007 18058 1037 2428 2204 2836 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2453 2022 12232 2000 1996 2755 2008 1045 2387 1037 9188 2544 1006 2446 1007 1010 2021 6877 24209 14326 1005 1055 2839 2001 2926 4799 1998 15703 1010 1998 6266 20012 2515 2025 2428 2191 2005 1037 19337 2666 12423 12700 1012 9308 1010 8334 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:28.541590 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:28.542585 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0524 12:19:28.543465 139713421719360 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:Writing example 0 of 5000


I0524 12:19:43.123279 139713421719360 run_classifier.py:774] Writing example 0 of 5000


INFO:tensorflow:*** Example ***


I0524 12:19:43.125720 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:43.126744 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] if you haven ' t seen this film , do it . its a genre ##mi ##x as i ' ve never seen another . some very surreal scenes , some hilarious funny stuff , a film noir fell ##ing , musical numbers with a swing , sex scenes ( the 2nd best played orgasm on film , best ##ed only by sally ) , a pitch of orson welles blended together into a work of art . as an work of art it hasn ' t to be logical at the end , at least not logical for everybody ; - ) . i owned an tv copy on vhs but loaned it to an ex - girlfriend and now i can ' t get [SEP]


I0524 12:19:43.127630 139713421719360 run_classifier.py:464] tokens: [CLS] if you haven ' t seen this film , do it . its a genre ##mi ##x as i ' ve never seen another . some very surreal scenes , some hilarious funny stuff , a film noir fell ##ing , musical numbers with a swing , sex scenes ( the 2nd best played orgasm on film , best ##ed only by sally ) , a pitch of orson welles blended together into a work of art . as an work of art it hasn ' t to be logical at the end , at least not logical for everybody ; - ) . i owned an tv copy on vhs but loaned it to an ex - girlfriend and now i can ' t get [SEP]


INFO:tensorflow:input_ids: 101 2065 2017 4033 1005 1056 2464 2023 2143 1010 2079 2009 1012 2049 1037 6907 4328 2595 2004 1045 1005 2310 2196 2464 2178 1012 2070 2200 16524 5019 1010 2070 26316 6057 4933 1010 1037 2143 15587 3062 2075 1010 3315 3616 2007 1037 7370 1010 3348 5019 1006 1996 3416 2190 2209 13892 2006 2143 1010 2190 2098 2069 2011 8836 1007 1010 1037 6510 1997 25026 23447 19803 2362 2046 1037 2147 1997 2396 1012 2004 2019 2147 1997 2396 2009 8440 1005 1056 2000 2022 11177 2012 1996 2203 1010 2012 2560 2025 11177 2005 7955 1025 1011 1007 1012 1045 3079 2019 2694 6100 2006 17550 2021 13190 2009 2000 2019 4654 1011 6513 1998 2085 1045 2064 1005 1056 2131 102


I0524 12:19:43.128438 139713421719360 run_classifier.py:465] input_ids: 101 2065 2017 4033 1005 1056 2464 2023 2143 1010 2079 2009 1012 2049 1037 6907 4328 2595 2004 1045 1005 2310 2196 2464 2178 1012 2070 2200 16524 5019 1010 2070 26316 6057 4933 1010 1037 2143 15587 3062 2075 1010 3315 3616 2007 1037 7370 1010 3348 5019 1006 1996 3416 2190 2209 13892 2006 2143 1010 2190 2098 2069 2011 8836 1007 1010 1037 6510 1997 25026 23447 19803 2362 2046 1037 2147 1997 2396 1012 2004 2019 2147 1997 2396 2009 8440 1005 1056 2000 2022 11177 2012 1996 2203 1010 2012 2560 2025 11177 2005 7955 1025 1011 1007 1012 1045 3079 2019 2694 6100 2006 17550 2021 13190 2009 2000 2019 4654 1011 6513 1998 2085 1045 2064 1005 1056 2131 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:43.129288 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:43.130018 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0524 12:19:43.130704 139713421719360 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0524 12:19:43.140167 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:43.141253 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] while star trek the motion picture was mostly boring , star trek the final frontier is plain bad . in this terrible sequel , the crew is on shore leave when they get a distress signal from the federation that ambassadors representing earth , rom ##ulus and k ##ron ##os ( the k ##ling ##on home world ) have been kidnapped by a renegade vulcan bent on his quest to attain a stars ##hip to venture into the great barrier . there , he hopes to find god . using mystic ##ism and bad writing , he persuade ##s many of the senior officers of the enterprise to betray kirk and get a hold of the ship . they do reach the inside of the great [SEP]


I0524 12:19:43.142213 139713421719360 run_classifier.py:464] tokens: [CLS] while star trek the motion picture was mostly boring , star trek the final frontier is plain bad . in this terrible sequel , the crew is on shore leave when they get a distress signal from the federation that ambassadors representing earth , rom ##ulus and k ##ron ##os ( the k ##ling ##on home world ) have been kidnapped by a renegade vulcan bent on his quest to attain a stars ##hip to venture into the great barrier . there , he hopes to find god . using mystic ##ism and bad writing , he persuade ##s many of the senior officers of the enterprise to betray kirk and get a hold of the ship . they do reach the inside of the great [SEP]


INFO:tensorflow:input_ids: 101 2096 2732 10313 1996 4367 3861 2001 3262 11771 1010 2732 10313 1996 2345 8880 2003 5810 2919 1012 1999 2023 6659 8297 1010 1996 3626 2003 2006 5370 2681 2043 2027 2131 1037 12893 4742 2013 1996 4657 2008 20986 5052 3011 1010 17083 11627 1998 1047 4948 2891 1006 1996 1047 2989 2239 2188 2088 1007 2031 2042 11364 2011 1037 28463 25993 6260 2006 2010 8795 2000 18759 1037 3340 5605 2000 6957 2046 1996 2307 8803 1012 2045 1010 2002 8069 2000 2424 2643 1012 2478 17477 2964 1998 2919 3015 1010 2002 13984 2015 2116 1997 1996 3026 3738 1997 1996 6960 2000 20895 11332 1998 2131 1037 2907 1997 1996 2911 1012 2027 2079 3362 1996 2503 1997 1996 2307 102


I0524 12:19:43.143124 139713421719360 run_classifier.py:465] input_ids: 101 2096 2732 10313 1996 4367 3861 2001 3262 11771 1010 2732 10313 1996 2345 8880 2003 5810 2919 1012 1999 2023 6659 8297 1010 1996 3626 2003 2006 5370 2681 2043 2027 2131 1037 12893 4742 2013 1996 4657 2008 20986 5052 3011 1010 17083 11627 1998 1047 4948 2891 1006 1996 1047 2989 2239 2188 2088 1007 2031 2042 11364 2011 1037 28463 25993 6260 2006 2010 8795 2000 18759 1037 3340 5605 2000 6957 2046 1996 2307 8803 1012 2045 1010 2002 8069 2000 2424 2643 1012 2478 17477 2964 1998 2919 3015 1010 2002 13984 2015 2116 1997 1996 3026 3738 1997 1996 6960 2000 20895 11332 1998 2131 1037 2907 1997 1996 2911 1012 2027 2079 3362 1996 2503 1997 1996 2307 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:43.143951 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:43.144837 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0524 12:19:43.145593 139713421719360 run_classifier.py:468] label: 0 (id = 0)


INFO:tensorflow:*** Example ***


I0524 12:19:43.154443 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:43.155425 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] this was a movie of which i kept on reading the reviews again and again ; and despite it being played at film museum and not at path ##e theatres i decided to give this movie a try . the reasons were many in the reviews it was compared with pulp fiction , it had several parallel stories running in the movie and lastly it had already won 17 awards internationally in various categories . i was eager to see this movie and due to my off day at green ##pe ##ace i decided to make myself happy by going and seeing this movie . < br / > < br / > it is a story based in finland . i think it reflected the [SEP]


I0524 12:19:43.156162 139713421719360 run_classifier.py:464] tokens: [CLS] this was a movie of which i kept on reading the reviews again and again ; and despite it being played at film museum and not at path ##e theatres i decided to give this movie a try . the reasons were many in the reviews it was compared with pulp fiction , it had several parallel stories running in the movie and lastly it had already won 17 awards internationally in various categories . i was eager to see this movie and due to my off day at green ##pe ##ace i decided to make myself happy by going and seeing this movie . < br / > < br / > it is a story based in finland . i think it reflected the [SEP]


INFO:tensorflow:input_ids: 101 2023 2001 1037 3185 1997 2029 1045 2921 2006 3752 1996 4391 2153 1998 2153 1025 1998 2750 2009 2108 2209 2012 2143 2688 1998 2025 2012 4130 2063 13166 1045 2787 2000 2507 2023 3185 1037 3046 1012 1996 4436 2020 2116 1999 1996 4391 2009 2001 4102 2007 16016 4349 1010 2009 2018 2195 5903 3441 2770 1999 1996 3185 1998 22267 2009 2018 2525 2180 2459 2982 7587 1999 2536 7236 1012 1045 2001 9461 2000 2156 2023 3185 1998 2349 2000 2026 2125 2154 2012 2665 5051 10732 1045 2787 2000 2191 2870 3407 2011 2183 1998 3773 2023 3185 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2003 1037 2466 2241 1999 6435 1012 1045 2228 2009 7686 1996 102


I0524 12:19:43.156902 139713421719360 run_classifier.py:465] input_ids: 101 2023 2001 1037 3185 1997 2029 1045 2921 2006 3752 1996 4391 2153 1998 2153 1025 1998 2750 2009 2108 2209 2012 2143 2688 1998 2025 2012 4130 2063 13166 1045 2787 2000 2507 2023 3185 1037 3046 1012 1996 4436 2020 2116 1999 1996 4391 2009 2001 4102 2007 16016 4349 1010 2009 2018 2195 5903 3441 2770 1999 1996 3185 1998 22267 2009 2018 2525 2180 2459 2982 7587 1999 2536 7236 1012 1045 2001 9461 2000 2156 2023 3185 1998 2349 2000 2026 2125 2154 2012 2665 5051 10732 1045 2787 2000 2191 2870 3407 2011 2183 1998 3773 2023 3185 1012 1026 7987 1013 1028 1026 7987 1013 1028 2009 2003 1037 2466 2241 1999 6435 1012 1045 2228 2009 7686 1996 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:43.157599 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:43.158277 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0524 12:19:43.158938 139713421719360 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0524 12:19:43.162521 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:43.163258 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] ok so paris hilton sucks in it ( typical malibu barbie ) but the rest of the actors are just great ! i watched the film last night and it totally kept me going thru out the whole film . chad michael murray is soo ##oo ##oo ##oo ##oo ##oo ##oo ##oo ##oo ##o hot ##tt ##tt ##tt ##tt ##tt ##tt ##tt ##tt ##tt ##tt and he ' s just a ace actor . total hero man . the main girl who is the sister to nick ( chad ) is such a brilliant actress . thumbs up . i think it ' s so different to films out their these days , most of them to go with psycho ##path ' s , possessions , [SEP]


I0524 12:19:43.163957 139713421719360 run_classifier.py:464] tokens: [CLS] ok so paris hilton sucks in it ( typical malibu barbie ) but the rest of the actors are just great ! i watched the film last night and it totally kept me going thru out the whole film . chad michael murray is soo ##oo ##oo ##oo ##oo ##oo ##oo ##oo ##oo ##o hot ##tt ##tt ##tt ##tt ##tt ##tt ##tt ##tt ##tt ##tt and he ' s just a ace actor . total hero man . the main girl who is the sister to nick ( chad ) is such a brilliant actress . thumbs up . i think it ' s so different to films out their these days , most of them to go with psycho ##path ' s , possessions , [SEP]


INFO:tensorflow:input_ids: 101 7929 2061 3000 15481 19237 1999 2009 1006 5171 29047 22635 1007 2021 1996 2717 1997 1996 5889 2024 2074 2307 999 1045 3427 1996 2143 2197 2305 1998 2009 6135 2921 2033 2183 27046 2041 1996 2878 2143 1012 9796 2745 6264 2003 17111 9541 9541 9541 9541 9541 9541 9541 9541 2080 2980 4779 4779 4779 4779 4779 4779 4779 4779 4779 4779 1998 2002 1005 1055 2074 1037 9078 3364 1012 2561 5394 2158 1012 1996 2364 2611 2040 2003 1996 2905 2000 4172 1006 9796 1007 2003 2107 1037 8235 3883 1012 16784 2039 1012 1045 2228 2009 1005 1055 2061 2367 2000 3152 2041 2037 2122 2420 1010 2087 1997 2068 2000 2175 2007 18224 15069 1005 1055 1010 13689 1010 102


I0524 12:19:43.164677 139713421719360 run_classifier.py:465] input_ids: 101 7929 2061 3000 15481 19237 1999 2009 1006 5171 29047 22635 1007 2021 1996 2717 1997 1996 5889 2024 2074 2307 999 1045 3427 1996 2143 2197 2305 1998 2009 6135 2921 2033 2183 27046 2041 1996 2878 2143 1012 9796 2745 6264 2003 17111 9541 9541 9541 9541 9541 9541 9541 9541 2080 2980 4779 4779 4779 4779 4779 4779 4779 4779 4779 4779 1998 2002 1005 1055 2074 1037 9078 3364 1012 2561 5394 2158 1012 1996 2364 2611 2040 2003 1996 2905 2000 4172 1006 9796 1007 2003 2107 1037 8235 3883 1012 16784 2039 1012 1045 2228 2009 1005 1055 2061 2367 2000 3152 2041 2037 2122 2420 1010 2087 1997 2068 2000 2175 2007 18224 15069 1005 1055 1010 13689 1010 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:43.169468 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:43.176041 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


I0524 12:19:43.177511 139713421719360 run_classifier.py:468] label: 1 (id = 1)


INFO:tensorflow:*** Example ***


I0524 12:19:43.183623 139713421719360 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0524 12:19:43.185307 139713421719360 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] this film has some pretty gore ##y parts like a boo ##b getting bit off and a other big bites . castle freak himself is a good monster . i would be scared to pieces if he was coming after me . however , the movie had some dumb parts about it . < br / > < br / > a husband goes drunk driving and kills his 3 year old son and blinds his teenage daughter . i suppose death is a greater damage than blindness , but you ' d never know that the parents actually feel bad about their daughter being blind . all they care about is that " j . j . ' s dead ! " while their teenage [SEP]


I0524 12:19:43.186862 139713421719360 run_classifier.py:464] tokens: [CLS] this film has some pretty gore ##y parts like a boo ##b getting bit off and a other big bites . castle freak himself is a good monster . i would be scared to pieces if he was coming after me . however , the movie had some dumb parts about it . < br / > < br / > a husband goes drunk driving and kills his 3 year old son and blinds his teenage daughter . i suppose death is a greater damage than blindness , but you ' d never know that the parents actually feel bad about their daughter being blind . all they care about is that " j . j . ' s dead ! " while their teenage [SEP]


INFO:tensorflow:input_ids: 101 2023 2143 2038 2070 3492 13638 2100 3033 2066 1037 22017 2497 2893 2978 2125 1998 1037 2060 2502 15424 1012 3317 11576 2370 2003 1037 2204 6071 1012 1045 2052 2022 6015 2000 4109 2065 2002 2001 2746 2044 2033 1012 2174 1010 1996 3185 2018 2070 12873 3033 2055 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1037 3129 3632 7144 4439 1998 8563 2010 1017 2095 2214 2365 1998 28279 2010 9454 2684 1012 1045 6814 2331 2003 1037 3618 4053 2084 26290 1010 2021 2017 1005 1040 2196 2113 2008 1996 3008 2941 2514 2919 2055 2037 2684 2108 6397 1012 2035 2027 2729 2055 2003 2008 1000 1046 1012 1046 1012 1005 1055 2757 999 1000 2096 2037 9454 102


I0524 12:19:43.188408 139713421719360 run_classifier.py:465] input_ids: 101 2023 2143 2038 2070 3492 13638 2100 3033 2066 1037 22017 2497 2893 2978 2125 1998 1037 2060 2502 15424 1012 3317 11576 2370 2003 1037 2204 6071 1012 1045 2052 2022 6015 2000 4109 2065 2002 2001 2746 2044 2033 1012 2174 1010 1996 3185 2018 2070 12873 3033 2055 2009 1012 1026 7987 1013 1028 1026 7987 1013 1028 1037 3129 3632 7144 4439 1998 8563 2010 1017 2095 2214 2365 1998 28279 2010 9454 2684 1012 1045 6814 2331 2003 1037 3618 4053 2084 26290 1010 2021 2017 1005 1040 2196 2113 2008 1996 3008 2941 2514 2919 2055 2037 2684 2108 6397 1012 2035 2027 2729 2055 2003 2008 1000 1046 1012 1046 1012 1005 1055 2757 999 1000 2096 2037 9454 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0524 12:19:43.189905 139713421719360 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0524 12:19:43.191384 139713421719360 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


I0524 12:19:43.192875 139713421719360 run_classifier.py:468] label: 0 (id = 0)


In [25]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own layer to tune for politeness data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, rate=0.1)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want predicted labels and the probabiltiies.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


In [26]:
#we'll wrap our model function in a model_fn_builder function that adapts our model to work for training, evaluation, and prediction.
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.

def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
          return tf.estimator.EstimatorSpec(mode=mode,
            loss=loss,
            eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn









In [27]:
# Compute train and warmup steps from batch size
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [28]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [29]:
# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=r"/home/vaibhav/Downloads",
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [30]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_model_dir': '/home/vaibhav/Downloads', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f110c560470>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0524 12:29:22.130352 139713421719360 estimator.py:201] Using config: {'_model_dir': '/home/vaibhav/Downloads', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f110c560470>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [31]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

In [None]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Calling model_fn.


I0524 12:29:50.133512 139713421719360 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0524 12:29:52.666393 139713421719360 saver.py:1483] Saver not created because there are no variables in the graph to restore


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


W0524 12:29:52.828431 139713421719360 deprecation.py:323] From /home/vaibhav/.local/lib/python3.6/site-packages/tensorflow/python/training/learning_rate_decay_v2.py:321: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


Instructions for updating:
Use tf.cast instead.


W0524 12:29:52.907159 139713421719360 deprecation.py:323] From /home/vaibhav/.local/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Instructions for updating:
Use tf.cast instead.


W0524 12:29:59.357049 139713421719360 deprecation.py:323] From /home/vaibhav/.local/lib/python3.6/site-packages/tensorflow/python/ops/metrics_impl.py:455: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.



For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Done calling model_fn.


I0524 12:30:02.737222 139713421719360 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


I0524 12:30:02.740125 139713421719360 basic_session_run_hooks.py:527] Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


I0524 12:30:06.192793 139713421719360 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Running local_init_op.


I0524 12:30:21.625602 139713421719360 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0524 12:30:21.889516 139713421719360 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into /home/vaibhav/Downloads/model.ckpt.


I0524 12:30:29.664873 139713421719360 basic_session_run_hooks.py:594] Saving checkpoints for 0 into /home/vaibhav/Downloads/model.ckpt.


In [None]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [None]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

In [None]:
#code to predict a new sentence
def getPrediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]

In [None]:
pred_sentences = [
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!"
]

In [None]:
predictions = getPrediction(pred_sentences)