### Import Packages

In [1]:
"""BERT finetuning runner."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import csv
import os
import modeling
import optimization
import tokenization
import numpy as np
import tensorflow as tf
from sklearn import metrics
from seqeval.metrics import classification_report
os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
!rm -rf ../logs

### Parameter Config

In [2]:
data_dir = '../data/atis'
log_dir = '../logs'

bert_model_dir = '../checkpoints/cased_L-12_H-768_A-12'
bert_config_file = os.path.join(bert_model_dir, 'bert_config.json')
vocab_file = os.path.join(bert_model_dir, 'vocab.txt')
init_checkpoint = os.path.join(bert_model_dir, 'bert_model.ckpt')

In [3]:
do_lower_case = False
do_train = True
do_eval = True
do_predict = True

max_seq_length = 50
train_batch_size = 32
eval_batch_size = 8
predict_batch_size = 100
learning_rate = 5e-5
num_train_epochs = 1
warmup_proportion = 0.1
save_checkpoints_steps = 1000
log_step_count_steps = 10
save_summary_steps = 1

### InputExample

In [4]:
class InputExample(object):
    """A single training/test example for simple sequence classification."""

    def __init__(self, guid, text, label):
        """Constructs a InputExample.

        Args:
          guid: Unique id for the example.
          text: string. The untokenized text of the first sequence.
          label: The label of the example. This should be
            specified for train and dev examples, but not for test examples.
        """
        self.guid = guid
        self.text = text
        self.label = label

In [5]:
class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self,
                 input_ids,
                 input_mask,
                 segment_ids,
                 label_ids,
                 is_real_example=True):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.label_ids = label_ids
        self.is_real_example = is_real_example

### DataProcessor

In [6]:
class DataProcessor(object):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.train_path = os.path.join(self.data_dir, "train.tsv")
        self.dev_path = os.path.join(self.data_dir, "dev.tsv")
        self.test_path = os.path.join(self.data_dir, "test.tsv")

    def _read_tsv(cls, input_file, quotechar=None):
        """Reads a tab separated value file."""
        with tf.gfile.Open(input_file, "r") as f:
            reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
            lines = []
            for line in reader:
                lines.append(line)
            return lines

    def _create_examples(self, lines, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, line) in enumerate(lines):
            guid = "%s-%s" % (set_type, i)
            text = tokenization.convert_to_unicode(line[1])
            label = tokenization.convert_to_unicode(line[2])
            examples.append(InputExample(guid=guid, text=text, label=label))
        return examples

    def get_train_examples(self):
        return self._create_examples(self._read_tsv(self.train_path), "train")

    def get_dev_examples(self):
        return self._create_examples(self._read_tsv(self.dev_path), "dev")

    def get_test_examples(self):
        return self._create_examples(self._read_tsv(self.test_path), "test")

    def get_labels_info(self):
        labels = []
        label_map = {}
        label_map_file = os.path.join(log_dir, "label_map.txt")
        lines = self._read_tsv(self.train_path) + \
                self._read_tsv(self.dev_path) + \
                self._read_tsv(self.test_path)

        for line in lines:
            labels += line[2].strip().split()
        labels.append("X")
        labels = sorted(set(labels), reverse=False)
        num_labels = sorted(set(labels), reverse=True).__len__()

        with tf.gfile.GFile(label_map_file, "w") as writer:
            for (i, label) in enumerate(labels):
                label_map[label] = i
                writer.write("{}:{}\n".format(i, label))
        return label_map, num_labels

In [7]:
def convert_single_example(ex_index, example, label_map, max_seq_length,
                           tokenizer):
    tokens_list = example.text.split(" ")
    labels_list = example.label.split(" ")
    tokens = []
    labels = []
    for i, (word, label) in enumerate(zip(tokens_list, labels_list)):
        token = tokenizer.tokenize(word)
        tokens.extend(token)
        for i, _ in enumerate(token):
            if i == 0:
                labels.append(label)
            else:
                labels.append("X")

    # only Account for [CLS]  with "- 1".
    if len(tokens) >= max_seq_length - 1:
        tokens = tokens[0:(max_seq_length - 1)]
        labels = labels[0:(max_seq_length - 1)]

    tokens.insert(0, "[CLS]")
    labels.insert(0, "O")
    segment_ids = [0] * max_seq_length
    label_ids = [label_map[label] for label in labels]
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        label_ids.append(label_map["O"])

    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length
    assert len(label_ids) == max_seq_length

    if ex_index < 3:
        tf.logging.info("*** Example ***")
        tf.logging.info("guid: %s" % (example.guid))
        tf.logging.info("tokens: %s" % " ".join(
            [tokenization.printable_text(x) for x in tokens]))
        tf.logging.info(
            "input_ids: %s" % " ".join([str(x) for x in input_ids]))
        tf.logging.info(
            "input_mask: %s" % " ".join([str(x) for x in input_mask]))
        tf.logging.info(
            "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
        tf.logging.info(
            "label_ids: %s" % " ".join([str(x) for x in label_ids]))
    feature = InputFeatures(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        label_ids=label_ids)
    return feature

### Convert_examples_to_features

In [8]:
def file_based_convert_examples_to_features(
        examples, label_map, max_seq_length, tokenizer, output_file):
    """Convert a set of `InputExample`s to a TFRecord file."""
    writer = tf.python_io.TFRecordWriter(output_file)
    for (ex_index, example) in enumerate(examples):
        if ex_index % 2000 == 0:
            tf.logging.info(
                "Writing example %d of %d" % (ex_index, len(examples)))
        feature = convert_single_example(ex_index, example, label_map,
                                         max_seq_length, tokenizer)

        def create_int_feature(values):
            f = tf.train.Feature(
                int64_list=tf.train.Int64List(value=list(values)))
            return f

        features = collections.OrderedDict()
        features["input_ids"] = create_int_feature(feature.input_ids)
        features["input_mask"] = create_int_feature(feature.input_mask)
        features["segment_ids"] = create_int_feature(feature.segment_ids)
        features["label_ids"] = create_int_feature(feature.label_ids)
        tf_example = tf.train.Example(
            features=tf.train.Features(feature=features))
        writer.write(tf_example.SerializeToString())
    writer.close()

In [9]:
def file_based_input_fn_builder(input_file, seq_length, is_training,
                                drop_remainder, batch_size):
    """Creates an `input_fn` closure to be passed to Estimator."""
    name_to_features = {
        "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
        "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
        "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
        "label_ids": tf.FixedLenFeature([seq_length], tf.int64),
    }

    def _decode_record(record, name_to_features):
        """Decodes a record to a TensorFlow example."""
        return tf.parse_single_example(record, name_to_features)

    def input_fn():
        # For training, we want a lot of parallel reading and shuffling.
        # For eval, we want no shuffling and parallel reading doesn't matter.
        d = tf.data.TFRecordDataset(input_file)
        if is_training:
            d = d.repeat()
            d = d.shuffle(buffer_size=100)
        d = d.apply(
            tf.data.experimental.map_and_batch(
                lambda record: _decode_record(record, name_to_features),
                batch_size=batch_size,
                drop_remainder=drop_remainder))
        return d

    return input_fn

### Create Model

In [10]:
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids)

    # output_layer = model.get_pooled_output()
    output_layer = model.get_sequence_output()

    if is_training:
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.layers.dense(
        inputs=output_layer,
        units=num_labels,
        use_bias=True,
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))

    mask_length = tf.reduce_sum(input_mask, axis=1)

    mask = tf.cast(input_mask, dtype=tf.float32)
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    loss = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=one_hot_labels)
    loss *= tf.reshape(mask, [-1])
    loss = tf.reduce_sum(loss)
    total_size = tf.reduce_sum(mask)
    total_size += 1e-12  # to avoid division by 0 for all-0 weights
    loss /= total_size
    # predict not mask we could filtered it in the prediction part.
    probabilities = tf.math.softmax(logits, axis=-1)
    predict = tf.math.argmax(probabilities, axis=-1)

    #     with tf.variable_scope("crf_loss"):
    #         trans = tf.get_variable(
    #             "transition",
    #             shape=[num_labels, num_labels],
    #             initializer=tf.contrib.layers.xavier_initializer())
    #     log_likelihood, transition = tf.contrib.crf.crf_log_likelihood(
    #         inputs=logits,
    #         tag_indices=labels,
    #         sequence_lengths=mask_length,
    #         transition_params=trans)
    #     loss = tf.reduce_mean(-log_likelihood)
    #     decode_tags, best_score = tf.contrib.crf.crf_decode(
    #         potentials=logits,
    #         transition_params=transition,
    #         sequence_length=mask_length)

    return (loss, predict, mask_length)

In [11]:
def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps):
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info(
                "  name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        (total_loss, predict, mask_length) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, num_labels)
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)


        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op)
        elif mode == tf.estimator.ModeKeys.EVAL:
            accuracy = tf.metrics.accuracy(label_ids, predict)
            evl_metrics = {
                'accuracy': accuracy,
            }
            for metric_name, op in evl_metrics.items():
                tf.summary.scalar(metric_name, op[1])
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops=evl_metrics)
        else:
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions={
                    "predicted_ids": predict,
                    "label_ids": label_ids,
                    "mask_length":mask_length,
                    "input_ids":input_ids
                })
        return output_spec

    return model_fn

### Train

In [12]:
tf.logging.set_verbosity(tf.logging.INFO)
if not do_train and not do_eval and not do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict' must be True.")
tf.gfile.MakeDirs(log_dir)
processor = DataProcessor(data_dir)
label_map, num_labels = processor.get_labels_info()
tokenization.validate_case_matches_checkpoint(do_lower_case, init_checkpoint)
bert_config = modeling.BertConfig.from_json_file(bert_config_file)

if max_seq_length > bert_config.max_position_embeddings:
    raise ValueError("Cannot use sequence length %d because the BERT model "
                     "was only trained up to sequence length %d" %
                     (max_seq_length, bert_config.max_position_embeddings))
tokenizer = tokenization.FullTokenizer(
    vocab_file=vocab_file, do_lower_case=do_lower_case)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
run_config = tf.estimator.RunConfig(
    model_dir=log_dir,
    session_config=config,
    save_checkpoints_steps=save_checkpoints_steps,
    log_step_count_steps=log_step_count_steps,
    save_summary_steps=save_summary_steps)
train_examples = None
num_train_steps = None
num_warmup_steps = None

if do_train:
    train_examples = processor.get_train_examples()
    num_train_steps = int(
        len(train_examples) / train_batch_size * num_train_epochs)
    num_warmup_steps = int(num_train_steps * warmup_proportion)

model_fn = model_fn_builder(
    bert_config=bert_config,
    num_labels=num_labels,
    init_checkpoint=init_checkpoint,
    learning_rate=learning_rate,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

# Training
if do_train:
    train_file = os.path.join(log_dir, "train.tf_record")
    file_based_convert_examples_to_features(
        train_examples, label_map, max_seq_length, tokenizer, train_file)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=max_seq_length,
        is_training=True,
        drop_remainder=False,
        batch_size=train_batch_size)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

INFO:tensorflow:Using config: {'_model_dir': '../logs', '_tf_random_seed': None, '_save_summary_steps': 1, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': gpu_options {
  allow_growth: true
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 10, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f26d6145ef0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Writing example 0 of 4478
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: train-0
INFO:tensorflow:tokens: [CLS] i want to fly from b ##al ##ti ##more to da ##llas round trip
INFO:tensorflow:input_ids: 101 178 1328 1106 4689 1121 171 13

INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_1/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKP

INFO:tensorflow:  name = bert/encoder/layer_5/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_5/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorfl

INFO:tensorflow:  name = bert/encoder/layer_10/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_10/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  

### Evaluate

In [13]:
if do_eval:
    eval_examples = processor.get_dev_examples()
    num_actual_eval_examples = len(eval_examples)
    eval_file = os.path.join(log_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_map, max_seq_length, tokenizer, eval_file)
    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=max_seq_length,
        is_training=False,
        drop_remainder=False,
        batch_size=eval_batch_size)

    result = estimator.evaluate(input_fn=eval_input_fn)

    output_eval_file = os.path.join(log_dir, "eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
        tf.logging.info("***** Eval results *****")
        for key in sorted(result.keys()):
            tf.logging.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

INFO:tensorflow:Writing example 0 of 500
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: dev-0
INFO:tensorflow:tokens: [CLS] i want to fly from b ##ost ##on at 83 ##8 am and arrive in den ##ver at 111 ##0 in the morning
INFO:tensorflow:input_ids: 101 178 1328 1106 4689 1121 171 15540 1320 1120 6032 1604 1821 1105 6657 1107 10552 4121 1120 11084 1568 1107 1103 2106 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:label_ids: 126 126 126 126 126 126 48 127 127 126 35 127 99 126 126 126 78 127 126 14 127 126 126 12 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: dev-1
INFO:tensorflow:tokens: [CLS] show me

INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder

INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflo

INFO:tensorflow:  name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = dense/kernel:0, shape = (768, 128)
INFO:tensorflow:  name = dense/bias:0, shape = (128,)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-07-22T05:14:35Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ../logs/model.ckpt-139
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-07-22-05:14:40
INFO:tensorflow:Saving dict for global step 139: accuracy = 0.95268, global_step = 139, loss = 0.18315922
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 139: ../logs/model.ckpt-139
INFO:tensorflow:***** Eval results *****
INFO:tensorflow:  accuracy = 0.95268
INFO:tensorflow:  global_step = 139
INFO:tensorflow:  loss = 0.18315922


### Predict

In [14]:
if do_predict:

    predict_examples = processor.get_test_examples()
    num_actual_predict_examples = len(predict_examples)

    predict_file = os.path.join(log_dir, "predict.tf_record")
    if not tf.gfile.Exists(predict_file):
        file_based_convert_examples_to_features(predict_examples, label_map,
                                                max_seq_length, tokenizer,
                                                predict_file)

    tf.logging.info("***** Running prediction*****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", predict_batch_size)

    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=max_seq_length,
        is_training=False,
        drop_remainder=False,
        batch_size=predict_batch_size)

    result = estimator.predict(input_fn=predict_input_fn)
    label_map_new = {v: k for k, v in label_map.items()}

    output_predict_file = os.path.join(log_dir, "test_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as writer:
        true_list = []
        predict_list = []
        for item in result:
            mask_length = item["mask_length"]
            label_ids = item["label_ids"][:mask_length].tolist()
            predicted_ids = item["predicted_ids"][:mask_length].tolist()
            input_ids = item["input_ids"][:mask_length]
            tokens = tokenizer.convert_ids_to_tokens(input_ids)
            
            del tokens[0]
            del label_ids[0]
            del predicted_ids[0]
            
            true_tags = []
            pre_tags = []
            for index, word in enumerate(tokens):
                if not word.startswith("##"):
                    true_tags.append(label_map_new[label_ids[index]])
                    pre_tags.append(label_map_new[predicted_ids[index]])
            
            true_list.append(true_tags)
            predict_list.append(pre_tags)

            

#             for i in range(len(tokens)):
#                 if tokens[i].startswith("[CLS]") or tokens[i].startswith("##"):
#                     pass
#                 elif true_tags[i].startswith("X") or pre_tags[i].startswith(
#                         "X"):
#                     pass
#                 else:
#                     output_format = "{} {} {}\n".format(
#                         tokens[i], true_tags[i], pre_tags[i].strip())
#                     writer.write(output_format)
#             writer.write("\n")
    print(classification_report(true_list, predict_list))

INFO:tensorflow:Writing example 0 of 893
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: test-0
INFO:tensorflow:tokens: [CLS] i would like to find a flight from ch ##ar ##lot ##te to las ve ##gas that makes a stop in s ##t . lo ##ui ##s
INFO:tensorflow:input_ids: 101 178 1156 1176 1106 1525 170 3043 1121 22572 1813 7841 1566 1106 17496 1396 11305 1115 2228 170 1831 1107 188 1204 119 25338 6592 1116 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:label_ids: 126 126 126 126 126 126 126 126 126 48 127 127 127 126 78 123 127 126 126 126 126 126 71 127 127 119 127 127 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: test-1
INFO:t

INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_3/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/

INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tenso

INFO:tensorflow:  name = bert/pooler/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = dense/kernel:0, shape = (768, 128)
INFO:tensorflow:  name = dense/bias:0, shape = (128,)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../logs/model.ckpt-139
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
                            precision    recall  f1-score   support

              airport_name       0.00      0.00      0.00        21
           toloc.city_name       0.38      0.47      0.42       755
         fromloc.city_name       0.38      0.51      0.43       732
           fare_basis_code       0.67      0.59      0.62        17
        toloc.airport_code       0.00      0.00      0.00         4
 depart_time.time_relative       0.32      0.60      0.42        65
          t