In [3]:
%load_ext autoreload
%autoreload 2

import tensorflow.compat.v1 as tf
import numpy as np
import pandas as pd
# modeling은 직접 수정한(position, token embedding을 뺀), 모델로 사용할 것이므로, salt_bert를 활용하면 안됩니다.
import modeling
import sys
sys.path.append(os.path.dirname(os.path.abspath('')))
from salt_bert.make_preprocessed_data import tokenization
from salt_bert.make_bert_model import optimization
import random
import logging
import os

random_seed=777
rng = random.Random(random_seed)

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_visible_devices(physical_devices[0],'GPU')
tf.config.experimental.set_memory_growth(physical_devices[0],True)

dupe_factor = 100
masked_lm_prob=0.8
max_predictions_per_seq=4
max_seq_length = 7

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import collections
# masked 처리
def create_masked_lm_predictions(tokens, vocab_words, masked_lm_prob=0.15, max_predictions_per_seq=20, rng=rng):
    """Creates the predictions for the masked LM objective."""

    cand_indexes = []
    for (i, token) in enumerate(tokens):			# [MASK] 를 삽입할 token 후보 리스트를 생성
        if token == "[CLS]" or token == "[SEP]":
            continue
        cand_indexes.append(i)

    rng.shuffle(cand_indexes)

    output_tokens = list(tokens)

    masked_lm = collections.namedtuple("masked_lm", ["index", "label"])    # pylint: disable=invalid-name

    num_to_predict = min(max_predictions_per_seq,					# [MASK] 를 삽입할 token의 개수를 현재 token 중에서 15%의 확률로 선택 
                                             max(1, int(round(len(tokens) * masked_lm_prob))))	# 만약 그 값이 20보다 크다면, 20개의 token만 [MASK]로 변환

    masked_lms = []
    covered_indexes = set()
    for index in cand_indexes:
        if len(masked_lms) >= num_to_predict:					# 앞에서 지정한 개수 만큼한 [MASK]
            break
        if index in covered_indexes:
            continue
        covered_indexes.add(index)

        masked_token = None
        # 80% of the time, replace with [MASK]
        if rng.random() < 0.8:							# 그 중, 80%는 [MASK]로 변환
            masked_token = "[MASK]"
        else:
            # 10% of the time, keep original
            if rng.random() < 0.5:							# 나머지 20%의 확률 중, 절반인 10%는 원본 유지
                masked_token = tokens[index]
            # 10% of the time, replace with random word
            else:									# 나머지 10%는 랜덤으로 replace
                masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]

        output_tokens[index] = masked_token

        masked_lms.append(masked_lm(index=index, label=tokens[index]))

    masked_lms = sorted(masked_lms, key=lambda x: x.index)

    masked_lm_positions = []
    masked_lm_labels = []
    for p in masked_lms:
        masked_lm_positions.append(p.index)
        masked_lm_labels.append(p.label)

    return (output_tokens, masked_lm_positions, masked_lm_labels)

In [3]:
# 로그 생성
logger = logging.getLogger()

# 로그의 출력 기준 설정
logger.setLevel(logging.INFO)

# log 출력 형식
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# log 출력
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

# log를 파일에 출력
# file_handler = logging.FileHandler('my.log')
# file_handler.setFormatter(formatter)
# logger.addHandler(file_handler)


target_strt_date_dt='2020-01-01'
target_strt_str_dt=target_strt_date_dt.replace('-','')
target_end_date_dt='2022-01-01'
target_end_str_dt=target_end_date_dt.replace('-','')



In [4]:
import pandas as pd

# 데이터는 직접 불러와주세요

main_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731 entries, 0 to 730
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   str_ymd   731 non-null    object
 1   year      731 non-null    int64 
 2   mon       731 non-null    int64 
 3   day       731 non-null    int64 
 4   day_kor   731 non-null    object
 5   weekend   731 non-null    object
 6   holiday   731 non-null    object
 7   str_duty  731 non-null    object
dtypes: int64(3), object(5)
memory usage: 45.8+ KB


In [5]:
# data 전처리를 진행해 주세요.

In [7]:
class TrainingInstance(object):
    """A single training instance (sentence pair)."""

    def __init__(self, tokens, masked_lm_positions, masked_lm_labels):
        self.tokens = tokens
        self.masked_lm_positions = masked_lm_positions
        self.masked_lm_labels = masked_lm_labels

    def __str__(self):
        s = ""
        s += "tokens: %s\n" % (" ".join(
                [tokenization.printable_text(x) for x in self.tokens]))
        s += "masked_lm_positions: %s\n" % (" ".join(
                [str(x) for x in self.masked_lm_positions]))
        s += "masked_lm_labels: %s\n" % (" ".join(
                [tokenization.printable_text(x) for x in self.masked_lm_labels]))
        s += "\n"
        return s

    def __repr__(self):
        return self.__str__()

    
def create_float_feature(values):
    feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
    return feature

def create_int_feature(values):
    feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
    return feature
    
def write_instance_to_example_files(instances, tokenizer, max_seq_length, max_predictions_per_seq, output_files):
    """Create TF example files from `TrainingInstance`s."""
    writers = []
    for output_file in output_files:
        writers.append(tf.io.TFRecordWriter(output_file))

    writer_index = 0

    total_written = 0
    for (inst_index, instance) in enumerate(instances):
        input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
        assert len(input_ids) <= max_seq_length

        while len(input_ids) < max_seq_length:
            input_ids.append(0)

        assert len(input_ids) == max_seq_length

        masked_lm_positions = list(instance.masked_lm_positions)
        masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)
        masked_lm_weights = [1.0] * len(masked_lm_ids)

        while len(masked_lm_positions) < max_predictions_per_seq:
            masked_lm_positions.append(0)
            masked_lm_ids.append(0)
            masked_lm_weights.append(0.0)

        features = collections.OrderedDict()
        features["input_ids"] = create_int_feature(input_ids)
        features["masked_lm_positions"] = create_int_feature(masked_lm_positions)
        features["masked_lm_ids"] = create_int_feature(masked_lm_ids)
        features["masked_lm_weights"] = create_float_feature(masked_lm_weights)

        tf_example = tf.train.Example(features=tf.train.Features(feature=features))

        writers[writer_index].write(tf_example.SerializeToString())
        writer_index = (writer_index + 1) % len(writers)

        total_written += 1

        if inst_index < 20:
            logger.info("*** Example ***")
            logger.info("tokens: %s" % " ".join([tokenization.printable_text(x) for x in instance.tokens]))

            for feature_name in features.keys():
                feature = features[feature_name]
                values = []
                if feature.int64_list.value:
                    values = feature.int64_list.value
                elif feature.float_list.value:
                    values = feature.float_list.value
                logger.info("%s: %s" % (feature_name, " ".join([str(x) for x in values])))

    for writer in writers:
        writer.close()

    logger.info("Wrote %d total instances", total_written)
    
def create_instances_from_document(
        all_documents, document_index, masked_lm_prob, max_predictions_per_seq, vocab_words, rng):
    """Creates `TrainingInstance`s for a single document."""
    document = all_documents[document_index]

    instances = []
    current_chunk = []
    current_length = 0
    i = 0
    
    input_token = []
    # CLS/SEP 제외
    # input_token.append("[CLS]")
    input_token.extend(document)
    # input_token.append("[SEP]")

    (tokens, masked_lm_positions,
     masked_lm_labels) = create_masked_lm_predictions(input_token, vocab_words, masked_lm_prob, max_predictions_per_seq, rng)		# 만들어진 전체 sequence의 특정 token index에 [MASK] 를 수행
    instance = TrainingInstance(
            tokens=input_token,
            masked_lm_positions=masked_lm_positions,
            masked_lm_labels=masked_lm_labels)
    instances.append(instance)

    return instances

In [8]:
train_features = # pre training에 사용될 feature값
# vocab.list는 직접 만들어주셔야 합니다. 데이터는 제공되지 않아요!
tokenizer = tokenization.FullTokenizer(vocab_file='./vocab.list', do_lower_case=False)
vocab_words = list(tokenizer.vocab.keys())

instances = []
for _ in range(dupe_factor):
    for document_index in range(len(train_features)):
        instances.extend(
                create_instances_from_document(
                        train_features, document_index, masked_lm_prob, max_predictions_per_seq, vocab_words, rng))
rng.shuffle(instances)

In [9]:
write_instance_to_example_files(instances, tokenizer, max_seq_length, max_predictions_per_seq, ["./test.tf_record"])

2022-01-19 14:23:12,466 - root - INFO - *** Example ***
2022-01-19 14:23:12,467 - root - INFO - tokens: N O O D D E O
2022-01-19 14:23:12,467 - root - INFO - input_ids: 5 6 6 3 3 4 6
2022-01-19 14:23:12,468 - root - INFO - input_mask: 1 1 1 1 1 1 1
2022-01-19 14:23:12,468 - root - INFO - masked_lm_positions: 0 3 4 6
2022-01-19 14:23:12,468 - root - INFO - masked_lm_ids: 5 3 3 6
2022-01-19 14:23:12,469 - root - INFO - masked_lm_weights: 1.0 1.0 1.0 1.0
2022-01-19 14:23:12,470 - root - INFO - *** Example ***
2022-01-19 14:23:12,470 - root - INFO - tokens: E O N N O O D
2022-01-19 14:23:12,470 - root - INFO - input_ids: 4 6 5 5 6 6 3
2022-01-19 14:23:12,470 - root - INFO - input_mask: 1 1 1 1 1 1 1
2022-01-19 14:23:12,471 - root - INFO - masked_lm_positions: 1 3 4 5
2022-01-19 14:23:12,471 - root - INFO - masked_lm_ids: 6 5 6 6
2022-01-19 14:23:12,471 - root - INFO - masked_lm_weights: 1.0 1.0 1.0 1.0
2022-01-19 14:23:12,472 - root - INFO - *** Example ***
2022-01-19 14:23:12,472 - root -

In [10]:
# BERT Config는 적절히 수정하셔서 사용하셔야됩니다. (vocab_size같은거)
bert_config_file = './config/bert_config.json'
do_train = True
do_eval = True
input_file = './test.tf_record'
output_dir = './model_output'
save_checkpoints_steps = 10
learning_rate = 5e-5
train_batch_size = 14
eval_batch_size = 14
num_train_steps = 100
num_warmup_steps= -1
max_eval_steps=20

In [11]:
def gather_indexes(sequence_tensor, positions):
    """Gathers the vectors at the specific positions over a minibatch."""
    sequence_shape = modeling.get_shape_list(sequence_tensor, expected_rank=3)
    batch_size = sequence_shape[0]
    seq_length = sequence_shape[1]
    width = sequence_shape[2]

    flat_offsets = tf.reshape(
        tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
    flat_positions = tf.reshape(positions + flat_offsets, [-1])
    flat_sequence_tensor = tf.reshape(sequence_tensor,
                                      [batch_size * seq_length, width])
    output_tensor = tf.gather(flat_sequence_tensor, flat_positions)
    return output_tensor

In [12]:
def get_masked_lm_output(bert_config, input_tensor, output_weights, positions,
                         label_ids, label_weights):
    """Get loss and log probs for the masked LM."""
    # 텐서 값의 특정 포지션을 가져온다. 여기서는 masking 처리된 값만 가져옴.
    input_tensor = gather_indexes(input_tensor, positions)

    with tf.variable_scope("cls/predictions"):
        with tf.variable_scope("transform"):
            input_tensor = tf.layers.dense(
                input_tensor,
                units=bert_config.hidden_size,
                activation=modeling.get_activation(bert_config.hidden_act),
                kernel_initializer=modeling.create_initializer(
                    bert_config.initializer_range))
            input_tensor = modeling.layer_norm(input_tensor)

        # The output weights are the same as the input embeddings, but there is
        # an output-only bias for each token.
        output_bias = tf.get_variable(
            "output_bias",
            shape=[bert_config.vocab_size],
            initializer=tf.zeros_initializer())

        logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        label_ids = tf.reshape(label_ids, [-1])
        label_weights = tf.reshape(label_weights, [-1])

        one_hot_labels = tf.one_hot(
            label_ids, depth=bert_config.vocab_size, dtype=tf.float32)

        per_example_loss = - \
            tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
        numerator = tf.reduce_sum(label_weights * per_example_loss)
        denominator = tf.reduce_sum(label_weights) + 1e-5
        loss = numerator / denominator

    return (loss, per_example_loss, log_probs)

In [13]:
def model_fn_builder(bert_config, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, use_tpu,
                     use_one_hot_embeddings):

    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss,
         masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(), model.get_embedding_table(),
             masked_lm_positions, masked_lm_ids, masked_lm_weights)

        # 2개의 예측에 대한 토탈 로스
        total_loss = masked_lm_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(
                        init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            # 토탈 로스를 최소화 하기위한 옵티마이저
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # 검증에서는 비교적 상세 정보를 출력해주도록 함.
            def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                          masked_lm_weights):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                                 [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(
                    masked_lm_log_probs, axis=-1, output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(
                    masked_lm_example_loss, [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights
            ])
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError(
                "Only TRAIN and EVAL modes are supported: %s" % (mode))

        return output_spec

    return model_fn

In [14]:
def _decode_record(record, name_to_features):
    """Decodes a record to a TensorFlow example."""
    example = tf.io.parse_single_example(record, name_to_features)

    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
    for name in list(example.keys()):
        t = example[name]
        if t.dtype == tf.int64:
            t = tf.cast(t,tf.int32)
        example[name] = t

    return example

In [15]:
def input_fn_builder(input_files,
                     max_seq_length,
                     max_predictions_per_seq,
                     is_training,
                     num_cpu_threads=4):
    """Creates an `input_fn` closure to be passed to TPUEstimator."""
    
    def input_fn(params):
        """The actual input function."""
        batch_size = params["batch_size"]

        name_to_features = {
            "input_ids":
                tf.io.FixedLenFeature([max_seq_length], tf.int64),
            # NSP를 하지 않으므로, 값이 살짝 다릅니다 참고하세요
            "input_mask":
                tf.io.FixedLenFeature([max_seq_length], tf.int64),
            "masked_lm_positions":
                tf.io.FixedLenFeature([max_predictions_per_seq], tf.int64),
            "masked_lm_ids":
                tf.io.FixedLenFeature([max_predictions_per_seq], tf.int64),
            "masked_lm_weights":
                tf.io.FixedLenFeature([max_predictions_per_seq], tf.float32)
        }

        # For training, we want a lot of parallel reading and shuffling.
        # For eval, we want no shuffling and parallel reading doesn't matter.
        if is_training:
            d = tf.data.Dataset.from_tensor_slices(tf.constant(input_files))
            d = d.repeat()
            d = d.shuffle(buffer_size=len(input_files))

            # `cycle_length` is the number of parallel files that get read.
            cycle_length = min(num_cpu_threads, len(input_files))

            # `sloppy` mode means that the interleaving is not exact. This adds
            # even more randomness to the training pipeline.
            d = d.apply(
                tf.data.experimental.parallel_interleave(
                    tf.data.TFRecordDataset,
                    sloppy=is_training,
                    cycle_length=cycle_length))
            d = d.shuffle(buffer_size=100)
        else:
            d = tf.data.TFRecordDataset(input_files)
            # Since we evaluate for a fixed number of steps we don't want to encounter
            # out-of-range exceptions.
            d = d.repeat()

        # We must `drop_remainder` on training because the TPU requires fixed
        # size dimensions. For eval, we assume we are evaluating on the CPU or GPU
        # and we *don't* want to drop the remainder, otherwise we wont cover
        # every sample.
        d = d.apply(
            tf.data.experimental.map_and_batch(
                lambda record: _decode_record(record, name_to_features),
                batch_size=batch_size,
                num_parallel_batches=num_cpu_threads,
                drop_remainder=True))
        return d

    return input_fn

In [16]:
tf.logging.set_verbosity(tf.logging.INFO)

if not do_train and not do_eval:
    raise ValueError("At least one of `do_train` or `do_eval` must be True.")

# TPU로 BERT학습을 시키기 위한 default config 호출
bert_config = modeling.BertConfig.from_json_file(bert_config_file)

tf.gfile.MakeDirs(output_dir)

input_files = []
for input_pattern in input_file.split(","):
    input_files.extend(tf.gfile.Glob(input_pattern))

tf.logging.info("*** Input Files ***")
for input_file in input_files:
    tf.logging.info("input_file_name:  %s" % input_file)

init_checkpoint = None
num_warmup_steps = 10000
use_tpu = False

tpu_cluster_resolver = None
if use_tpu and tpu_name:
    # tpu 활성화
    tpu_cluster_resolver = tf.estimator.tpu.cluster_resolver.TPUClusterResolver(
        tpu_name, zone=tpu_zone, project=gcp_project)

is_per_host = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
run_config = tf.estimator.tpu.RunConfig(
    cluster=tpu_cluster_resolver,
    master=None,
    model_dir=output_dir,
    save_checkpoints_steps=save_checkpoints_steps,
    tpu_config=tf.estimator.tpu.TPUConfig(
        iterations_per_loop=1000,
        num_shards=8,
        per_host_input_for_training=is_per_host))

# 메소드 편하게 쓰기위한 파라미터 세팅
model_fn = model_fn_builder(
    bert_config=bert_config,
    init_checkpoint=init_checkpoint,
    learning_rate=learning_rate,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    use_tpu=use_tpu,
    use_one_hot_embeddings=use_tpu)

# TPU를 쓸거면 TPU 설정을 하고, TPU가 없다면, 로컬 세팅을 설정하게 됨.
# 학습해야할 모델을 model_fn을 통해 bert로 설정한다. (modeling 클래스의 제공되는 BERT Archi)
estimator = tf.estimator.tpu.TPUEstimator(
    use_tpu=use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=train_batch_size,
    eval_batch_size=eval_batch_size)

if do_train:
    # masked된 여러 샘플들을 계속 배치수만큼 꺼내서 estimator에 선언된 모델로 학습을 진행시킨다.
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Batch size = %d", train_batch_size)
    train_input_fn = input_fn_builder(
        input_files=input_files,
        max_seq_length=max_seq_length,
        max_predictions_per_seq=max_predictions_per_seq,
        is_training=True)
    estimator.train(input_fn=train_input_fn,
                    max_steps=num_train_steps)

if do_eval:
    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Batch size = %d", eval_batch_size)
    # 순서대로 그냥 만들어놓은 tfrecode값을 가져와서 하나하나 bert모델을 검증하게된다.
    eval_input_fn = input_fn_builder(
        input_files=input_files,
        max_seq_length=max_seq_length,
        max_predictions_per_seq=max_predictions_per_seq,
        is_training=False)
    # 모델 검증
    result = estimator.evaluate(
        input_fn=eval_input_fn, steps=max_eval_steps)
    # 검증결과 저장
    output_eval_file = os.path.join(output_dir, "eval_results.txt")
    # pre-trained 모델 저장
    with tf.gfile.GFile(output_eval_file, "w") as writer:
        tf.logging.info("***** Eval results *****")
        for key in sorted(result.keys()):
            tf.logging.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

INFO:tensorflow:*** Input Files ***


2022-01-19 14:23:16,822 - tensorflow - INFO - *** Input Files ***


INFO:tensorflow:input_file_name:  .\test.tf_record


2022-01-19 14:23:16,823 - tensorflow - INFO - input_file_name:  .\test.tf_record






INFO:tensorflow:Using config: {'_model_dir': './model_output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 10, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=1000, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=

2022-01-19 14:23:16,886 - tensorflow - INFO - Using config: {'_model_dir': './model_output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 10, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=1000, num_shards=8, num_cores_per_replica=None

INFO:tensorflow:_TPUContext: eval_on_tpu True


2022-01-19 14:23:16,887 - tensorflow - INFO - _TPUContext: eval_on_tpu True






INFO:tensorflow:***** Running training *****


2022-01-19 14:23:16,888 - tensorflow - INFO - ***** Running training *****


INFO:tensorflow:  Batch size = 14


2022-01-19 14:23:16,889 - tensorflow - INFO -   Batch size = 14


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, num_parallel_calls=tf.data.AUTOTUNE)` instead. If sloppy execution is desired, use `tf.data.Options.experimental_deterministic`.


Instructions for updating:
Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, num_parallel_calls=tf.data.AUTOTUNE)` instead. If sloppy execution is desired, use `tf.data.Options.experimental_deterministic`.


Instructions for updating:
Use `tf.data.Dataset.map(map_func, num_parallel_calls)` followed by `tf.data.Dataset.batch(batch_size, drop_remainder)`. Static tf.data optimizations will take care of using the fused implementation.


Instructions for updating:
Use `tf.data.Dataset.map(map_func, num_parallel_calls)` followed by `tf.data.Dataset.batch(batch_size, drop_remainder)`. Static tf.data optimizations will take care of using the fused implementation.


INFO:tensorflow:Calling model_fn.


2022-01-19 14:23:16,995 - tensorflow - INFO - Calling model_fn.


INFO:tensorflow:Running train on CPU/GPU


2022-01-19 14:23:16,995 - tensorflow - INFO - Running train on CPU/GPU
2022-01-19 14:23:16,996 - root - INFO - *** Features ***
2022-01-19 14:23:16,997 - root - INFO -   name = input_ids, shape = (14, 7)
2022-01-19 14:23:16,997 - root - INFO -   name = input_mask, shape = (14, 7)
2022-01-19 14:23:16,997 - root - INFO -   name = masked_lm_ids, shape = (14, 4)
2022-01-19 14:23:16,998 - root - INFO -   name = masked_lm_positions, shape = (14, 4)
2022-01-19 14:23:16,998 - root - INFO -   name = masked_lm_weights, shape = (14, 4)


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Tensor("bert/pooler/dense/Tanh:0", shape=(14, 768), dtype=float32)
INFO:tensorflow:**** Trainable Variables ****


2022-01-19 14:23:18,332 - tensorflow - INFO - **** Trainable Variables ****
2022-01-19 14:23:18,333 - root - INFO -   name = bert/embeddings/word_embeddings:0, shape = (7, 768)
2022-01-19 14:23:18,334 - root - INFO -   name = bert/embeddings/LayerNorm/gamma:0, shape = (768,)
2022-01-19 14:23:18,334 - root - INFO -   name = bert/embeddings/LayerNorm/beta:0, shape = (768,)
2022-01-19 14:23:18,334 - root - INFO -   name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768)
2022-01-19 14:23:18,334 - root - INFO -   name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,)
2022-01-19 14:23:18,335 - root - INFO -   name = bert/encoder/layer_0/attention/self/key/kernel:0, shape = (768, 768)
2022-01-19 14:23:18,335 - root - INFO -   name = bert/encoder/layer_0/attention/self/key/bias:0, shape = (768,)
2022-01-19 14:23:18,335 - root - INFO -   name = bert/encoder/layer_0/attention/self/value/kernel:0, shape = (768, 768)
2022-01-19 14:23:18,335 - root - INFO -   

INFO:tensorflow:Done calling model_fn.


2022-01-19 14:23:21,708 - tensorflow - INFO - Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


2022-01-19 14:23:21,710 - tensorflow - INFO - Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


2022-01-19 14:23:30,741 - tensorflow - INFO - Graph was finalized.


INFO:tensorflow:Running local_init_op.


2022-01-19 14:23:33,728 - tensorflow - INFO - Running local_init_op.


INFO:tensorflow:Done running local_init_op.


2022-01-19 14:23:33,851 - tensorflow - INFO - Done running local_init_op.


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...


2022-01-19 14:23:47,717 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 0...


INFO:tensorflow:Saving checkpoints for 0 into ./model_output\model.ckpt.


2022-01-19 14:23:47,723 - tensorflow - INFO - Saving checkpoints for 0 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...


2022-01-19 14:23:56,098 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 0...


INFO:tensorflow:global_step/sec: 0.577778


2022-01-19 14:24:01,273 - tensorflow - INFO - global_step/sec: 0.577778


INFO:tensorflow:examples/sec: 8.08889


2022-01-19 14:24:01,274 - tensorflow - INFO - examples/sec: 8.08889


INFO:tensorflow:global_step/sec: 6.42564


2022-01-19 14:24:01,429 - tensorflow - INFO - global_step/sec: 6.42564


INFO:tensorflow:examples/sec: 89.959


2022-01-19 14:24:01,431 - tensorflow - INFO - examples/sec: 89.959


INFO:tensorflow:global_step/sec: 6.86942


2022-01-19 14:24:01,574 - tensorflow - INFO - global_step/sec: 6.86942


INFO:tensorflow:examples/sec: 96.1719


2022-01-19 14:24:01,575 - tensorflow - INFO - examples/sec: 96.1719


INFO:tensorflow:global_step/sec: 7.59792


2022-01-19 14:24:01,706 - tensorflow - INFO - global_step/sec: 7.59792


INFO:tensorflow:examples/sec: 106.371


2022-01-19 14:24:01,707 - tensorflow - INFO - examples/sec: 106.371


INFO:tensorflow:global_step/sec: 7.81655


2022-01-19 14:24:01,834 - tensorflow - INFO - global_step/sec: 7.81655


INFO:tensorflow:examples/sec: 109.432


2022-01-19 14:24:01,836 - tensorflow - INFO - examples/sec: 109.432


INFO:tensorflow:global_step/sec: 7.74161


2022-01-19 14:24:01,963 - tensorflow - INFO - global_step/sec: 7.74161


INFO:tensorflow:examples/sec: 108.383


2022-01-19 14:24:01,964 - tensorflow - INFO - examples/sec: 108.383


INFO:tensorflow:global_step/sec: 7.73534


2022-01-19 14:24:02,092 - tensorflow - INFO - global_step/sec: 7.73534


INFO:tensorflow:examples/sec: 108.295


2022-01-19 14:24:02,093 - tensorflow - INFO - examples/sec: 108.295


INFO:tensorflow:global_step/sec: 7.8603


2022-01-19 14:24:02,220 - tensorflow - INFO - global_step/sec: 7.8603


INFO:tensorflow:examples/sec: 110.044


2022-01-19 14:24:02,221 - tensorflow - INFO - examples/sec: 110.044


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 10...


2022-01-19 14:24:02,347 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 10...


INFO:tensorflow:Saving checkpoints for 10 into ./model_output\model.ckpt.


2022-01-19 14:24:02,348 - tensorflow - INFO - Saving checkpoints for 10 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 10...


2022-01-19 14:24:08,888 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 10...


INFO:tensorflow:global_step/sec: 0.149923


2022-01-19 14:24:08,890 - tensorflow - INFO - global_step/sec: 0.149923


INFO:tensorflow:examples/sec: 2.09893


2022-01-19 14:24:08,891 - tensorflow - INFO - examples/sec: 2.09893


INFO:tensorflow:global_step/sec: 2.84714


2022-01-19 14:24:09,241 - tensorflow - INFO - global_step/sec: 2.84714


INFO:tensorflow:examples/sec: 39.86


2022-01-19 14:24:09,242 - tensorflow - INFO - examples/sec: 39.86


INFO:tensorflow:global_step/sec: 7.95774


2022-01-19 14:24:09,367 - tensorflow - INFO - global_step/sec: 7.95774


INFO:tensorflow:examples/sec: 111.408


2022-01-19 14:24:09,368 - tensorflow - INFO - examples/sec: 111.408


INFO:tensorflow:global_step/sec: 7.30799


2022-01-19 14:24:09,504 - tensorflow - INFO - global_step/sec: 7.30799


INFO:tensorflow:examples/sec: 102.312


2022-01-19 14:24:09,505 - tensorflow - INFO - examples/sec: 102.312


INFO:tensorflow:global_step/sec: 7.71098


2022-01-19 14:24:09,633 - tensorflow - INFO - global_step/sec: 7.71098


INFO:tensorflow:examples/sec: 107.954


2022-01-19 14:24:09,634 - tensorflow - INFO - examples/sec: 107.954


INFO:tensorflow:global_step/sec: 7.88339


2022-01-19 14:24:09,760 - tensorflow - INFO - global_step/sec: 7.88339


INFO:tensorflow:examples/sec: 110.368


2022-01-19 14:24:09,762 - tensorflow - INFO - examples/sec: 110.368


INFO:tensorflow:global_step/sec: 8.01307


2022-01-19 14:24:09,885 - tensorflow - INFO - global_step/sec: 8.01307


INFO:tensorflow:examples/sec: 112.183


2022-01-19 14:24:09,886 - tensorflow - INFO - examples/sec: 112.183


INFO:tensorflow:global_step/sec: 8.18155


2022-01-19 14:24:10,007 - tensorflow - INFO - global_step/sec: 8.18155


INFO:tensorflow:examples/sec: 114.542


2022-01-19 14:24:10,009 - tensorflow - INFO - examples/sec: 114.542


INFO:tensorflow:global_step/sec: 8.1543


2022-01-19 14:24:10,131 - tensorflow - INFO - global_step/sec: 8.1543


INFO:tensorflow:examples/sec: 114.16


2022-01-19 14:24:10,132 - tensorflow - INFO - examples/sec: 114.16


INFO:tensorflow:global_step/sec: 7.96509


2022-01-19 14:24:10,255 - tensorflow - INFO - global_step/sec: 7.96509


INFO:tensorflow:examples/sec: 111.511


2022-01-19 14:24:10,257 - tensorflow - INFO - examples/sec: 111.511


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 20...


2022-01-19 14:24:10,381 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 20...


INFO:tensorflow:Saving checkpoints for 20 into ./model_output\model.ckpt.


2022-01-19 14:24:10,381 - tensorflow - INFO - Saving checkpoints for 20 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 20...


2022-01-19 14:24:17,443 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 20...


INFO:tensorflow:global_step/sec: 0.139098


2022-01-19 14:24:17,445 - tensorflow - INFO - global_step/sec: 0.139098


INFO:tensorflow:examples/sec: 1.94738


2022-01-19 14:24:17,445 - tensorflow - INFO - examples/sec: 1.94738


INFO:tensorflow:global_step/sec: 2.74873


2022-01-19 14:24:17,808 - tensorflow - INFO - global_step/sec: 2.74873


INFO:tensorflow:examples/sec: 38.4822


2022-01-19 14:24:17,809 - tensorflow - INFO - examples/sec: 38.4822


INFO:tensorflow:global_step/sec: 7.94526


2022-01-19 14:24:17,934 - tensorflow - INFO - global_step/sec: 7.94526


INFO:tensorflow:examples/sec: 111.234


2022-01-19 14:24:17,935 - tensorflow - INFO - examples/sec: 111.234


INFO:tensorflow:global_step/sec: 7.37845


2022-01-19 14:24:18,069 - tensorflow - INFO - global_step/sec: 7.37845


INFO:tensorflow:examples/sec: 103.298


2022-01-19 14:24:18,070 - tensorflow - INFO - examples/sec: 103.298


INFO:tensorflow:global_step/sec: 7.47504


2022-01-19 14:24:18,203 - tensorflow - INFO - global_step/sec: 7.47504


INFO:tensorflow:examples/sec: 104.651


2022-01-19 14:24:18,205 - tensorflow - INFO - examples/sec: 104.651


INFO:tensorflow:global_step/sec: 7.65397


2022-01-19 14:24:18,334 - tensorflow - INFO - global_step/sec: 7.65397


INFO:tensorflow:examples/sec: 107.156


2022-01-19 14:24:18,335 - tensorflow - INFO - examples/sec: 107.156


INFO:tensorflow:global_step/sec: 7.58413


2022-01-19 14:24:18,466 - tensorflow - INFO - global_step/sec: 7.58413


INFO:tensorflow:examples/sec: 106.178


2022-01-19 14:24:18,468 - tensorflow - INFO - examples/sec: 106.178


INFO:tensorflow:global_step/sec: 7.64835


2022-01-19 14:24:18,597 - tensorflow - INFO - global_step/sec: 7.64835


INFO:tensorflow:examples/sec: 107.077


2022-01-19 14:24:18,598 - tensorflow - INFO - examples/sec: 107.077


INFO:tensorflow:global_step/sec: 7.59415


2022-01-19 14:24:18,728 - tensorflow - INFO - global_step/sec: 7.59415


INFO:tensorflow:examples/sec: 106.318


2022-01-19 14:24:18,729 - tensorflow - INFO - examples/sec: 106.318


INFO:tensorflow:global_step/sec: 7.7457


2022-01-19 14:24:18,857 - tensorflow - INFO - global_step/sec: 7.7457


INFO:tensorflow:examples/sec: 108.44


2022-01-19 14:24:18,858 - tensorflow - INFO - examples/sec: 108.44


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 30...


2022-01-19 14:24:18,985 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 30...


INFO:tensorflow:Saving checkpoints for 30 into ./model_output\model.ckpt.


2022-01-19 14:24:18,986 - tensorflow - INFO - Saving checkpoints for 30 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 30...


2022-01-19 14:24:25,717 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 30...


INFO:tensorflow:global_step/sec: 0.145721


2022-01-19 14:24:25,720 - tensorflow - INFO - global_step/sec: 0.145721


INFO:tensorflow:examples/sec: 2.0401


2022-01-19 14:24:25,721 - tensorflow - INFO - examples/sec: 2.0401


INFO:tensorflow:global_step/sec: 3.23443


2022-01-19 14:24:26,029 - tensorflow - INFO - global_step/sec: 3.23443


INFO:tensorflow:examples/sec: 45.282


2022-01-19 14:24:26,030 - tensorflow - INFO - examples/sec: 45.282


INFO:tensorflow:global_step/sec: 8.08608


2022-01-19 14:24:26,154 - tensorflow - INFO - global_step/sec: 8.08608


INFO:tensorflow:examples/sec: 113.205


2022-01-19 14:24:26,155 - tensorflow - INFO - examples/sec: 113.205


INFO:tensorflow:global_step/sec: 8.01656


2022-01-19 14:24:26,277 - tensorflow - INFO - global_step/sec: 8.01656


INFO:tensorflow:examples/sec: 112.232


2022-01-19 14:24:26,278 - tensorflow - INFO - examples/sec: 112.232


INFO:tensorflow:global_step/sec: 7.73636


2022-01-19 14:24:26,407 - tensorflow - INFO - global_step/sec: 7.73636


INFO:tensorflow:examples/sec: 108.309


2022-01-19 14:24:26,408 - tensorflow - INFO - examples/sec: 108.309


INFO:tensorflow:global_step/sec: 8.05358


2022-01-19 14:24:26,531 - tensorflow - INFO - global_step/sec: 8.05358


INFO:tensorflow:examples/sec: 112.75


2022-01-19 14:24:26,532 - tensorflow - INFO - examples/sec: 112.75


INFO:tensorflow:global_step/sec: 8.06276


2022-01-19 14:24:26,656 - tensorflow - INFO - global_step/sec: 8.06276


INFO:tensorflow:examples/sec: 112.879


2022-01-19 14:24:26,657 - tensorflow - INFO - examples/sec: 112.879


INFO:tensorflow:global_step/sec: 8.10777


2022-01-19 14:24:26,779 - tensorflow - INFO - global_step/sec: 8.10777


INFO:tensorflow:examples/sec: 113.509


2022-01-19 14:24:26,780 - tensorflow - INFO - examples/sec: 113.509


INFO:tensorflow:global_step/sec: 8.10551


2022-01-19 14:24:26,902 - tensorflow - INFO - global_step/sec: 8.10551


INFO:tensorflow:examples/sec: 113.477


2022-01-19 14:24:26,903 - tensorflow - INFO - examples/sec: 113.477


INFO:tensorflow:global_step/sec: 7.89653


2022-01-19 14:24:27,028 - tensorflow - INFO - global_step/sec: 7.89653


INFO:tensorflow:examples/sec: 110.551


2022-01-19 14:24:27,029 - tensorflow - INFO - examples/sec: 110.551


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 40...


2022-01-19 14:24:27,152 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 40...


INFO:tensorflow:Saving checkpoints for 40 into ./model_output\model.ckpt.


2022-01-19 14:24:27,153 - tensorflow - INFO - Saving checkpoints for 40 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 40...


2022-01-19 14:24:34,152 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 40...


INFO:tensorflow:global_step/sec: 0.140356


2022-01-19 14:24:34,154 - tensorflow - INFO - global_step/sec: 0.140356


INFO:tensorflow:examples/sec: 1.96498


2022-01-19 14:24:34,154 - tensorflow - INFO - examples/sec: 1.96498


INFO:tensorflow:global_step/sec: 3.6081


2022-01-19 14:24:34,430 - tensorflow - INFO - global_step/sec: 3.6081


INFO:tensorflow:examples/sec: 50.5134


2022-01-19 14:24:34,431 - tensorflow - INFO - examples/sec: 50.5134


INFO:tensorflow:global_step/sec: 7.65173


2022-01-19 14:24:34,561 - tensorflow - INFO - global_step/sec: 7.65173


INFO:tensorflow:examples/sec: 107.124


2022-01-19 14:24:34,562 - tensorflow - INFO - examples/sec: 107.124


INFO:tensorflow:global_step/sec: 7.95127


2022-01-19 14:24:34,686 - tensorflow - INFO - global_step/sec: 7.95127


INFO:tensorflow:examples/sec: 111.318


2022-01-19 14:24:34,688 - tensorflow - INFO - examples/sec: 111.318


INFO:tensorflow:global_step/sec: 7.54006


2022-01-19 14:24:34,820 - tensorflow - INFO - global_step/sec: 7.54006


INFO:tensorflow:examples/sec: 105.561


2022-01-19 14:24:34,821 - tensorflow - INFO - examples/sec: 105.561


INFO:tensorflow:global_step/sec: 7.71291


2022-01-19 14:24:34,949 - tensorflow - INFO - global_step/sec: 7.71291


INFO:tensorflow:examples/sec: 107.981


2022-01-19 14:24:34,950 - tensorflow - INFO - examples/sec: 107.981


INFO:tensorflow:global_step/sec: 7.94745


2022-01-19 14:24:35,075 - tensorflow - INFO - global_step/sec: 7.94745


INFO:tensorflow:examples/sec: 111.264


2022-01-19 14:24:35,077 - tensorflow - INFO - examples/sec: 111.264


INFO:tensorflow:global_step/sec: 7.95771


2022-01-19 14:24:35,200 - tensorflow - INFO - global_step/sec: 7.95771


INFO:tensorflow:examples/sec: 111.408


2022-01-19 14:24:35,201 - tensorflow - INFO - examples/sec: 111.408


INFO:tensorflow:global_step/sec: 8.01262


2022-01-19 14:24:35,326 - tensorflow - INFO - global_step/sec: 8.01262


INFO:tensorflow:examples/sec: 112.177


2022-01-19 14:24:35,327 - tensorflow - INFO - examples/sec: 112.177


INFO:tensorflow:global_step/sec: 7.97296


2022-01-19 14:24:35,450 - tensorflow - INFO - global_step/sec: 7.97296


INFO:tensorflow:examples/sec: 111.621


2022-01-19 14:24:35,452 - tensorflow - INFO - examples/sec: 111.621


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 50...


2022-01-19 14:24:35,576 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 50...


INFO:tensorflow:Saving checkpoints for 50 into ./model_output\model.ckpt.


2022-01-19 14:24:35,577 - tensorflow - INFO - Saving checkpoints for 50 into ./model_output\model.ckpt.


Instructions for updating:
Use standard file APIs to delete files with this prefix.


Instructions for updating:
Use standard file APIs to delete files with this prefix.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 50...


2022-01-19 14:24:42,293 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 50...


INFO:tensorflow:global_step/sec: 0.146094


2022-01-19 14:24:42,295 - tensorflow - INFO - global_step/sec: 0.146094


INFO:tensorflow:examples/sec: 2.04531


2022-01-19 14:24:42,296 - tensorflow - INFO - examples/sec: 2.04531


INFO:tensorflow:global_step/sec: 3.31279


2022-01-19 14:24:42,597 - tensorflow - INFO - global_step/sec: 3.31279


INFO:tensorflow:examples/sec: 46.379


2022-01-19 14:24:42,598 - tensorflow - INFO - examples/sec: 46.379


INFO:tensorflow:global_step/sec: 7.47291


2022-01-19 14:24:42,732 - tensorflow - INFO - global_step/sec: 7.47291


INFO:tensorflow:examples/sec: 104.621


2022-01-19 14:24:42,733 - tensorflow - INFO - examples/sec: 104.621


INFO:tensorflow:global_step/sec: 7.22487


2022-01-19 14:24:42,869 - tensorflow - INFO - global_step/sec: 7.22487


INFO:tensorflow:examples/sec: 101.148


2022-01-19 14:24:42,871 - tensorflow - INFO - examples/sec: 101.148


INFO:tensorflow:global_step/sec: 7.13993


2022-01-19 14:24:43,010 - tensorflow - INFO - global_step/sec: 7.13993


INFO:tensorflow:examples/sec: 99.9591


2022-01-19 14:24:43,011 - tensorflow - INFO - examples/sec: 99.9591


INFO:tensorflow:global_step/sec: 7.85747


2022-01-19 14:24:43,137 - tensorflow - INFO - global_step/sec: 7.85747


INFO:tensorflow:examples/sec: 110.005


2022-01-19 14:24:43,138 - tensorflow - INFO - examples/sec: 110.005


INFO:tensorflow:global_step/sec: 7.65437


2022-01-19 14:24:43,267 - tensorflow - INFO - global_step/sec: 7.65437


INFO:tensorflow:examples/sec: 107.161


2022-01-19 14:24:43,268 - tensorflow - INFO - examples/sec: 107.161


INFO:tensorflow:global_step/sec: 7.5969


2022-01-19 14:24:43,399 - tensorflow - INFO - global_step/sec: 7.5969


INFO:tensorflow:examples/sec: 106.357


2022-01-19 14:24:43,400 - tensorflow - INFO - examples/sec: 106.357


INFO:tensorflow:global_step/sec: 7.95444


2022-01-19 14:24:43,525 - tensorflow - INFO - global_step/sec: 7.95444


INFO:tensorflow:examples/sec: 111.362


2022-01-19 14:24:43,526 - tensorflow - INFO - examples/sec: 111.362


INFO:tensorflow:global_step/sec: 8.02142


2022-01-19 14:24:43,650 - tensorflow - INFO - global_step/sec: 8.02142


INFO:tensorflow:examples/sec: 112.3


2022-01-19 14:24:43,651 - tensorflow - INFO - examples/sec: 112.3


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 60...


2022-01-19 14:24:43,775 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 60...


INFO:tensorflow:Saving checkpoints for 60 into ./model_output\model.ckpt.


2022-01-19 14:24:43,776 - tensorflow - INFO - Saving checkpoints for 60 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 60...


2022-01-19 14:24:51,422 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 60...


INFO:tensorflow:global_step/sec: 0.128628


2022-01-19 14:24:51,424 - tensorflow - INFO - global_step/sec: 0.128628


INFO:tensorflow:examples/sec: 1.80079


2022-01-19 14:24:51,425 - tensorflow - INFO - examples/sec: 1.80079


INFO:tensorflow:global_step/sec: 2.75631


2022-01-19 14:24:51,787 - tensorflow - INFO - global_step/sec: 2.75631


INFO:tensorflow:examples/sec: 38.5883


2022-01-19 14:24:51,788 - tensorflow - INFO - examples/sec: 38.5883


INFO:tensorflow:global_step/sec: 7.89466


2022-01-19 14:24:51,913 - tensorflow - INFO - global_step/sec: 7.89466


INFO:tensorflow:examples/sec: 110.525


2022-01-19 14:24:51,914 - tensorflow - INFO - examples/sec: 110.525


INFO:tensorflow:global_step/sec: 7.30065


2022-01-19 14:24:52,050 - tensorflow - INFO - global_step/sec: 7.30065


INFO:tensorflow:examples/sec: 102.209


2022-01-19 14:24:52,051 - tensorflow - INFO - examples/sec: 102.209


INFO:tensorflow:global_step/sec: 7.54434


2022-01-19 14:24:52,183 - tensorflow - INFO - global_step/sec: 7.54434


INFO:tensorflow:examples/sec: 105.621


2022-01-19 14:24:52,184 - tensorflow - INFO - examples/sec: 105.621


INFO:tensorflow:global_step/sec: 7.90877


2022-01-19 14:24:52,309 - tensorflow - INFO - global_step/sec: 7.90877


INFO:tensorflow:examples/sec: 110.723


2022-01-19 14:24:52,310 - tensorflow - INFO - examples/sec: 110.723


INFO:tensorflow:global_step/sec: 7.65399


2022-01-19 14:24:52,440 - tensorflow - INFO - global_step/sec: 7.65399


INFO:tensorflow:examples/sec: 107.156


2022-01-19 14:24:52,441 - tensorflow - INFO - examples/sec: 107.156


INFO:tensorflow:global_step/sec: 7.95779


2022-01-19 14:24:52,567 - tensorflow - INFO - global_step/sec: 7.95779


INFO:tensorflow:examples/sec: 111.409


2022-01-19 14:24:52,567 - tensorflow - INFO - examples/sec: 111.409


INFO:tensorflow:global_step/sec: 7.67506


2022-01-19 14:24:52,696 - tensorflow - INFO - global_step/sec: 7.67506


INFO:tensorflow:examples/sec: 107.451


2022-01-19 14:24:52,697 - tensorflow - INFO - examples/sec: 107.451


INFO:tensorflow:global_step/sec: 7.77845


2022-01-19 14:24:52,824 - tensorflow - INFO - global_step/sec: 7.77845


INFO:tensorflow:examples/sec: 108.898


2022-01-19 14:24:52,825 - tensorflow - INFO - examples/sec: 108.898


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 70...


2022-01-19 14:24:52,949 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 70...


INFO:tensorflow:Saving checkpoints for 70 into ./model_output\model.ckpt.


2022-01-19 14:24:52,951 - tensorflow - INFO - Saving checkpoints for 70 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 70...


2022-01-19 14:24:59,950 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 70...


INFO:tensorflow:global_step/sec: 0.140294


2022-01-19 14:24:59,952 - tensorflow - INFO - global_step/sec: 0.140294


INFO:tensorflow:examples/sec: 1.96411


2022-01-19 14:24:59,953 - tensorflow - INFO - examples/sec: 1.96411


INFO:tensorflow:global_step/sec: 2.34078


2022-01-19 14:25:00,380 - tensorflow - INFO - global_step/sec: 2.34078


INFO:tensorflow:examples/sec: 32.7709


2022-01-19 14:25:00,381 - tensorflow - INFO - examples/sec: 32.7709


INFO:tensorflow:global_step/sec: 7.47625


2022-01-19 14:25:00,513 - tensorflow - INFO - global_step/sec: 7.47625


INFO:tensorflow:examples/sec: 104.668


2022-01-19 14:25:00,514 - tensorflow - INFO - examples/sec: 104.668


INFO:tensorflow:global_step/sec: 7.34021


2022-01-19 14:25:00,650 - tensorflow - INFO - global_step/sec: 7.34021


INFO:tensorflow:examples/sec: 102.763


2022-01-19 14:25:00,652 - tensorflow - INFO - examples/sec: 102.763


INFO:tensorflow:global_step/sec: 7.79426


2022-01-19 14:25:00,778 - tensorflow - INFO - global_step/sec: 7.79426


INFO:tensorflow:examples/sec: 109.12


2022-01-19 14:25:00,779 - tensorflow - INFO - examples/sec: 109.12


INFO:tensorflow:global_step/sec: 7.55769


2022-01-19 14:25:00,910 - tensorflow - INFO - global_step/sec: 7.55769


INFO:tensorflow:examples/sec: 105.808


2022-01-19 14:25:00,912 - tensorflow - INFO - examples/sec: 105.808


INFO:tensorflow:global_step/sec: 7.71288


2022-01-19 14:25:01,040 - tensorflow - INFO - global_step/sec: 7.71288


INFO:tensorflow:examples/sec: 107.98


2022-01-19 14:25:01,041 - tensorflow - INFO - examples/sec: 107.98


INFO:tensorflow:global_step/sec: 7.65708


2022-01-19 14:25:01,170 - tensorflow - INFO - global_step/sec: 7.65708


INFO:tensorflow:examples/sec: 107.199


2022-01-19 14:25:01,171 - tensorflow - INFO - examples/sec: 107.199


INFO:tensorflow:global_step/sec: 7.68242


2022-01-19 14:25:01,301 - tensorflow - INFO - global_step/sec: 7.68242


INFO:tensorflow:examples/sec: 107.554


2022-01-19 14:25:01,303 - tensorflow - INFO - examples/sec: 107.554


INFO:tensorflow:global_step/sec: 7.71798


2022-01-19 14:25:01,430 - tensorflow - INFO - global_step/sec: 7.71798


INFO:tensorflow:examples/sec: 108.052


2022-01-19 14:25:01,431 - tensorflow - INFO - examples/sec: 108.052


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 80...


2022-01-19 14:25:01,559 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 80...


INFO:tensorflow:Saving checkpoints for 80 into ./model_output\model.ckpt.


2022-01-19 14:25:01,560 - tensorflow - INFO - Saving checkpoints for 80 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 80...


2022-01-19 14:25:08,462 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 80...


INFO:tensorflow:global_step/sec: 0.142169


2022-01-19 14:25:08,464 - tensorflow - INFO - global_step/sec: 0.142169


INFO:tensorflow:examples/sec: 1.99037


2022-01-19 14:25:08,465 - tensorflow - INFO - examples/sec: 1.99037


INFO:tensorflow:global_step/sec: 3.09263


2022-01-19 14:25:08,787 - tensorflow - INFO - global_step/sec: 3.09263


INFO:tensorflow:examples/sec: 43.2968


2022-01-19 14:25:08,788 - tensorflow - INFO - examples/sec: 43.2968


INFO:tensorflow:global_step/sec: 7.558


2022-01-19 14:25:08,920 - tensorflow - INFO - global_step/sec: 7.558


INFO:tensorflow:examples/sec: 105.812


2022-01-19 14:25:08,921 - tensorflow - INFO - examples/sec: 105.812


INFO:tensorflow:global_step/sec: 7.77268


2022-01-19 14:25:09,048 - tensorflow - INFO - global_step/sec: 7.77268


INFO:tensorflow:examples/sec: 108.818


2022-01-19 14:25:09,049 - tensorflow - INFO - examples/sec: 108.818


INFO:tensorflow:global_step/sec: 7.589


2022-01-19 14:25:09,180 - tensorflow - INFO - global_step/sec: 7.589


INFO:tensorflow:examples/sec: 106.246


2022-01-19 14:25:09,181 - tensorflow - INFO - examples/sec: 106.246


INFO:tensorflow:global_step/sec: 7.88842


2022-01-19 14:25:09,308 - tensorflow - INFO - global_step/sec: 7.88842


INFO:tensorflow:examples/sec: 110.438


2022-01-19 14:25:09,309 - tensorflow - INFO - examples/sec: 110.438


INFO:tensorflow:global_step/sec: 7.83456


2022-01-19 14:25:09,434 - tensorflow - INFO - global_step/sec: 7.83456


INFO:tensorflow:examples/sec: 109.684


2022-01-19 14:25:09,435 - tensorflow - INFO - examples/sec: 109.684


INFO:tensorflow:global_step/sec: 8.02137


2022-01-19 14:25:09,559 - tensorflow - INFO - global_step/sec: 8.02137


INFO:tensorflow:examples/sec: 112.299


2022-01-19 14:25:09,560 - tensorflow - INFO - examples/sec: 112.299


INFO:tensorflow:global_step/sec: 7.74423


2022-01-19 14:25:09,688 - tensorflow - INFO - global_step/sec: 7.74423


INFO:tensorflow:examples/sec: 108.419


2022-01-19 14:25:09,689 - tensorflow - INFO - examples/sec: 108.419


INFO:tensorflow:global_step/sec: 8.01131


2022-01-19 14:25:09,813 - tensorflow - INFO - global_step/sec: 8.01131


INFO:tensorflow:examples/sec: 112.158


2022-01-19 14:25:09,815 - tensorflow - INFO - examples/sec: 112.158


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 90...


2022-01-19 14:25:09,944 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 90...


INFO:tensorflow:Saving checkpoints for 90 into ./model_output\model.ckpt.


2022-01-19 14:25:09,945 - tensorflow - INFO - Saving checkpoints for 90 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 90...


2022-01-19 14:25:17,094 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 90...


INFO:tensorflow:global_step/sec: 0.137334


2022-01-19 14:25:17,096 - tensorflow - INFO - global_step/sec: 0.137334


INFO:tensorflow:examples/sec: 1.92268


2022-01-19 14:25:17,096 - tensorflow - INFO - examples/sec: 1.92268


INFO:tensorflow:global_step/sec: 3.13684


2022-01-19 14:25:17,413 - tensorflow - INFO - global_step/sec: 3.13684


INFO:tensorflow:examples/sec: 43.9158


2022-01-19 14:25:17,414 - tensorflow - INFO - examples/sec: 43.9158


INFO:tensorflow:global_step/sec: 7.86093


2022-01-19 14:25:17,541 - tensorflow - INFO - global_step/sec: 7.86093


INFO:tensorflow:examples/sec: 110.053


2022-01-19 14:25:17,542 - tensorflow - INFO - examples/sec: 110.053


INFO:tensorflow:global_step/sec: 7.76911


2022-01-19 14:25:17,670 - tensorflow - INFO - global_step/sec: 7.76911


INFO:tensorflow:examples/sec: 108.768


2022-01-19 14:25:17,671 - tensorflow - INFO - examples/sec: 108.768


INFO:tensorflow:global_step/sec: 7.48953


2022-01-19 14:25:17,804 - tensorflow - INFO - global_step/sec: 7.48953


INFO:tensorflow:examples/sec: 104.853


2022-01-19 14:25:17,805 - tensorflow - INFO - examples/sec: 104.853


INFO:tensorflow:global_step/sec: 7.82366


2022-01-19 14:25:17,931 - tensorflow - INFO - global_step/sec: 7.82366


INFO:tensorflow:examples/sec: 109.531


2022-01-19 14:25:17,932 - tensorflow - INFO - examples/sec: 109.531


INFO:tensorflow:global_step/sec: 7.95645


2022-01-19 14:25:18,057 - tensorflow - INFO - global_step/sec: 7.95645


INFO:tensorflow:examples/sec: 111.39


2022-01-19 14:25:18,058 - tensorflow - INFO - examples/sec: 111.39


INFO:tensorflow:global_step/sec: 7.77263


2022-01-19 14:25:18,185 - tensorflow - INFO - global_step/sec: 7.77263


INFO:tensorflow:examples/sec: 108.817


2022-01-19 14:25:18,186 - tensorflow - INFO - examples/sec: 108.817


INFO:tensorflow:global_step/sec: 7.84715


2022-01-19 14:25:18,312 - tensorflow - INFO - global_step/sec: 7.84715


INFO:tensorflow:examples/sec: 109.86


2022-01-19 14:25:18,313 - tensorflow - INFO - examples/sec: 109.86


INFO:tensorflow:global_step/sec: 7.79167


2022-01-19 14:25:18,442 - tensorflow - INFO - global_step/sec: 7.79167


INFO:tensorflow:examples/sec: 109.083


2022-01-19 14:25:18,443 - tensorflow - INFO - examples/sec: 109.083


INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 100...


2022-01-19 14:25:18,568 - tensorflow - INFO - Calling checkpoint listeners before saving checkpoint 100...


INFO:tensorflow:Saving checkpoints for 100 into ./model_output\model.ckpt.


2022-01-19 14:25:18,570 - tensorflow - INFO - Saving checkpoints for 100 into ./model_output\model.ckpt.


INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 100...


2022-01-19 14:25:25,497 - tensorflow - INFO - Calling checkpoint listeners after saving checkpoint 100...


INFO:tensorflow:global_step/sec: 0.141655


2022-01-19 14:25:25,500 - tensorflow - INFO - global_step/sec: 0.141655


INFO:tensorflow:examples/sec: 1.98317


2022-01-19 14:25:25,501 - tensorflow - INFO - examples/sec: 1.98317


INFO:tensorflow:Loss for final step: 0.007437828.


2022-01-19 14:25:25,587 - tensorflow - INFO - Loss for final step: 0.007437828.


INFO:tensorflow:training_loop marked as finished


2022-01-19 14:25:25,588 - tensorflow - INFO - training_loop marked as finished


INFO:tensorflow:***** Running evaluation *****


2022-01-19 14:25:25,589 - tensorflow - INFO - ***** Running evaluation *****


INFO:tensorflow:  Batch size = 14


2022-01-19 14:25:25,590 - tensorflow - INFO -   Batch size = 14


INFO:tensorflow:Calling model_fn.


2022-01-19 14:25:25,619 - tensorflow - INFO - Calling model_fn.


INFO:tensorflow:Running eval on CPU/GPU


2022-01-19 14:25:25,620 - tensorflow - INFO - Running eval on CPU/GPU
2022-01-19 14:25:25,621 - root - INFO - *** Features ***
2022-01-19 14:25:25,621 - root - INFO -   name = input_ids, shape = (14, 7)
2022-01-19 14:25:25,622 - root - INFO -   name = input_mask, shape = (14, 7)
2022-01-19 14:25:25,622 - root - INFO -   name = masked_lm_ids, shape = (14, 4)
2022-01-19 14:25:25,623 - root - INFO -   name = masked_lm_positions, shape = (14, 4)
2022-01-19 14:25:25,623 - root - INFO -   name = masked_lm_weights, shape = (14, 4)


Tensor("bert/pooler/dense/Tanh:0", shape=(14, 768), dtype=float32)
INFO:tensorflow:**** Trainable Variables ****


2022-01-19 14:25:26,882 - tensorflow - INFO - **** Trainable Variables ****
2022-01-19 14:25:26,883 - root - INFO -   name = bert/embeddings/word_embeddings:0, shape = (7, 768)
2022-01-19 14:25:26,884 - root - INFO -   name = bert/embeddings/LayerNorm/gamma:0, shape = (768,)
2022-01-19 14:25:26,884 - root - INFO -   name = bert/embeddings/LayerNorm/beta:0, shape = (768,)
2022-01-19 14:25:26,884 - root - INFO -   name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768)
2022-01-19 14:25:26,885 - root - INFO -   name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,)
2022-01-19 14:25:26,885 - root - INFO -   name = bert/encoder/layer_0/attention/self/key/kernel:0, shape = (768, 768)
2022-01-19 14:25:26,885 - root - INFO -   name = bert/encoder/layer_0/attention/self/key/bias:0, shape = (768,)
2022-01-19 14:25:26,886 - root - INFO -   name = bert/encoder/layer_0/attention/self/value/kernel:0, shape = (768, 768)
2022-01-19 14:25:26,886 - root - INFO -   

INFO:tensorflow:Done calling model_fn.


2022-01-19 14:25:26,963 - tensorflow - INFO - Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2022-01-19T14:25:26Z


2022-01-19 14:25:26,979 - tensorflow - INFO - Starting evaluation at 2022-01-19T14:25:26Z


INFO:tensorflow:Graph was finalized.


2022-01-19 14:25:27,255 - tensorflow - INFO - Graph was finalized.


INFO:tensorflow:Restoring parameters from ./model_output\model.ckpt-100


2022-01-19 14:25:27,258 - tensorflow - INFO - Restoring parameters from ./model_output\model.ckpt-100


INFO:tensorflow:Running local_init_op.


2022-01-19 14:25:27,854 - tensorflow - INFO - Running local_init_op.


INFO:tensorflow:Done running local_init_op.


2022-01-19 14:25:27,877 - tensorflow - INFO - Done running local_init_op.


INFO:tensorflow:Evaluation [2/20]


2022-01-19 14:25:28,297 - tensorflow - INFO - Evaluation [2/20]


INFO:tensorflow:Evaluation [4/20]


2022-01-19 14:25:28,387 - tensorflow - INFO - Evaluation [4/20]


INFO:tensorflow:Evaluation [6/20]


2022-01-19 14:25:28,476 - tensorflow - INFO - Evaluation [6/20]


INFO:tensorflow:Evaluation [8/20]


2022-01-19 14:25:28,511 - tensorflow - INFO - Evaluation [8/20]


INFO:tensorflow:Evaluation [10/20]


2022-01-19 14:25:28,537 - tensorflow - INFO - Evaluation [10/20]


INFO:tensorflow:Evaluation [12/20]


2022-01-19 14:25:28,564 - tensorflow - INFO - Evaluation [12/20]


INFO:tensorflow:Evaluation [14/20]


2022-01-19 14:25:28,589 - tensorflow - INFO - Evaluation [14/20]


INFO:tensorflow:Evaluation [16/20]


2022-01-19 14:25:28,614 - tensorflow - INFO - Evaluation [16/20]


INFO:tensorflow:Evaluation [18/20]


2022-01-19 14:25:28,639 - tensorflow - INFO - Evaluation [18/20]


INFO:tensorflow:Evaluation [20/20]


2022-01-19 14:25:28,665 - tensorflow - INFO - Evaluation [20/20]


INFO:tensorflow:Inference Time : 1.71835s


2022-01-19 14:25:28,698 - tensorflow - INFO - Inference Time : 1.71835s


INFO:tensorflow:Finished evaluation at 2022-01-19-14:25:28


2022-01-19 14:25:28,699 - tensorflow - INFO - Finished evaluation at 2022-01-19-14:25:28


INFO:tensorflow:Saving dict for global step 100: global_step = 100, loss = 0.0029329616, masked_lm_accuracy = 1.0, masked_lm_loss = 0.002932962


2022-01-19 14:25:28,700 - tensorflow - INFO - Saving dict for global step 100: global_step = 100, loss = 0.0029329616, masked_lm_accuracy = 1.0, masked_lm_loss = 0.002932962


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 100: ./model_output\model.ckpt-100


2022-01-19 14:25:31,149 - tensorflow - INFO - Saving 'checkpoint_path' summary for global step 100: ./model_output\model.ckpt-100


INFO:tensorflow:evaluation_loop marked as finished


2022-01-19 14:25:31,152 - tensorflow - INFO - evaluation_loop marked as finished


INFO:tensorflow:***** Eval results *****


2022-01-19 14:25:31,152 - tensorflow - INFO - ***** Eval results *****


INFO:tensorflow:  global_step = 100


2022-01-19 14:25:31,153 - tensorflow - INFO -   global_step = 100


INFO:tensorflow:  loss = 0.0029329616


2022-01-19 14:25:31,153 - tensorflow - INFO -   loss = 0.0029329616


INFO:tensorflow:  masked_lm_accuracy = 1.0


2022-01-19 14:25:31,154 - tensorflow - INFO -   masked_lm_accuracy = 1.0


INFO:tensorflow:  masked_lm_loss = 0.002932962


2022-01-19 14:25:31,154 - tensorflow - INFO -   masked_lm_loss = 0.002932962
