In [1]:
import codecs
import collections
import json
import re
import os, time
import modeling
import tokenization
import tensorflow as tf
import numpy as np
import pandas as pd

tf.logging.set_verbosity(tf.logging.INFO)
use_tpu = False
master = None
num_tpu_cores = 8
use_one_hot_embeddings = False

In [2]:
class InputExample(object):
  def __init__(self, unique_id, text_a, text_b):
    self.unique_id = unique_id
    self.text_a = text_a
    self.text_b = text_b
    
class InputFeatures(object):
  """A single set of features of data."""
  def __init__(self, unique_id, tokens, input_ids, input_mask, input_type_ids):
    self.unique_id = unique_id
    self.tokens = tokens
    self.input_ids = input_ids
    self.input_mask = input_mask
    self.input_type_ids = input_type_ids

def input_fn_builder(features, seq_length):
  """Creates an `input_fn` closure to be passed to TPUEstimator."""
  all_unique_ids = []
  all_input_ids = []
  all_input_mask = []
  all_input_type_ids = []

  for feature in features:
    all_unique_ids.append(feature.unique_id)
    all_input_ids.append(feature.input_ids)
    all_input_mask.append(feature.input_mask)
    all_input_type_ids.append(feature.input_type_ids)

  def input_fn(params):
    """The actual input function."""
    batch_size = params["batch_size"]

    num_examples = len(features)

    d = tf.data.Dataset.from_tensor_slices({
        "unique_ids":
            tf.constant(all_unique_ids, shape=[num_examples], dtype=tf.int32),
        "input_ids":
            tf.constant(
                all_input_ids, shape=[num_examples, seq_length],
                dtype=tf.int32),
        "input_mask":
            tf.constant(
                all_input_mask,
                shape=[num_examples, seq_length],
                dtype=tf.int32),
        "input_type_ids":
            tf.constant(
                all_input_type_ids,
                shape=[num_examples, seq_length],
                dtype=tf.int32),
    })

    d = d.batch(batch_size=batch_size)
    return d

  return input_fn

def model_fn_builder(bert_config, init_checkpoint, layer_indexes, use_tpu, use_one_hot_embeddings):
  """Returns `model_fn` closure for TPUEstimator."""

  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    input_type_ids = features["input_type_ids"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode != tf.estimator.ModeKeys.PREDICT:
      raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    tvars = tf.trainable_variables()
    scaffold_fn = None
    (assignment_map,initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,init_string)

    all_layers = model.get_all_encoder_layers()
    predictions = {"unique_id": unique_ids,}
    for (i, layer_index) in enumerate(layer_indexes):
      predictions["layer_output_%d" % i] = all_layers[layer_index]

    output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec

  return model_fn

def convert_examples_to_features(examples, seq_length, tokenizer, align=False):
  """Loads a data file into a list of `InputBatch`s."""

  features = []
  for (ex_index, example) in enumerate(examples):
    if isinstance(example.text_a, str):
        tokens_a = tokenizer.tokenize(example.text_a)
    elif isinstance(example.text_a, list):
        tokens_a = example.text_a

    tokens_b = None
    if example.text_b:
      tokens_b = example.text_b if isinstance(example.text_b, list) else tokenizer.tokenize(example.text_b)

    if tokens_b:
      _truncate_seq_pair(tokens_a, tokens_b, seq_length - 3)
    else:
      if len(tokens_a) > seq_length - 2:
        tokens_a = tokens_a[0:(seq_length - 2)]
    tokens = []
    input_type_ids = []
    tokens.append("[CLS]")
    input_type_ids.append(0)
    for token in tokens_a:
      tokens.append(token)
      input_type_ids.append(0)
    tokens.append("[SEP]")
    input_type_ids.append(0)

    if tokens_b:
      for token in tokens_b:
        tokens.append(token)
        input_type_ids.append(1)
      tokens.append("[SEP]")
      input_type_ids.append(1)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
    while len(input_ids) < seq_length:
      input_ids.append(0)
      input_mask.append(1*align)
      input_type_ids.append(0)

    assert len(input_ids) == seq_length
    assert len(input_mask) == seq_length
    assert len(input_type_ids) == seq_length

    if ex_index < 5:
      tf.logging.info("*** Example ***")
      tf.logging.info("unique_id: %s" % (example.unique_id))
      tf.logging.info("tokens: %s" % " ".join(
          [tokenization.printable_text(x) for x in tokens]))
      tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
      tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
      tf.logging.info(
          "input_type_ids: %s" % " ".join([str(x) for x in input_type_ids]))

    features.append(
        InputFeatures(
            unique_id=example.unique_id,
            tokens=tokens,
            input_ids=input_ids,
            input_mask=input_mask,
            input_type_ids=input_type_ids))
  return features

def _truncate_seq_pair(tokens_a, tokens_b, max_length):
  while True:
    total_length = len(tokens_a) + len(tokens_b)
    if total_length <= max_length:
      break
    if len(tokens_a) > len(tokens_b):
      tokens_a.pop()
    else:
      tokens_b.pop()

In [3]:
class BERT(object):
    def __init__(self, model_file="model/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt", 
                 model_config="model/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json",
                 vocab_file="model/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt",
                 output_layer_ix= [-1, -2, -3, -4]):
        self.model_file = model_file
        self.layer_indexes = output_layer_ix       
        bert_config = modeling.BertConfig.from_json_file(model_config)
        self.tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=False)
        self.sys_tokens = ['[CLS]', '[SEP]']
        model_fn = model_fn_builder(
            bert_config=bert_config,
            init_checkpoint=self.model_file,
            layer_indexes=self.layer_indexes,
            use_tpu=use_tpu,
            use_one_hot_embeddings=use_one_hot_embeddings)
        is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
        run_config = tf.contrib.tpu.RunConfig(
            master=master,
            tpu_config=tf.contrib.tpu.TPUConfig(
                num_shards=num_tpu_cores,
                per_host_input_for_training=is_per_host))
        self.estimator = tf.contrib.tpu.TPUEstimator(use_tpu=use_tpu, model_fn=model_fn, config=run_config, predict_batch_size=32)
    
    def to_vector(self, input_list, text_len=32, out_array=True):
        examples = []
        is_match = (isinstance(input_list[0], list) and len(input_list[0]) == 2)
        if is_match:
            for (j, s) in enumerate(input_list):
                examples += [InputExample(unique_id=j, text_a=s[0], text_b=s[1])]
        else:
            for (j, s) in enumerate(input_list):
                examples += [InputExample(unique_id=j, text_a=s, text_b=None)]
        features = convert_examples_to_features(examples=examples, seq_length=text_len, tokenizer=self.tokenizer, align=False)
        vec_output = []
        input_fn = input_fn_builder(features=features, seq_length=text_len)
        res = {int(r["unique_id"]): r for r in self.estimator.predict(input_fn, yield_single_examples=True)}
        for u in features:
            uu = res[u.unique_id]
            u_vec = []
            for (i, token) in enumerate(u.tokens+["[PAD]"]*(text_len-len(u.tokens))):
                if token not in self.sys_tokens:
                    v = []
                    for ck in range(len(self.layer_indexes)):
                        v += [round(float(x), 6) for x in uu["layer_output_%d" % ck][i:(i + 1)].flat]
                    u_vec.append(v)
            vec_output.append(u_vec)
        return np.array(vec_output) if out_array else vec_output

In [8]:
bert = BERT(model_file="result/bert_za201908_big/model.ckpt-100000")
t0 = time.time()
bert.to_vector(["跟你说我不是单身"]*1000)
print("use time:", time.time()-t0)

INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp476uyf5l', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fb9cbf21390>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_

INFO:tensorflow:  name = bert/encoder/layer_2/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tenso

INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT

INFO:tensorflow:  name = bert/encoder/layer_11/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/pooler/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:prediction_loop marked as finished
INFO:tensorflow:prediction_loop marked as finished
use time: 226.5103566646576


In [4]:
class TextCnn(object):
    def __init__(self, emb_size=768):
        self.input_len = 30
        self.emb_size = emb_size
        self.kernel_num = 128
        self.win_size_list = [1, 2, 3, 4, 5]
        self.num_filters_total = len(self.win_size_list)*self.kernel_num
        self.n_class = 5

    def predict(self, input_tf, kp=0.5):
        input_net = tf.expand_dims(input_tf, -1)
        pooled_outputs = []
        for i, filter_size in enumerate(self.win_size_list):
            filter_shape = [filter_size, self.emb_size, 1, self.kernel_num]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[self.kernel_num]), name="b")
            conv = tf.nn.conv2d(input_net,W, strides=[1, 1, 1, 1], padding="VALID", name="conv")
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            pooled = tf.nn.max_pool(h, ksize=[1, self.input_len - filter_size + 1, 1, 1],
                                    strides=[1, 1, 1, 1], padding='VALID',name="pool")
            pooled_outputs.append(pooled)
        h_pool = tf.concat(pooled_outputs, 3)
        return tf.reshape(h_pool, [-1, self.num_filters_total])

In [5]:
data = {"train": None, "dev": None}
labels = [i for i in range(1,6)]
def format_label(yy):
    origin = [0]*len(labels)
    origin[labels.index(yy)] += 1
    return origin
for k in data:
    data[k] = pd.read_csv("data/za_data/ir_{}.csv".format(k), header=None, sep="\t", names=("a", "y"))
    data[k]["y"] = data[k]["y"].apply(format_label)
def batch_iter(df_gen, batch_size, shuffle=True):
    obs = len(df_gen)
    if shuffle:
        data_gen = df_gen.sample(frac=1).reset_index(drop=True)
    else:
        data_gen = df_gen.copy()
    batch_num = int(obs/batch_size)
    for j in range(batch_num):
        yield (data_gen["a"].iloc[j*batch_size:min(obs, j*batch_size+batch_size)].tolist(),
        data_gen["y"].iloc[j*batch_size:min(obs, j*batch_size+batch_size)].tolist())


In [8]:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
t0 = time.time()
bert = BERT(model_file="model/roeberta_zh_L-24_H-1024_A-16/roberta_zh_large_model.ckpt",
            model_config="model/roeberta_zh_L-24_H-1024_A-16/bert_config_large.json",
            vocab_file="model/roeberta_zh_L-24_H-1024_A-16/vocab.txt")
new_data = {}
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a"]:
        new_data[k][c] = bert.to_vector(data[k][c].tolist(), text_len=32,out_array=False)
t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)

tf.reset_default_graph()
emb_dim = 1024*4
l2_reg_lambda =  0.01
net = TextCnn(emb_size=emb_dim)
test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_y = tf.placeholder(tf.float32, [None, 5], name="input_label")
keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

pool_out = net.predict(test_l)
h_drop = tf.nn.dropout(pool_out, keep_prob)
W = tf.get_variable("W", shape=[128*5, 5], initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.constant(0.1, shape=[5]), name="b")
l2_loss = tf.constant(0.0)
l2_loss += tf.nn.l2_loss(W)
l2_loss += tf.nn.l2_loss(b)
scores = tf.nn.xw_plus_b(h_drop, W, b, name="scores")
pred_index = tf.argmax(scores, 1, name="predictions")
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=test_y))+ l2_reg_lambda * l2_loss

correct_predictions = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={
                                                             test_l: np.array(input_l).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32),
                                                             keep_prob: 0.5})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32),
                                            keep_prob: 1.0})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpl2hkqglk', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff2e7350278>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_

INFO:tensorflow:  name = bert/encoder/layer_2/attention/self/key/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/self/value/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/self/value/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/dense/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (1024, 4096), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (4096,), *INI

INFO:tensorflow:  name = bert/encoder/layer_6/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_6/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/query/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/query/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/key/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/key/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow: 

INFO:tensorflow:  name = bert/encoder/layer_11/attention/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/attention/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/intermediate/dense/kernel:0, shape = (1024, 4096), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/intermediate/dense/bias:0, shape = (4096,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/dense/kernel:0, shape = (4096, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:ten

INFO:tensorflow:  name = bert/encoder/layer_16/attention/self/query/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_16/attention/self/key/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_16/attention/self/key/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_16/attention/self/value/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_16/attention/self/value/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_16/attention/output/dense/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_16/attention/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_16/attention/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_16/attention/output/LayerNorm/gamma:0, shape = (1

INFO:tensorflow:  name = bert/encoder/layer_20/intermediate/dense/bias:0, shape = (4096,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_20/output/dense/kernel:0, shape = (4096, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_20/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_20/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_20/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_21/attention/self/query/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_21/attention/self/query/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_21/attention/self/key/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_21/attention/self/key/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  

INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
INFO:tensorflow:input_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:tensorflow:Could not find trained model in model_dir: /tmp/tmpl2hkqglk, running initialization to predict.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running infer on CPU
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:  name = bert/embeddings/word_embeddings:0, shape = (21128, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/token_type_embeddings:0, shape = (2, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/position_embeddings:0, shape = (512, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (1024, 

INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/key/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/key/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/value/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (1024, 4096

INFO:tensorflow:  name = bert/encoder/layer_8/output/dense/kernel:0, shape = (4096, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_8/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  

INFO:tensorflow:  name = bert/encoder/layer_13/attention/output/dense/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_13/attention/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_13/attention/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_13/attention/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_13/intermediate/dense/kernel:0, shape = (1024, 4096), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_13/intermediate/dense/bias:0, shape = (4096,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_13/output/dense/kernel:0, shape = (4096, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_13/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_13/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CK

INFO:tensorflow:  name = bert/encoder/layer_18/attention/self/query/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_18/attention/self/query/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_18/attention/self/key/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_18/attention/self/key/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_18/attention/self/value/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_18/attention/self/value/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_18/attention/output/dense/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_18/attention/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_18/attention/output/LayerNorm/beta:0, shape = (1

INFO:tensorflow:  name = bert/encoder/layer_22/intermediate/dense/kernel:0, shape = (1024, 4096), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_22/intermediate/dense/bias:0, shape = (4096,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_22/output/dense/kernel:0, shape = (4096, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_22/output/dense/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_22/output/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_22/output/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_23/attention/self/query/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_23/attention/self/query/bias:0, shape = (1024,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_23/attention/self/key/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
INFO:tensor

```
bert = BERT(model_file="model/chinese_L-12_H-768_A-12/bert_model.ckpt",
           vocab_file="model/chinese_L-12_H-768_A-12/vocab.txt",
           model_config="model/chinese_L-12_H-768_A-12/bert_config.json")
```
epoch 12 train loss: 0.000885 | accuracy on validation data : 0.820000

In [None]:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
t0 = time.time()
bert = BERT()
new_data = {}
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a"]:
        new_data[k][c] = bert.to_vector(data[k][c].tolist(), text_len=32,out_array=False)
t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)
tf.reset_default_graph()
emb_dim = 768*4
net = TextCnn(emb_size=emb_dim)
test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_y = tf.placeholder(tf.float32, [None, 5], name="input_label")
keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

pred = tf.nn.dropout(net.predict(test_l), keep_prob)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={
                                                             test_l: np.array(input_l).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32),
                                                             keep_prob: 0.7})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32),
                                            keep_prob: 1.0})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

In [None]:
t0 = time.time()
bert = BERT(model_file="result/bert_za201908_retrain/model.ckpt-200000",
           output_layer_ix=[-1,-2,-3,-4])
new_data = {}
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a"]:
        new_data[k][c] = bert.to_vector(data[k][c].apply(string_format).tolist(), text_len=32,out_array=False)
t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)

tf.reset_default_graph()
emb_dim = 768*4
l2_reg_lambda = 0.01
net = TextCnn(emb_size=emb_dim)
test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_y = tf.placeholder(tf.float32, [None, 5], name="input_label")
keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

pool_out = net.predict(test_l)
h_drop = tf.nn.dropout(pool_out, keep_prob)
W = tf.get_variable("W", shape=[128*5, 5], initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.constant(0.1, shape=[5]), name="b")
l2_loss = tf.constant(0.0)
l2_loss += tf.nn.l2_loss(W)
l2_loss += tf.nn.l2_loss(b)
scores = tf.nn.xw_plus_b(h_drop, W, b, name="scores")
pred_index = tf.argmax(scores, 1, name="predictions")
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=test_y))+ l2_reg_lambda * l2_loss

correct_predictions = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={
                                                             test_l: np.array(input_l).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32),
                                                             keep_prob: 0.5})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32),
                                            keep_prob: 1.0})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

``` 
kp:0.5  bert process data
vectoring use time:  15.044257767498493
     >>> train loss: 0.07506977766752243 accuracy: 0.59375
     >>> train loss: 0.048909083008766174 accuracy: 0.6953125
     >>> train loss: 0.044740110635757446 accuracy: 0.6875
     >>> train loss: 0.03389664739370346 accuracy: 0.7109375
epoch 1 train loss: 0.048536 | accuracy on validation data : 0.805000
     >>> train loss: 0.01825757510960102 accuracy: 0.7734375
     >>> train loss: 0.026247957721352577 accuracy: 0.734375
     >>> train loss: 0.018337253481149673 accuracy: 0.8125
     >>> train loss: 0.021368511021137238 accuracy: 0.75
epoch 2 train loss: 0.016787 | accuracy on validation data : 0.818000
     >>> train loss: 0.01860962063074112 accuracy: 0.828125
     >>> train loss: 0.011141505092382431 accuracy: 0.8359375
     >>> train loss: 0.011425125412642956 accuracy: 0.78125
     >>> train loss: 0.013101772405207157 accuracy: 0.8125
epoch 3 train loss: 0.006272 | accuracy on validation data : 0.826000
     >>> train loss: 0.005116019397974014 accuracy: 0.875
     >>> train loss: 0.007277045864611864 accuracy: 0.8203125
     >>> train loss: 0.008001158945262432 accuracy: 0.8125
     >>> train loss: 0.007509998045861721 accuracy: 0.828125
epoch 4 train loss: 0.005982 | accuracy on validation data : 0.834000
     >>> train loss: 0.0030762755777686834 accuracy: 0.9140625
     >>> train loss: 0.006004644557833672 accuracy: 0.8671875
     >>> train loss: 0.008559954352676868 accuracy: 0.8515625
     >>> train loss: 0.009463612921535969 accuracy: 0.796875
epoch 5 train loss: 0.004803 | accuracy on validation data : 0.835000
     >>> train loss: 0.003039425238966942 accuracy: 0.8828125
     >>> train loss: 0.003599965712055564 accuracy: 0.859375
     >>> train loss: 0.004600606393069029 accuracy: 0.859375
     >>> train loss: 0.00485622975975275 accuracy: 0.8515625
epoch 6 train loss: 0.004490 | accuracy on validation data : 0.822000
     >>> train loss: 0.004941962193697691 accuracy: 0.8828125
     >>> train loss: 0.003231907496228814 accuracy: 0.8984375
     >>> train loss: 0.0053574563935399055 accuracy: 0.84375
     >>> train loss: 0.0023482502438127995 accuracy: 0.921875
epoch 7 train loss: 0.005328 | accuracy on validation data : 0.831000
     >>> train loss: 0.003285817103460431 accuracy: 0.8828125
     >>> train loss: 0.004342362750321627 accuracy: 0.890625
     >>> train loss: 0.002926676766946912 accuracy: 0.90625
     >>> train loss: 0.0037858954165130854 accuracy: 0.8828125
epoch 8 train loss: 0.003514 | accuracy on validation data : 0.826000
     >>> train loss: 0.0017216089181602001 accuracy: 0.9375
     >>> train loss: 0.0019755407702177763 accuracy: 0.9296875
     >>> train loss: 0.0037505854852497578 accuracy: 0.890625
     >>> train loss: 0.0033016561064869165 accuracy: 0.8828125
epoch 9 train loss: 0.002278 | accuracy on validation data : 0.836000
     >>> train loss: 0.0017864663386717439 accuracy: 0.921875
     >>> train loss: 0.0026061220560222864 accuracy: 0.921875
     >>> train loss: 0.0028060737531632185 accuracy: 0.8828125
     >>> train loss: 0.0020726437214761972 accuracy: 0.9375
     >>> train loss: 0.0013095056638121605 accuracy: 0.9609375
epoch 10 train loss: 0.001310 | accuracy on validation data : 0.830000
     >>> train loss: 0.0018705592956393957 accuracy: 0.9296875
     >>> train loss: 0.0020614825189113617 accuracy: 0.921875
     >>> train loss: 0.0014348081313073635 accuracy: 0.9453125
     >>> train loss: 0.001880572410300374 accuracy: 0.921875
epoch 11 train loss: 0.002028 | accuracy on validation data : 0.836000
     >>> train loss: 0.002000199630856514 accuracy: 0.921875
     >>> train loss: 0.0020232589449733496 accuracy: 0.921875
     >>> train loss: 0.0022898486349731684 accuracy: 0.9140625
     >>> train loss: 0.0018085187766700983 accuracy: 0.9296875
epoch 12 train loss: 0.001973 | accuracy on validation data : 0.826000
--------------------------------------------
kp: 0.5 bert official define
-----------------------------
vectoring use time:  12.35144940763712
     >>> train loss: 0.09475432336330414 accuracy: 0.75
     >>> train loss: 0.0513455905020237 accuracy: 0.75
     >>> train loss: 0.04159151017665863 accuracy: 0.75
     >>> train loss: 0.05744253471493721 accuracy: 0.7578125
epoch 1 train loss: 0.047228 | accuracy on validation data : 0.831000
     >>> train loss: 0.02607971988618374 accuracy: 0.8359375
     >>> train loss: 0.03430609777569771 accuracy: 0.828125
     >>> train loss: 0.02104470692574978 accuracy: 0.859375
     >>> train loss: 0.021326502785086632 accuracy: 0.84375
epoch 2 train loss: 0.032559 | accuracy on validation data : 0.839000
     >>> train loss: 0.018701864406466484 accuracy: 0.828125
     >>> train loss: 0.015681995078921318 accuracy: 0.859375
     >>> train loss: 0.013140547089278698 accuracy: 0.859375
     >>> train loss: 0.015892578288912773 accuracy: 0.8359375
epoch 3 train loss: 0.016433 | accuracy on validation data : 0.843000
     >>> train loss: 0.004725405015051365 accuracy: 0.9140625
     >>> train loss: 0.014301339164376259 accuracy: 0.8671875
     >>> train loss: 0.024083158001303673 accuracy: 0.84375
     >>> train loss: 0.009123283438384533 accuracy: 0.8828125
epoch 4 train loss: 0.005266 | accuracy on validation data : 0.839000
     >>> train loss: 0.012660215608775616 accuracy: 0.859375
     >>> train loss: 0.008995825424790382 accuracy: 0.8984375
     >>> train loss: 0.004735670052468777 accuracy: 0.921875
     >>> train loss: 0.008059096522629261 accuracy: 0.875
epoch 5 train loss: 0.011702 | accuracy on validation data : 0.844000
     >>> train loss: 0.008984651416540146 accuracy: 0.8828125
     >>> train loss: 0.004859656561166048 accuracy: 0.9296875
     >>> train loss: 0.0051573156379163265 accuracy: 0.8828125
     >>> train loss: 0.006157728843390942 accuracy: 0.9140625
epoch 6 train loss: 0.003444 | accuracy on validation data : 0.841000
     >>> train loss: 0.004497092682868242 accuracy: 0.9375
     >>> train loss: 0.0019211445469409227 accuracy: 0.9765625
     >>> train loss: 0.005209655500948429 accuracy: 0.921875
     >>> train loss: 0.006452234461903572 accuracy: 0.921875
epoch 7 train loss: 0.003647 | accuracy on validation data : 0.837000
     >>> train loss: 0.003419815097004175 accuracy: 0.9375
     >>> train loss: 0.004360694903880358 accuracy: 0.9140625
     >>> train loss: 0.004815479274839163 accuracy: 0.9453125
     >>> train loss: 0.0038853969890624285 accuracy: 0.9296875
epoch 8 train loss: 0.008747 | accuracy on validation data : 0.834000
     >>> train loss: 0.0027257089968770742 accuracy: 0.9609375
     >>> train loss: 0.006493282970041037 accuracy: 0.890625
     >>> train loss: 0.002135947346687317 accuracy: 0.9609375
     >>> train loss: 0.0019239219836890697 accuracy: 0.9375
epoch 9 train loss: 0.002309 | accuracy on validation data : 0.840000
     >>> train loss: 0.005426147021353245 accuracy: 0.921875
     >>> train loss: 0.004002354573458433 accuracy: 0.9375
     >>> train loss: 0.006335402838885784 accuracy: 0.875
     >>> train loss: 0.0021866806782782078 accuracy: 0.953125
     >>> train loss: 0.006828084122389555 accuracy: 0.90625
epoch 10 train loss: 0.006828 | accuracy on validation data : 0.839000
     >>> train loss: 0.004033834673464298 accuracy: 0.9296875
     >>> train loss: 0.0022661664988845587 accuracy: 0.9375
     >>> train loss: 0.0015121626202017069 accuracy: 0.9609375
     >>> train loss: 0.005345101933926344 accuracy: 0.921875
epoch 11 train loss: 0.002026 | accuracy on validation data : 0.852000
     >>> train loss: 0.0008900924003683031 accuracy: 0.9765625
     >>> train loss: 0.0015406820457428694 accuracy: 0.96875
     >>> train loss: 0.0015488314675167203 accuracy: 0.953125
     >>> train loss: 0.001077835215255618 accuracy: 0.9765625
epoch 12 train loss: 0.001395 | accuracy on validation data : 0.847000
-------------------------------------------------
  kp：0.2  |  official
     >>> train loss: 0.11787640303373337 accuracy: 0.40625
     >>> train loss: 0.07069216668605804 accuracy: 0.5703125
     >>> train loss: 0.03906998410820961 accuracy: 0.7109375
     >>> train loss: 0.024448901414871216 accuracy: 0.6953125
epoch 1 train loss: 0.039669 | accuracy on validation data : 0.803000
     >>> train loss: 0.01668168231844902 accuracy: 0.78125
     >>> train loss: 0.011877933517098427 accuracy: 0.7265625
     >>> train loss: 0.014150372706353664 accuracy: 0.71875
     >>> train loss: 0.010724574327468872 accuracy: 0.7578125
epoch 2 train loss: 0.010269 | accuracy on validation data : 0.809000
     >>> train loss: 0.008597953245043755 accuracy: 0.7265625
     >>> train loss: 0.006259781774133444 accuracy: 0.7890625
     >>> train loss: 0.006634075660258532 accuracy: 0.7578125
     >>> train loss: 0.007653890177607536 accuracy: 0.7890625
epoch 3 train loss: 0.009245 | accuracy on validation data : 0.810000
     >>> train loss: 0.008001290261745453 accuracy: 0.7578125
     >>> train loss: 0.005629937630146742 accuracy: 0.78125
     >>> train loss: 0.008177677169442177 accuracy: 0.75
     >>> train loss: 0.00782821699976921 accuracy: 0.78125
epoch 4 train loss: 0.006854 | accuracy on validation data : 0.824000
     >>> train loss: 0.006280165631324053 accuracy: 0.8046875
     >>> train loss: 0.0067283459939062595 accuracy: 0.7421875
     >>> train loss: 0.006087408401072025 accuracy: 0.8046875
     >>> train loss: 0.0039936876855790615 accuracy: 0.84375
epoch 5 train loss: 0.006565 | accuracy on validation data : 0.825000
     >>> train loss: 0.0037957134190946817 accuracy: 0.875
     >>> train loss: 0.005007544532418251 accuracy: 0.8515625
     >>> train loss: 0.0047807833179831505 accuracy: 0.8203125
     >>> train loss: 0.0039978851564228535 accuracy: 0.8125
epoch 6 train loss: 0.004856 | accuracy on validation data : 0.830000
     >>> train loss: 0.0052361576817929745 accuracy: 0.8046875
     >>> train loss: 0.0035294932313263416 accuracy: 0.875
     >>> train loss: 0.005749555770307779 accuracy: 0.8125
     >>> train loss: 0.004931464791297913 accuracy: 0.7890625
epoch 7 train loss: 0.004831 | accuracy on validation data : 0.813000
     >>> train loss: 0.005109638441354036 accuracy: 0.78125
     >>> train loss: 0.0039123352617025375 accuracy: 0.875
     >>> train loss: 0.004311954136937857 accuracy: 0.8359375
     >>> train loss: 0.004124110098928213 accuracy: 0.8828125
epoch 8 train loss: 0.006129 | accuracy on validation data : 0.822000
     >>> train loss: 0.0032043266110122204 accuracy: 0.890625
     >>> train loss: 0.0033424142748117447 accuracy: 0.875
     >>> train loss: 0.0035498265642672777 accuracy: 0.875
     >>> train loss: 0.002972646616399288 accuracy: 0.8828125
epoch 9 train loss: 0.005482 | accuracy on validation data : 0.828000
     >>> train loss: 0.0043727257288992405 accuracy: 0.8359375
     >>> train loss: 0.0026472921017557383 accuracy: 0.9140625
     >>> train loss: 0.003663902636617422 accuracy: 0.875
     >>> train loss: 0.004347586072981358 accuracy: 0.7890625
     >>> train loss: 0.003121854504570365 accuracy: 0.84375
epoch 10 train loss: 0.003122 | accuracy on validation data : 0.831000
     >>> train loss: 0.0034719216637313366 accuracy: 0.875
     >>> train loss: 0.0035934383049607277 accuracy: 0.8203125
     >>> train loss: 0.0033225053921341896 accuracy: 0.875
     >>> train loss: 0.002540828660130501 accuracy: 0.890625
epoch 11 train loss: 0.003820 | accuracy on validation data : 0.832000
     >>> train loss: 0.003116561332717538 accuracy: 0.890625
     >>> train loss: 0.003071680199354887 accuracy: 0.8671875
     >>> train loss: 0.002822703681886196 accuracy: 0.859375
     >>> train loss: 0.003378871362656355 accuracy: 0.859375
epoch 12 train loss: 0.003358 | accuracy on validation data : 0.834000
------------------------------------
std cnn kp:0.5
     >>> train loss: 0.0695074275135994 accuracy: 0.578125
     >>> train loss: 0.039984483271837234 accuracy: 0.71875
     >>> train loss: 0.040676552802324295 accuracy: 0.7109375
     >>> train loss: 0.02280501276254654 accuracy: 0.78125
epoch 1 train loss: 0.024210 | accuracy on validation data : 0.791000
     >>> train loss: 0.01198399905115366 accuracy: 0.8671875
     >>> train loss: 0.010410970076918602 accuracy: 0.828125
     >>> train loss: 0.01247412245720625 accuracy: 0.796875
     >>> train loss: 0.016896555200219154 accuracy: 0.734375
epoch 2 train loss: 0.014450 | accuracy on validation data : 0.810000
     >>> train loss: 0.008788200095295906 accuracy: 0.84375
     >>> train loss: 0.005876186303794384 accuracy: 0.859375
     >>> train loss: 0.006475652568042278 accuracy: 0.859375
     >>> train loss: 0.004071613308042288 accuracy: 0.8828125
epoch 3 train loss: 0.004598 | accuracy on validation data : 0.826000
     >>> train loss: 0.003301768796518445 accuracy: 0.8828125
     >>> train loss: 0.0024637773167341948 accuracy: 0.9140625
     >>> train loss: 0.0056638168171048164 accuracy: 0.8046875
     >>> train loss: 0.0033402766566723585 accuracy: 0.890625
epoch 4 train loss: 0.003586 | accuracy on validation data : 0.815000
     >>> train loss: 0.0025534480810165405 accuracy: 0.9140625
     >>> train loss: 0.0021253477316349745 accuracy: 0.9375
     >>> train loss: 0.002296954160556197 accuracy: 0.90625
     >>> train loss: 0.0028084581717848778 accuracy: 0.8984375
epoch 5 train loss: 0.002475 | accuracy on validation data : 0.824000
     >>> train loss: 0.002466529607772827 accuracy: 0.8984375
     >>> train loss: 0.00104507093783468 accuracy: 0.9609375
     >>> train loss: 0.0012395622907206416 accuracy: 0.953125
     >>> train loss: 0.0025633189361542463 accuracy: 0.890625
epoch 6 train loss: 0.001713 | accuracy on validation data : 0.828000
     >>> train loss: 0.0017894967459142208 accuracy: 0.9296875
     >>> train loss: 0.001268803491257131 accuracy: 0.953125
     >>> train loss: 0.0019206092692911625 accuracy: 0.9375
     >>> train loss: 0.0014309383695945144 accuracy: 0.953125
epoch 7 train loss: 0.001239 | accuracy on validation data : 0.819000
     >>> train loss: 0.0014885644195601344 accuracy: 0.9609375
     >>> train loss: 0.0009339665994048119 accuracy: 0.9765625
     >>> train loss: 0.0017392165027558804 accuracy: 0.9375
     >>> train loss: 0.0009297654032707214 accuracy: 0.984375
epoch 8 train loss: 0.001668 | accuracy on validation data : 0.832000
     >>> train loss: 0.0013628567103296518 accuracy: 0.9453125
     >>> train loss: 0.0011574263917282224 accuracy: 0.953125
     >>> train loss: 0.001683688722550869 accuracy: 0.9609375
     >>> train loss: 0.0008765582460910082 accuracy: 0.953125
epoch 9 train loss: 0.001009 | accuracy on validation data : 0.834000
     >>> train loss: 0.0012232367880642414 accuracy: 0.9453125
     >>> train loss: 0.0013613043120130897 accuracy: 0.96875
     >>> train loss: 0.0010940784122794867 accuracy: 0.9765625
     >>> train loss: 0.0010852765990421176 accuracy: 0.9609375
     >>> train loss: 0.0008160439319908619 accuracy: 0.9765625
epoch 10 train loss: 0.000816 | accuracy on validation data : 0.835000
     >>> train loss: 0.0006857814732939005 accuracy: 0.984375
     >>> train loss: 0.0008728296961635351 accuracy: 0.9765625
     >>> train loss: 0.0017151515930891037 accuracy: 0.9375
     >>> train loss: 0.0011947823222726583 accuracy: 0.9765625
epoch 11 train loss: 0.002341 | accuracy on validation data : 0.829000
     >>> train loss: 0.0009318346856161952 accuracy: 0.9609375
     >>> train loss: 0.001642709830775857 accuracy: 0.9609375
     >>> train loss: 0.0008568078046664596 accuracy: 0.96875
     >>> train loss: 0.0012185584055259824 accuracy: 0.9609375
epoch 12 train loss: 0.000744 | accuracy on validation data : 0.827000
------------------------------------
[1,2,3,4,5], kp=0.9
    >>> train loss: 0.7448777556419373 accuracy: 0.6875
     >>> train loss: 0.7116219997406006 accuracy: 0.6484375
     >>> train loss: 0.3887731730937958 accuracy: 0.734375
     >>> train loss: 0.2584020793437958 accuracy: 0.734375
epoch 1 train loss: 0.361906 | accuracy on validation data : 0.786000
     >>> train loss: 0.19218268990516663 accuracy: 0.8125
     >>> train loss: 0.2144605666399002 accuracy: 0.796875
     >>> train loss: 0.13514816761016846 accuracy: 0.8046875
     >>> train loss: 0.21121054887771606 accuracy: 0.78125
epoch 2 train loss: 0.151048 | accuracy on validation data : 0.814000
     >>> train loss: 0.05621935799717903 accuracy: 0.9140625
     >>> train loss: 0.023957103490829468 accuracy: 0.9453125
     >>> train loss: 0.02733374945819378 accuracy: 0.921875
     >>> train loss: 0.03608512878417969 accuracy: 0.9140625
epoch 3 train loss: 0.051456 | accuracy on validation data : 0.824000
     >>> train loss: 0.030412200838327408 accuracy: 0.921875
     >>> train loss: 0.08929476141929626 accuracy: 0.859375
     >>> train loss: 0.026908326894044876 accuracy: 0.90625
     >>> train loss: 0.0211198627948761 accuracy: 0.9140625
epoch 4 train loss: 0.018101 | accuracy on validation data : 0.822000
     >>> train loss: 0.016169516369700432 accuracy: 0.9140625
     >>> train loss: 0.020483452826738358 accuracy: 0.9296875
     >>> train loss: 0.06284118443727493 accuracy: 0.875
     >>> train loss: 0.014206577092409134 accuracy: 0.9375
epoch 5 train loss: 0.029711 | accuracy on validation data : 0.807000
     >>> train loss: 0.020663822069764137 accuracy: 0.9375
     >>> train loss: 0.04558707773685455 accuracy: 0.8984375
     >>> train loss: 0.03554214537143707 accuracy: 0.90625
     >>> train loss: 0.017021359875798225 accuracy: 0.921875
epoch 6 train loss: 0.026442 | accuracy on validation data : 0.820000
     >>> train loss: 0.029871996492147446 accuracy: 0.9296875
     >>> train loss: 0.038220666348934174 accuracy: 0.9140625
     >>> train loss: 0.005664331838488579 accuracy: 0.953125
     >>> train loss: 0.009321191348135471 accuracy: 0.9375
epoch 7 train loss: 0.003124 | accuracy on validation data : 0.809000
     >>> train loss: 0.05178038775920868 accuracy: 0.8984375
     >>> train loss: 0.0314117968082428 accuracy: 0.9296875
     >>> train loss: 0.005701182410120964 accuracy: 0.96875
     >>> train loss: 0.0023414362221956253 accuracy: 0.9609375
epoch 8 train loss: 0.020216 | accuracy on validation data : 0.814000
     >>> train loss: 0.030860666185617447 accuracy: 0.9375
     >>> train loss: 0.006568047218024731 accuracy: 0.96875
     >>> train loss: 0.0049414378590881824 accuracy: 0.9609375
     >>> train loss: 0.0070254383608698845 accuracy: 0.9375
epoch 9 train loss: 0.000212 | accuracy on validation data : 0.818000
     >>> train loss: 0.0016863738419488072 accuracy: 0.984375
     >>> train loss: 0.0014361764770001173 accuracy: 0.9453125
     >>> train loss: 0.013289548456668854 accuracy: 0.9296875
     >>> train loss: 0.0032098479568958282 accuracy: 0.9609375
     >>> train loss: 0.001249045366421342 accuracy: 0.984375
epoch 10 train loss: 0.001249 | accuracy on validation data : 0.810000
     >>> train loss: 0.011472798883914948 accuracy: 0.9609375
     >>> train loss: 0.02246152237057686 accuracy: 0.9453125
     >>> train loss: 0.027778320014476776 accuracy: 0.9375
     >>> train loss: 0.019919412210583687 accuracy: 0.90625
epoch 11 train loss: 0.005049 | accuracy on validation data : 0.789000
     >>> train loss: 0.0014193174429237843 accuracy: 0.9765625
     >>> train loss: 0.018127691000699997 accuracy: 0.9453125
     >>> train loss: 0.003955288790166378 accuracy: 0.9609375
     >>> train loss: 0.0002539307752158493 accuracy: 0.984375
epoch 12 train loss: 0.019974 | accuracy on validation data : 0.819000
------------------------------------------
[2,3,4]  no dropout
     >>> train loss: 0.7653381824493408 accuracy: 0.5703125
     >>> train loss: 0.44429266452789307 accuracy: 0.7109375
     >>> train loss: 0.17649909853935242 accuracy: 0.84375
     >>> train loss: 0.20419982075691223 accuracy: 0.765625
epoch 1 train loss: 0.112230 | accuracy on validation data : 0.775000
     >>> train loss: 0.08667470514774323 accuracy: 0.8125
     >>> train loss: 0.1241530328989029 accuracy: 0.8515625
     >>> train loss: 0.03702837973833084 accuracy: 0.90625
     >>> train loss: 0.06485087424516678 accuracy: 0.8671875
epoch 2 train loss: 0.121282 | accuracy on validation data : 0.772000
     >>> train loss: 0.02777235582470894 accuracy: 0.9140625
     >>> train loss: 0.01025107130408287 accuracy: 0.9609375
     >>> train loss: 0.035694368183612823 accuracy: 0.9296875
     >>> train loss: 0.053515367209911346 accuracy: 0.890625
epoch 3 train loss: 0.030000 | accuracy on validation data : 0.793000
     >>> train loss: 0.012042324990034103 accuracy: 0.9453125
     >>> train loss: 0.024201467633247375 accuracy: 0.9296875
     >>> train loss: 0.027736825868487358 accuracy: 0.8984375
     >>> train loss: 0.007639187853783369 accuracy: 0.953125
epoch 4 train loss: 0.025528 | accuracy on validation data : 0.754000
     >>> train loss: 0.025070689618587494 accuracy: 0.8984375
     >>> train loss: 0.012690783478319645 accuracy: 0.953125
     >>> train loss: 0.015322178602218628 accuracy: 0.9453125
     >>> train loss: 0.027007227763533592 accuracy: 0.9296875
epoch 5 train loss: 0.021751 | accuracy on validation data : 0.812000
     >>> train loss: 0.009202755987644196 accuracy: 0.953125
     >>> train loss: 0.014724159613251686 accuracy: 0.9375
     >>> train loss: 0.015714352950453758 accuracy: 0.953125
     >>> train loss: 0.012392968870699406 accuracy: 0.9609375
epoch 6 train loss: 0.003374 | accuracy on validation data : 0.806000
     >>> train loss: 0.002100056502968073 accuracy: 0.96875
     >>> train loss: 0.00624361215159297 accuracy: 0.953125
     >>> train loss: 0.00027241790667176247 accuracy: 0.9921875
     >>> train loss: 0.007256372831761837 accuracy: 0.96875
epoch 7 train loss: 0.002360 | accuracy on validation data : 0.810000
     >>> train loss: 0.006177614443004131 accuracy: 0.96875
     >>> train loss: 0.0045621017925441265 accuracy: 0.984375
     >>> train loss: 0.0019234613282606006 accuracy: 0.984375
     >>> train loss: 0.006624326109886169 accuracy: 0.96875
epoch 8 train loss: 0.003148 | accuracy on validation data : 0.795000
     >>> train loss: 0.0009343068231828511 accuracy: 0.9921875
     >>> train loss: 0.0036030986811965704 accuracy: 0.9765625
     >>> train loss: 0.016020700335502625 accuracy: 0.921875
     >>> train loss: 0.005520661361515522 accuracy: 0.96875
epoch 9 train loss: 0.004616 | accuracy on validation data : 0.798000
     >>> train loss: 0.0039305114187300205 accuracy: 0.96875
     >>> train loss: 0.0016299310373142362 accuracy: 0.9921875
     >>> train loss: 0.003350811544805765 accuracy: 0.984375
     >>> train loss: 0.01199441310018301 accuracy: 0.96875
     >>> train loss: 0.010499448515474796 accuracy: 0.9609375
epoch 10 train loss: 0.010499 | accuracy on validation data : 0.810000
     >>> train loss: 0.001286052051000297 accuracy: 0.984375
     >>> train loss: 0.0 accuracy: 1.0
     >>> train loss: 0.005797619000077248 accuracy: 0.9765625
     >>> train loss: 7.275954144736474e-11 accuracy: 1.0
epoch 11 train loss: 0.000178 | accuracy on validation data : 0.803000
     >>> train loss: 0.001688427641056478 accuracy: 0.984375
     >>> train loss: 0.00829298049211502 accuracy: 0.9609375
     >>> train loss: 0.008936351165175438 accuracy: 0.9765625
     >>> train loss: 0.002738103736191988 accuracy: 0.96875
epoch 12 train loss: 0.000001 | accuracy on validation data : 0.811000
```

In [8]:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
t0 = time.time()
bert = BERT(model_file="result/bert_za201908_big/model.ckpt-100000")
new_data = {}
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a"]:
        new_data[k][c] = bert.to_vector(data[k][c].tolist(), text_len=32,out_array=False)
t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)

tf.reset_default_graph()
emb_dim = 768*4
net = TextCnn(emb_size=emb_dim)
test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_y = tf.placeholder(tf.float32, [None, 5], name="input_label")
pred = net.predict(test_l)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={
                                                             test_l: np.array(input_l).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32)})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpcjyzxp3w', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff04693ac18>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_

INFO:tensorflow:  name = bert/encoder/layer_2/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CK

INFO:tensorflow:  name = bert/encoder/layer_6/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:te

INFO:tensorflow:  name = bert/encoder/layer_11/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/pooler/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:predictio

INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_2/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = 

INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_7/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FRO

INFO:tensorflow:  name = bert/encoder/layer_11/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_11/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/pooler/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:prediction_loop marked as finished
INFO:tensorflow:prediction_loop marked as finished
vectoring use time:  15.525202754884958
     >>> train loss: 0.5689246654510498 accuracy: 0.734375
     >>> train loss: 0.6776866316795349 accuracy: 0.75
     >>> train loss: 0.430856317281723 accuracy: 0.7890625
     >>> train loss: 0.2832392454147339 

In [21]:
import requests
def string_format(x):
    return requests.get("http://39.108.171.231:8001/za_bot/q={}".format(x)).json()['text_split'].replace(" ", "")
string_format("你不单身，所以所以不不拥挤")

'你不单身，所以不拥挤'

In [14]:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

t0 = time.time()
bert = BERT(model_file="result/bert_za201908_mini/model.ckpt-100000",
            model_config="model/za/bert_config.json",
           output_layer_ix=[-1,-2,-3])
new_data = {}
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a"]:
        new_data[k][c] = bert.to_vector(data[k][c].apply(string_format).tolist(), text_len=32,out_array=False)
t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)

tf.reset_default_graph()
emb_dim = 384*3
net = TextCnn(emb_size=emb_dim)
test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_y = tf.placeholder(tf.float32, [None, 5], name="input_label")
keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

pred = tf.nn.dropout(net.predict(test_l), keep_prob)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={
                                                             test_l: np.array(input_l).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32),
                                                             keep_prob: 0.5})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32),
                                             keep_prob: 1.0})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

     >>> train loss: 0.36229586601257324 accuracy: 0.5390625
     >>> train loss: 0.4907160997390747 accuracy: 0.6328125
     >>> train loss: 0.3091115951538086 accuracy: 0.65625
     >>> train loss: 0.32683953642845154 accuracy: 0.5625
epoch 1 train loss: 0.386557 | accuracy on validation data : 0.738000
     >>> train loss: 0.1670452058315277 accuracy: 0.5703125
     >>> train loss: 0.13762971758842468 accuracy: 0.671875
     >>> train loss: 0.15841025114059448 accuracy: 0.6796875
     >>> train loss: 0.13013812899589539 accuracy: 0.625
epoch 2 train loss: 0.091917 | accuracy on validation data : 0.767000
     >>> train loss: 0.12241031229496002 accuracy: 0.6796875
     >>> train loss: 0.09567783027887344 accuracy: 0.671875
     >>> train loss: 0.1298055648803711 accuracy: 0.609375
     >>> train loss: 0.08821520209312439 accuracy: 0.671875
epoch 3 train loss: 0.114873 | accuracy on validation data : 0.778000
     >>> train loss: 0.12911579012870789 accuracy: 0.546875
     >>> train 

```
vectoring use time:  19.930724401026964
     >>> train loss: 0.4211544394493103 accuracy: 0.6875
     >>> train loss: 0.3107902407646179 accuracy: 0.75
     >>> train loss: 0.3041752576828003 accuracy: 0.71875
     >>> train loss: 0.15610316395759583 accuracy: 0.7578125
epoch 1 train loss: 0.166255 | accuracy on validation data : 0.762000
     >>> train loss: 0.08697839081287384 accuracy: 0.8671875
     >>> train loss: 0.11021964251995087 accuracy: 0.8046875
     >>> train loss: 0.06882508099079132 accuracy: 0.8359375
     >>> train loss: 0.09185776859521866 accuracy: 0.8046875
epoch 2 train loss: 0.054762 | accuracy on validation data : 0.787000
     >>> train loss: 0.0406038761138916 accuracy: 0.90625
     >>> train loss: 0.05565176159143448 accuracy: 0.8984375
     >>> train loss: 0.015166942030191422 accuracy: 0.9453125
     >>> train loss: 0.06176426261663437 accuracy: 0.828125
epoch 3 train loss: 0.042354 | accuracy on validation data : 0.790000
     >>> train loss: 0.013793768361210823 accuracy: 0.9375
     >>> train loss: 0.018318209797143936 accuracy: 0.9296875
     >>> train loss: 0.024721544235944748 accuracy: 0.9453125
     >>> train loss: 0.031755685806274414 accuracy: 0.9140625
epoch 4 train loss: 0.029324 | accuracy on validation data : 0.802000
     >>> train loss: 0.005737480707466602 accuracy: 0.96875
     >>> train loss: 0.01985006406903267 accuracy: 0.921875
     >>> train loss: 0.004702536854892969 accuracy: 0.953125
     >>> train loss: 0.008613433688879013 accuracy: 0.96875
epoch 5 train loss: 0.036785 | accuracy on validation data : 0.793000
     >>> train loss: 0.01130962185561657 accuracy: 0.9453125
     >>> train loss: 0.012763692997395992 accuracy: 0.9609375
     >>> train loss: 0.01888938993215561 accuracy: 0.9296875
     >>> train loss: 0.011600819416344166 accuracy: 0.9296875
epoch 6 train loss: 0.015074 | accuracy on validation data : 0.768000
     >>> train loss: 0.008424262516200542 accuracy: 0.9765625
     >>> train loss: 0.03196059912443161 accuracy: 0.90625
     >>> train loss: 0.00019736637477762997 accuracy: 0.9921875
     >>> train loss: 0.014133868739008904 accuracy: 0.953125
epoch 7 train loss: 0.033024 | accuracy on validation data : 0.782000
     >>> train loss: 0.02562567964196205 accuracy: 0.8984375
     >>> train loss: 0.02925855666399002 accuracy: 0.875
     >>> train loss: 0.01405893824994564 accuracy: 0.921875
     >>> train loss: 0.009941834956407547 accuracy: 0.9453125
epoch 8 train loss: 0.015827 | accuracy on validation data : 0.815000
     >>> train loss: 0.019841350615024567 accuracy: 0.9140625
     >>> train loss: 0.018721258267760277 accuracy: 0.9453125
     >>> train loss: 0.0007191195618361235 accuracy: 0.9921875
     >>> train loss: 0.0074624791741371155 accuracy: 0.96875
epoch 9 train loss: 0.006695 | accuracy on validation data : 0.803000
     >>> train loss: 0.014011423103511333 accuracy: 0.9609375
     >>> train loss: 0.005315536633133888 accuracy: 0.9609375
     >>> train loss: 0.005875124596059322 accuracy: 0.96875
     >>> train loss: 0.021586893126368523 accuracy: 0.90625
     >>> train loss: 0.0074001336470246315 accuracy: 0.9609375
epoch 10 train loss: 0.007400 | accuracy on validation data : 0.823000
     >>> train loss: 0.01648971252143383 accuracy: 0.9375
     >>> train loss: 0.00036133729736320674 accuracy: 0.984375
     >>> train loss: 0.005254517775028944 accuracy: 0.984375
     >>> train loss: 0.020842259749770164 accuracy: 0.9296875
epoch 11 train loss: 0.004946 | accuracy on validation data : 0.819000
     >>> train loss: 0.00013206389849074185 accuracy: 0.9921875
     >>> train loss: 0.010627894662320614 accuracy: 0.9453125
     >>> train loss: 0.005015386268496513 accuracy: 0.953125
     >>> train loss: 0.004027890972793102 accuracy: 0.96875
epoch 12 train loss: 0.010968 | accuracy on validation data : 0.793000
```

In [12]:
bert = BERT(model_file="result/bert_za201908_big/model.ckpt-100000")
tf.reset_default_graph()

net = TextCnn()
test_l = tf.placeholder(tf.float32, [None, 30, 768], name="input_left")
test_y = tf.placeholder(tf.float32, [None, 5], name="input_label")

pred = net.predict(test_l)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=2e-5).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(1):
        for input_l, input_y in batch_iter(data["train"], batch_size=128, shuffle=True):
            cost, _ = sess.run([loss, optimizer], feed_dict={test_l: bert.to_vector(input_l).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32)})
#         accuracy = []
#         for val_l, val_y in batch_iter(data["dev"], batch_size=128, shuffle=False):
#         accuracy.append()
        p, a = sess.run([pred_index, acc], feed_dict={test_l: bert.to_vector(data["dev"]["a"].tolist()).astype(np.float32), 
                                                      test_y: np.array(data["dev"]["y"].tolist()).astype(np.float32)})
#         print(" "*4, "train loss: %f" % (cost/len(input_l)))
#         print(" "*4, "epoch %d accuracy on validation data : %f" % (e+1, np.mean(accuracy)))

     train loss: 0.315904
     epoch 1 accuracy on validation data : 0.258929
     train loss: 0.346664
     epoch 2 accuracy on validation data : 0.265625
     train loss: 0.305899
     epoch 3 accuracy on validation data : 0.270089
     train loss: 0.282546
     epoch 4 accuracy on validation data : 0.274554
     train loss: 0.270311
     epoch 5 accuracy on validation data : 0.276786
     train loss: 0.318003
     epoch 6 accuracy on validation data : 0.282366
     train loss: 0.310634
     epoch 7 accuracy on validation data : 0.286830
     train loss: 0.311353
     epoch 8 accuracy on validation data : 0.294643
     train loss: 0.251356
     epoch 9 accuracy on validation data : 0.296875
     train loss: 0.283258
     epoch 10 accuracy on validation data : 0.308036
     train loss: 0.247088
     epoch 11 accuracy on validation data : 0.316964
     train loss: 0.284982
     epoch 12 accuracy on validation data : 0.311384


In [32]:
bert = BERT(model_file="result/bert_za201908_big/model.ckpt-100000", output_index=-3)
tf.reset_default_graph()

net = TextCnn()
test_l = tf.placeholder(tf.float32, [None, 30, 768], name="input_left")
test_y = tf.placeholder(tf.float32, [None, 5], name="input_label")

pred = net.predict(test_l)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(2):
        for input_l, input_y in batch_iter(data["train"], batch_size=128, shuffle=True):
            cost, _ = sess.run([loss, optimizer], feed_dict={test_l: bert.to_vector(input_l).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32)})
            print(" "*4, ">>> loss: ", cost/len(input_l), "  train acc:", acc)
        p, a = sess.run([pred_index, acc], feed_dict={test_l: bert.to_vector(data["dev"]["a"].tolist()).astype(np.float32), 
                                                      test_y: np.array(data["dev"]["y"].tolist()).astype(np.float32)})
        print(">> dev acc: ", a)

>>> loss:  0.5250835418701172
>>> loss:  0.5666189193725586
>>> loss:  0.4745863676071167
>>> loss:  0.4950261116027832
>>> loss:  0.5178602933883667
>>> loss:  0.4716566503047943
>>> loss:  0.474394291639328
>>> loss:  0.4560058116912842
>>> loss:  0.4367711544036865
>>> loss:  0.38992831110954285
>>> loss:  0.4097004532814026
>>> loss:  0.3871026039123535
>>> loss:  0.39631834626197815
>>> loss:  0.36509668827056885
>>> loss:  0.32337287068367004
>>> loss:  0.3026386499404907
>>> loss:  0.32496437430381775
>>> loss:  0.3119697570800781
>>> loss:  0.27425146102905273
>>> loss:  0.270637184381485
>>> loss:  0.2334231436252594
>>> loss:  0.2620473802089691
>>> loss:  0.2573532462120056
>>> loss:  0.24385878443717957
>>> loss:  0.21170853078365326
>>> loss:  0.2256544828414917
>>> loss:  0.19569729268550873
>>> loss:  0.1902591437101364
>>> loss:  0.20787805318832397
>>> loss:  0.17820274829864502
>>> loss:  0.18504628539085388
>>> loss:  0.17889714241027832
>>> loss:  0.1504731476306915

In [29]:
data["dev"]["y_model"] = p+1
data["dev"]["y_model"].value_counts(normalize=True), pd.read_csv("data/za_data/ir_dev.csv", header=None, sep="\t", names=("a", "y"))["y"].value_counts(normalize=True)

(2    0.674
 4    0.214
 1    0.079
 3    0.032
 5    0.001
 Name: y_model, dtype: float64, 1    0.386
 5    0.221
 3    0.166
 2    0.137
 4    0.090
 Name: y, dtype: float64)

In [13]:
data["dev"].head()

Unnamed: 0,a,y
0,嗯办好,"[1, 0, 0, 0, 0]"
1,你说的是就是什么镇,"[0, 0, 0, 0, 1]"
2,嗯上次还没有,"[0, 1, 0, 0, 0]"
3,嗯好你核实多年了,"[1, 0, 0, 0, 0]"
4,我不是我不是要形成那个配偶我是替我妹妹看一下我妹妹是单身,"[0, 0, 1, 0, 0]"


### baseline

In [18]:
def rand_y(_):
    r = np.random.rand()
    if r <= 0.2:
        return [1, 0, 0, 0, 0]
    elif r <= 0.4:
        return [0, 1, 0, 0, 0]
    elif r <= 0.6:
        return [0, 0, 1, 0, 0]
    elif r <= 0.8:
        return [0, 0, 0, 1, 0]
    else:
        return [0, 0, 0, 0, 1]
data["dev"]["y_rand"] = data["dev"]["y"].apply(rand_y)
(data["dev"]["y_rand"] == data["dev"]["y"]).mean()

0.19