### jupyter 模式下tf.flags传参的方式报错

In [1]:
import codecs
import collections
import json
import os, re, time

import modeling
import tokenization
import tensorflow as tf
import numpy as np
import pandas as pd

tf.logging.set_verbosity(tf.logging.ERROR)

In [2]:
from extract_features import InputExample, InputFeatures
from extract_features import input_fn_builder, model_fn_builder, _truncate_seq_pair

def convert_examples_to_features(examples, seq_length, tokenizer, align=False):
  features = []
  for (ex_index, example) in enumerate(examples):
    tokens_a = tokenizer.tokenize(example.text_a)

    tokens_b = None
    if example.text_b:
      tokens_b = tokenizer.tokenize(example.text_b)

    if tokens_b:
      _truncate_seq_pair(tokens_a, tokens_b, seq_length - 3)
    else:
      if len(tokens_a) > seq_length - 2:
        tokens_a = tokens_a[0:(seq_length - 2)]
    tokens = []
    input_type_ids = []
    tokens.append("[CLS]")
    input_type_ids.append(0)
    for token in tokens_a:
      tokens.append(token)
      input_type_ids.append(0)
    tokens.append("[SEP]")
    input_type_ids.append(0)

    if tokens_b:
      for token in tokens_b:
        tokens.append(token)
        input_type_ids.append(1)
      tokens.append("[SEP]")
      input_type_ids.append(1)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    while len(input_ids) < seq_length:
      input_ids.append(0)
      input_mask.append(0 if not align else 1)
      input_type_ids.append(0)

    assert len(input_ids) == seq_length
    assert len(input_mask) == seq_length
    assert len(input_type_ids) == seq_length

    if ex_index < 5:
      tf.logging.info("*** Example ***")
      tf.logging.info("unique_id: %s" % (example.unique_id))
      tf.logging.info("tokens: %s" % " ".join(
          [tokenization.printable_text(x) for x in tokens]))
      tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
      tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
      tf.logging.info(
          "input_type_ids: %s" % " ".join([str(x) for x in input_type_ids]))

    features.append(
        InputFeatures(
            unique_id=example.unique_id,
            tokens=tokens,
            input_ids=input_ids,
            input_mask=input_mask,
            input_type_ids=input_type_ids))
  return features

In [3]:
class BERT(object):
    def __init__(self, model_file="model/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt",
                 model_json="model/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json",
                 model_voca="model/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt",
                 output_layer_ix= [-1, -2, -3, -4]):
        self.model_file = model_file
        self.layer_indexes = output_layer_ix       
        bert_config = modeling.BertConfig.from_json_file(model_json)
        self.tokenizer = tokenization.FullTokenizer(vocab_file=model_voca, do_lower_case=False)
        self.sys_tokens = ['[CLS]', '[SEP]']
        model_fn = model_fn_builder(
            bert_config=bert_config,
            init_checkpoint=self.model_file,
            layer_indexes=self.layer_indexes,
            use_tpu=False, use_one_hot_embeddings=False)
        is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
        run_config = tf.contrib.tpu.RunConfig(
            master=None,
            tpu_config=tf.contrib.tpu.TPUConfig(num_shards=8, per_host_input_for_training=is_per_host))
        self.estimator = tf.contrib.tpu.TPUEstimator(use_tpu=False, model_fn=model_fn, config=run_config, predict_batch_size=32)
    
    def to_vector(self, input_list, text_len=32, out_array=True):
        examples = []
        is_match = (isinstance(input_list[0], list) and len(input_list[0]) == 2)
        if is_match:
            for (j, s) in enumerate(input_list):
                examples += [InputExample(unique_id=j, text_a=s[0], text_b=s[1])]
        else:
            for (j, s) in enumerate(input_list):
                examples += [InputExample(unique_id=j, text_a=s, text_b=None)]
        features = convert_examples_to_features(examples=examples, seq_length=text_len, tokenizer=self.tokenizer, align=False)
        vec_output = []
        input_fn = input_fn_builder(features=features, seq_length=text_len)
        res = {int(r["unique_id"]): r for r in self.estimator.predict(input_fn, yield_single_examples=True)}
        for u in features:
            uu = res[u.unique_id]
            u_vec = []
            for (i, token) in enumerate(u.tokens+["[PAD]"]*(text_len-len(u.tokens))):
                if token not in self.sys_tokens:
                    v = []
                    for ck in range(len(self.layer_indexes)):
                        v += [round(float(x), 6) for x in uu["layer_output_%d" % ck][i:(i + 1)].flat]
                    u_vec.append(v)
            vec_output.append(u_vec)
        return np.array(vec_output) if out_array else vec_output

In [4]:
data = {"train": None, "dev": None}
def format_label(yy):
    origin = [0, 0]
    origin[yy] += 1
    return origin
for k in data:
    data[k] = pd.read_csv("data/za_data/kd_{}.csv".format(k), header=None, sep="\t", names=("a", "b", "y"))
    data[k]["y"] = data[k]["y"].apply(format_label)
def batch_iter(df_gen, batch_size, shuffle=True):
    obs = len(df_gen)
    if shuffle:
        data_gen = df_gen.sample(frac=1).reset_index(drop=True)
    else:
        data_gen = df_gen.copy()
    batch_num = int(obs/batch_size)
    for j in range(batch_num):
        yield (data_gen["a"].iloc[j*batch_size:min(obs, j*batch_size+batch_size)].tolist(),
        data_gen["b"].iloc[j*batch_size:min(obs, j*batch_size+batch_size)].tolist(),
        data_gen["y"].iloc[j*batch_size:min(obs, j*batch_size+batch_size)].tolist())

In [5]:
class FCLayer(object):
    def __init__(self, num_in, num_out):
        self.num_in = num_in
        self.num_out = num_out
        self.weight = tf.Variable(tf.random_normal([num_in, num_out]))
        self.bias = tf.Variable(tf.random_normal([num_out]))

    def ops(self, input_x):
        out_without_bias = tf.matmul(input_x, self.weight)
        output = tf.nn.bias_add(out_without_bias, self.bias)
        return output

In [6]:
data["dev"]["y"].astype(str).value_counts()

[1, 0]    2971
[0, 1]    2177
Name: y, dtype: int64

In [7]:
class MlpMatch(object):
    def __init__(self, emb_size=768):
        self.n_class = 2
        self.emb_size = emb_size
        self.hidden_size = 128
        self.bow_layer = FCLayer(self.emb_size, self.hidden_size)
        self.fc_layer = FCLayer(2 * self.hidden_size, self.n_class)
     
    def predict(self, left_slots, right_slots):
        left, right = left_slots, right_slots
        left_bow = self.bow_layer.ops(tf.nn.softsign(tf.reduce_sum(left, axis=1)))
        right_bow = self.bow_layer.ops(tf.nn.softsign(tf.reduce_sum(right, axis=1)))
        concat = tf.concat([left_bow, right_bow], -1)
        pred = self.fc_layer.ops(concat)
        return pred

In [8]:
# all_res = []
# for j in range(12):
bert = BERT(model_file="model/chinese_L-12_H-768_A-12/bert_model.ckpt",
            model_json="model/chinese_L-12_H-768_A-12/bert_config.json",
            model_voca="model/chinese_L-12_H-768_A-12/vocab.txt",
            output_layer_ix= [-1])
new_data = {}
text_list = []
for k in data.keys():
     for c in ["a", "b"]:
            text_list += data[k][c].tolist()
text_uniq = list(set(text_list))
text2vec = dict(zip(text_uniq, bert.to_vector(text_uniq, out_array=False)))
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a", "b"]:
        new_data[k][c] = data[k][c].map(text2vec)

tf.reset_default_graph()
emb_dim = 768*1
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 1, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 1, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(6):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32)[:, 0:1, :], 
                                                                             test_r: np.array(input_r).astype(np.float32)[:, 0:1, :], 
                                                                             test_y: np.array(input_y).astype(np.float32)})
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32)[:, 0:1, :], 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32)[:, 0:1, :], 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch {} dev acc {}".format(e+1, accuracy))
#         tk += [accuracy]
#     all_res += [tk]

epoch 1 dev acc 0.5674048066139221
epoch 2 dev acc 0.5918803215026855
epoch 3 dev acc 0.6002330780029297
epoch 4 dev acc 0.6076146364212036
epoch 5 dev acc 0.6130536198616028
epoch 6 dev acc 0.6056721210479736


In [9]:
tf.reset_default_graph()
emb_dim = 768*1
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 1, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 1, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(6):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32)[:, -1:, :], 
                                                                             test_r: np.array(input_r).astype(np.float32)[:, -1:, :], 
                                                                             test_y: np.array(input_y).astype(np.float32)})
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32)[:, -1:, :], 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32)[:, -1:, :], 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch {} dev acc {}".format(e+1, accuracy))

epoch 1 dev acc 0.6078088283538818
epoch 2 dev acc 0.6095570921897888
epoch 3 dev acc 0.6295648813247681
epoch 4 dev acc 0.6404429078102112
epoch 5 dev acc 0.6392773985862732
epoch 6 dev acc 0.6470474004745483


In [10]:
class MlpMatch(object):
    def __init__(self, emb_size=768):
        self.n_class = 2
        self.emb_size = emb_size
        self.hidden_size = 128
        self.bow_layer = FCLayer(self.emb_size, self.hidden_size)
        self.fc_layer = FCLayer(2 * self.hidden_size, self.n_class)
     
    def predict(self, left_slots, right_slots):
        left, right = left_slots, right_slots
        left_bow = self.bow_layer.ops(tf.nn.softsign(tf.reduce_mean(left, axis=1)))
        right_bow = self.bow_layer.ops(tf.nn.softsign(tf.reduce_mean(right, axis=1)))
        concat = tf.concat([left_bow, right_bow], -1)
        pred = self.fc_layer.ops(concat)
        return pred
tf.reset_default_graph()
emb_dim = 768*1
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(6):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32), 
                                                                             test_r: np.array(input_r).astype(np.float32), 
                                                                             test_y: np.array(input_y).astype(np.float32)})
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch {} dev acc {}".format(e+1, accuracy))

epoch 1 dev acc 0.6200466156005859
epoch 2 dev acc 0.6379176378250122
epoch 3 dev acc 0.6423853635787964
epoch 4 dev acc 0.6353923678398132
epoch 5 dev acc 0.6482129096984863
epoch 6 dev acc 0.6449106335639954


In [11]:
class MlpMatch(object):
    def __init__(self, emb_size=768):
        self.n_class = 2
        self.emb_size = emb_size
        self.hidden_size = 128
        self.bow_layer = FCLayer(self.emb_size, self.hidden_size)
        self.fc_layer = FCLayer(2 * self.hidden_size, self.n_class)
     
    def predict(self, left_slots, right_slots):
        left, right = left_slots, right_slots
        left_bow = self.bow_layer.ops(tf.nn.softsign(tf.reduce_sum(left, axis=1)))
        right_bow = self.bow_layer.ops(tf.nn.softsign(tf.reduce_sum(right, axis=1)))
        concat = tf.concat([left_bow, right_bow], -1)
        pred = self.fc_layer.ops(concat)
        return pred
tf.reset_default_graph()
emb_dim = 768*1
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(6):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32), 
                                                                             test_r: np.array(input_r).astype(np.float32), 
                                                                             test_y: np.array(input_y).astype(np.float32)})
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch {} dev acc {}".format(e+1, accuracy))

epoch 1 dev acc 0.6114996075630188
epoch 2 dev acc 0.6283993721008301
epoch 3 dev acc 0.6418026685714722
epoch 4 dev acc 0.6441336274147034
epoch 5 dev acc 0.6482129096984863
epoch 6 dev acc 0.6557886600494385


In [12]:
[r[-1] for r in all_res]

[0.6454934,
 0.6404429,
 0.6439394,
 0.63325566,
 0.6618104,
 0.6478244,
 0.65909094,
 0.62975913,
 0.6377234,
 0.62975913,
 0.64180267,
 0.6509324]

In [13]:
[sum(r)/6 for r in all_res]

[0.6245143711566925,
 0.6296620070934296,
 0.6280432442824045,
 0.6227337519327799,
 0.6383708814779917,
 0.6215682427088419,
 0.6323815087477366,
 0.62143874168396,
 0.6245143612225851,
 0.6173918644587199,
 0.6301476260026296,
 0.6341621379057566]

```		
# 之前的实验结果 lr=2e-5
       train loss: 0.696433
     epoch 1 accuracy on validation data : 0.517383
	 
	    train loss: 0.867437
     epoch 2 accuracy on validation data : 0.524805

```

# 交友+邀约语料效果

In [None]:
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
t0 = time.time()
bert = BERT(model_file="result/bert_za201908_0829/model.ckpt-288000")
new_data = {}
text_list = []
for k in data.keys():
     for c in ["a", "b"]:
            text_list += data[k][c].tolist()
text_uniq = list(set(text_list))
text2vec = dict(zip(text_uniq, bert.to_vector(text_uniq, out_array=False)))
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a", "b"]:
        new_data[k][c] = data[k][c].map(text2vec)
t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)

tf.reset_default_graph()
emb_dim = 768*4
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32), 
                                                                             test_r: np.array(input_r).astype(np.float32), 
                                                                             test_y: np.array(input_y).astype(np.float32)})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

vectoring use time:  8.477753542363644
     >>> train loss: 1.5904645919799805 accuracy: 0.5234375
     >>> train loss: 1.5448942184448242 accuracy: 0.5390625
     >>> train loss: 1.1048426628112793 accuracy: 0.671875
     >>> train loss: 1.4297349452972412 accuracy: 0.5703125
     >>> train loss: 1.251166582107544 accuracy: 0.625
     >>> train loss: 0.97022545337677 accuracy: 0.640625
     >>> train loss: 1.5535123348236084 accuracy: 0.5859375
     >>> train loss: 1.1752512454986572 accuracy: 0.5859375
     >>> train loss: 1.1263431310653687 accuracy: 0.5859375
     >>> train loss: 1.3652117252349854 accuracy: 0.6171875
     >>> train loss: 1.0786538124084473 accuracy: 0.6640625
     >>> train loss: 1.3199524879455566 accuracy: 0.5234375
     >>> train loss: 0.9020547270774841 accuracy: 0.65625
     >>> train loss: 0.9874982833862305 accuracy: 0.640625
     >>> train loss: 0.8561273813247681 accuracy: 0.5859375
     >>> train loss: 1.133991003036499 accuracy: 0.5390625
epoch 1 train 

```
mix 237000
epoch 12 train loss: 0.311918 | accuracy on validation data : 0.650738
mix 28000
epoch 12 train loss: 0.199452 | accuracy on validation data : 0.673077
```

## 检查predict的概率结果

In [8]:
t0 = time.time()
bert = BERT()
new_data = {}
text_list = []
for k in data.keys():
     for c in ["a", "b"]:
            text_list += data[k][c].tolist()
text_uniq = list(set(text_list))
text2vec = dict(zip(text_uniq, bert.to_vector(text_uniq, out_array=False)))
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a", "b"]:
        new_data[k][c] = data[k][c].map(text2vec)
t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)

tf.reset_default_graph()
emb_dim = 768*4
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
y_true = tf.argmax(test_y, 1)
correct_pred = tf.equal(pred_index, y_true)
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

res_check = []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(3):
        print(">>>> epoch {}".format(e+1))
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=256, shuffle=True):
            cost, _, train_acc = sess.run([loss, optimizer, acc], 
                                          feed_dict={test_l: np.array(input_l).astype(np.float32), 
                                                     test_r: np.array(input_r).astype(np.float32), 
                                                     test_y: np.array(input_y).astype(np.float32)})
        res_check.append(sess.run([y_true, pred_index, pred_prob], 
                                  feed_dict={test_l: np.array(new_data["train"]["a"].tolist()).astype(np.float32), 
                                             test_r: np.array(new_data["train"]["b"].tolist()).astype(np.float32), 
                                             test_y: np.array(new_data["train"]["y"].tolist()).astype(np.float32)}))

vectoring use time:  5.270684786140919
>>>> epoch 1
>>>> epoch 2
>>>> epoch 3


In [15]:
y = np.bincount(res_check[-1][0])
ii = np.nonzero(y)[0]
np.vstack((ii, y[ii])).T

array([[    0, 11767],
       [    1,  8824]])

In [16]:
res_check[-1][1]

array([0, 0, 0, ..., 1, 1, 0])

In [17]:
y = np.bincount(res_check[-1][1])
ii = np.nonzero(y)[0]
np.vstack((ii, y[ii])).T

array([[    0, 13089],
       [    1,  7502]])

In [19]:
import pandas as pd

b = pd.DataFrame(res_check[-1][-1])

In [21]:
b[0].value_counts()

1.000000e+00    12814
0.000000e+00     5889
9.999999e-01       24
9.999995e-01        8
9.999994e-01        7
9.999998e-01        6
9.999993e-01        5
9.999996e-01        4
9.999980e-01        3
9.999987e-01        3
9.999986e-01        3
9.999992e-01        3
9.999909e-01        2
9.999976e-01        2
9.999979e-01        2
7.790630e-29        2
2.576094e-14        2
3.724624e-01        2
9.999964e-01        2
9.999989e-01        2
4.755071e-07        2
9.999962e-01        2
9.999990e-01        2
2.393671e-17        2
7.577560e-32        2
9.999933e-01        2
9.999753e-01        2
3.827042e-03        1
1.607436e-37        1
9.484119e-01        1
                ...  
7.485992e-10        1
1.158146e-37        1
3.747597e-03        1
2.931484e-21        1
1.425878e-34        1
3.529735e-07        1
2.668193e-19        1
9.902498e-01        1
5.613463e-12        1
6.052390e-30        1
2.931820e-21        1
6.465718e-01        1
1.220690e-32        1
1.170703e-22        1
4.150280e-

In [None]:
b.head(30)

In [9]:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
t0 = time.time()
bert = BERT()
new_data = {}
text_list = []
for k in data.keys():
     for c in ["a", "b"]:
            text_list += data[k][c].tolist()
text_uniq = list(set(text_list))
text2vec = dict(zip(text_uniq, bert.to_vector(text_uniq, out_array=False)))
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a", "b"]:
        new_data[k][c] = data[k][c].map(text2vec)
t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)

tf.reset_default_graph()
emb_dim = 768*4
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32), 
                                                                             test_r: np.array(input_r).astype(np.float32), 
                                                                             test_y: np.array(input_y).astype(np.float32)})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

     >>> train loss: 2.065300226211548 accuracy: 0.5390625
     >>> train loss: 2.092838764190674 accuracy: 0.5859375
     >>> train loss: 1.9274849891662598 accuracy: 0.625
     >>> train loss: 1.8183304071426392 accuracy: 0.5703125
     >>> train loss: 1.059439778327942 accuracy: 0.6484375
     >>> train loss: 1.293471336364746 accuracy: 0.65625
     >>> train loss: 1.3458620309829712 accuracy: 0.609375
     >>> train loss: 1.07334303855896 accuracy: 0.609375
     >>> train loss: 1.309714913368225 accuracy: 0.6015625
     >>> train loss: 1.3705319166183472 accuracy: 0.59375
     >>> train loss: 1.6692157983779907 accuracy: 0.5546875
     >>> train loss: 1.33089017868042 accuracy: 0.625
     >>> train loss: 1.111160397529602 accuracy: 0.6640625
     >>> train loss: 1.2898821830749512 accuracy: 0.6484375
     >>> train loss: 1.1710504293441772 accuracy: 0.671875
     >>> train loss: 0.99797123670578 accuracy: 0.71875
epoch 1 train loss: 0.997971 | accuracy on validation data : 0.641026

     >>> train loss: 0.30512410402297974 accuracy: 0.734375
     >>> train loss: 0.3931533098220825 accuracy: 0.703125
     >>> train loss: 0.27679377794265747 accuracy: 0.7421875
     >>> train loss: 0.3370121121406555 accuracy: 0.734375
     >>> train loss: 0.2972857356071472 accuracy: 0.75
     >>> train loss: 0.231839120388031 accuracy: 0.8125
     >>> train loss: 0.2992209494113922 accuracy: 0.75
     >>> train loss: 0.38652878999710083 accuracy: 0.671875
     >>> train loss: 0.3392358124256134 accuracy: 0.6953125
     >>> train loss: 0.427962064743042 accuracy: 0.65625
     >>> train loss: 0.2573956251144409 accuracy: 0.7265625
     >>> train loss: 0.282268226146698 accuracy: 0.7734375
     >>> train loss: 0.44135582447052 accuracy: 0.6640625
     >>> train loss: 0.2855936586856842 accuracy: 0.734375
epoch 9 train loss: 0.285594 | accuracy on validation data : 0.669580
     >>> train loss: 0.20667220652103424 accuracy: 0.7890625
     >>> train loss: 0.2889723777770996 accuracy: 0

In [14]:
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
t0 = time.time()
bert = BERT(model_file="model/chinese_L-12_H-768_A-12/bert_model.ckpt",
            model_json="model/chinese_L-12_H-768_A-12/bert_config.json",
            model_voca="model/chinese_L-12_H-768_A-12/vocab.txt",
            output_layer_ix= [4, 5, 6])
new_data = {}
text_list = []
for k in data.keys():
     for c in ["a", "b"]:
            text_list += data[k][c].tolist()
text_uniq = list(set(text_list))
text2vec = dict(zip(text_uniq, bert.to_vector(text_uniq, out_array=False)))
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a", "b"]:
        new_data[k][c] = data[k][c].map(text2vec)

t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)
tf.reset_default_graph()
emb_dim = 768*3
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32), 
                                                                             test_r: np.array(input_r).astype(np.float32), 
                                                                             test_y: np.array(input_y).astype(np.float32)})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

vectoring use time:  5.287542726844549
     >>> train loss: 2.159787893295288 accuracy: 0.4921875
     >>> train loss: 1.3735638856887817 accuracy: 0.578125
     >>> train loss: 1.639953374862671 accuracy: 0.53125
     >>> train loss: 1.460906744003296 accuracy: 0.53125
     >>> train loss: 1.195788025856018 accuracy: 0.6015625
     >>> train loss: 1.089930772781372 accuracy: 0.59375
     >>> train loss: 1.1831101179122925 accuracy: 0.6015625
     >>> train loss: 1.0616812705993652 accuracy: 0.640625
     >>> train loss: 0.970866322517395 accuracy: 0.6171875
     >>> train loss: 1.2295063734054565 accuracy: 0.5703125
     >>> train loss: 1.3567802906036377 accuracy: 0.5390625
     >>> train loss: 1.0961774587631226 accuracy: 0.578125
     >>> train loss: 0.9736884832382202 accuracy: 0.59375
     >>> train loss: 0.84832763671875 accuracy: 0.6015625
     >>> train loss: 0.854209840297699 accuracy: 0.6953125
     >>> train loss: 0.9050958156585693 accuracy: 0.5703125
epoch 1 train loss: 0

     >>> train loss: 0.3153820037841797 accuracy: 0.703125
     >>> train loss: 0.29123079776763916 accuracy: 0.7109375
     >>> train loss: 0.2946123480796814 accuracy: 0.703125
     >>> train loss: 0.37641799449920654 accuracy: 0.6171875
     >>> train loss: 0.20550036430358887 accuracy: 0.7421875
     >>> train loss: 0.2464301884174347 accuracy: 0.71875
     >>> train loss: 0.31087058782577515 accuracy: 0.65625
     >>> train loss: 0.2533394694328308 accuracy: 0.734375
     >>> train loss: 0.24885764718055725 accuracy: 0.734375
     >>> train loss: 0.3323311507701874 accuracy: 0.7109375
     >>> train loss: 0.15576530992984772 accuracy: 0.765625
     >>> train loss: 0.24162507057189941 accuracy: 0.65625
     >>> train loss: 0.35949668288230896 accuracy: 0.6640625
     >>> train loss: 0.24265816807746887 accuracy: 0.7109375
     >>> train loss: 0.28265562653541565 accuracy: 0.65625
epoch 9 train loss: 0.282656 | accuracy on validation data : 0.660256
     >>> train loss: 0.1792506277

### 不同层的组合得到的结果
- -1,-2，-3，-4    0.683566
- 4,5,6    0.659479      

In [11]:
t0 = time.time()
bert = BERT(model_file="result/bert_za201908_big/model.ckpt-100000")
new_data = {}
text_list = []
for k in data.keys():
     for c in ["a", "b"]:
            text_list += data[k][c].tolist()
text_uniq = list(set(text_list))
text2vec = dict(zip(text_uniq, bert.to_vector(text_uniq, out_array=False)))
for k in data.keys():
    new_data[k] = data[k].copy()
    for c in ["a", "b"]:
        new_data[k][c] = data[k][c].map(text2vec)
#         new_data[k][c] = bert.to_vector(data[k][c].tolist(), out_array=False)

t1 = time.time()
print("vectoring use time: ", t1/60-t0/60)
tf.reset_default_graph()
emb_dim = 768*4
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32), 
                                                                             test_r: np.array(input_r).astype(np.float32), 
                                                                             test_y: np.array(input_y).astype(np.float32)})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

vectoring use time:  8.916096109896898
     >>> train loss: 1.539289116859436 accuracy: 0.5234375
     >>> train loss: 1.6461944580078125 accuracy: 0.4765625
     >>> train loss: 1.183746099472046 accuracy: 0.609375
     >>> train loss: 1.3629640340805054 accuracy: 0.5625
     >>> train loss: 0.9062483906745911 accuracy: 0.65625
     >>> train loss: 1.1912500858306885 accuracy: 0.6328125
     >>> train loss: 1.0276848077774048 accuracy: 0.625
     >>> train loss: 1.1037805080413818 accuracy: 0.6015625
     >>> train loss: 1.0508602857589722 accuracy: 0.609375
     >>> train loss: 0.7948389053344727 accuracy: 0.6328125
     >>> train loss: 1.1571872234344482 accuracy: 0.5625
     >>> train loss: 0.6319066286087036 accuracy: 0.6875
     >>> train loss: 1.0348927974700928 accuracy: 0.59375
     >>> train loss: 0.7927854061126709 accuracy: 0.6796875
     >>> train loss: 0.5662117004394531 accuracy: 0.640625
     >>> train loss: 0.8857527375221252 accuracy: 0.6015625
epoch 1 train loss: 0.8

     >>> train loss: 0.35020700097084045 accuracy: 0.65625
     >>> train loss: 0.2961310148239136 accuracy: 0.703125
     >>> train loss: 0.37107574939727783 accuracy: 0.6953125
     >>> train loss: 0.16494566202163696 accuracy: 0.8203125
     >>> train loss: 0.28931888937950134 accuracy: 0.671875
     >>> train loss: 0.29804670810699463 accuracy: 0.6953125
     >>> train loss: 0.32759228348731995 accuracy: 0.6796875
     >>> train loss: 0.2628668546676636 accuracy: 0.703125
     >>> train loss: 0.2818777859210968 accuracy: 0.65625
     >>> train loss: 0.28247156739234924 accuracy: 0.7421875
     >>> train loss: 0.2781570553779602 accuracy: 0.6953125
     >>> train loss: 0.4173663854598999 accuracy: 0.65625
     >>> train loss: 0.27691650390625 accuracy: 0.6953125
     >>> train loss: 0.1879926323890686 accuracy: 0.6796875
     >>> train loss: 0.33778655529022217 accuracy: 0.7109375
epoch 9 train loss: 0.337787 | accuracy on validation data : 0.667055
     >>> train loss: 0.2588917016

In [8]:
# import requests
# def string_format(x):
#     return requests.get("http://39.108.171.231:8001/za_bot/q={}".format(x)).json()['text_split'].replace(" ", "")
# print(string_format("你不单身，所以所以不不拥挤"))
# t0 = time.time()
# bert = BERT(model_file="result/bert_za201908_mini/model.ckpt-100000")
# new_data = {}
# text_list = []
# for k in data.keys():
#      for c in ["a", "b"]:
#             data[k][c] = data[k][c].apply(string_format)
#             text_list += data[k][c].tolist()
# text_uniq = list(set(text_list))
# text2vec = dict(zip(text_uniq, bert.to_vector(text_uniq, out_array=False)))
# for k in data.keys():
#     new_data[k] = data[k].copy()
#     for c in ["a", "b"]:
#         new_data[k][c] = data[k][c].map(text2vec)

# t1 = time.time()
# print("vectoring use time: ", t1/60-t0/60)
tf.reset_default_graph()
emb_dim = 384*4
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    j = 0
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(new_data["train"], batch_size=128, shuffle=True):
            j += 1
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: np.array(input_l).astype(np.float32), 
                                                                             test_r: np.array(input_r).astype(np.float32), 
                                                                             test_y: np.array(input_y).astype(np.float32)})
            if j % 10 == 0:
                print(" "*4, ">>> train loss: {} accuracy: {}".format(cost/len(input_l), train_acc))
        accuracy = sess.run(acc, feed_dict={test_l: np.array(new_data["dev"]["a"].tolist()).astype(np.float32), 
                                            test_r: np.array(new_data["dev"]["b"].tolist()).astype(np.float32), 
                                            test_y: np.array(new_data["dev"]["y"].tolist()).astype(np.float32)})
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), accuracy))

KeyError: 'train'

In [None]:
1+2

## ----------------------------------------------------------------历史做法： 速度慢

In [9]:
bert = BERT(model_file="result/bert_za201908_big/model.ckpt-100000")
tf.reset_default_graph()
emb_dim = 768
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(data["train"], batch_size=128, shuffle=True):
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: bert.to_vector(input_l).astype(np.float32), 
                                                             test_r: bert.to_vector(input_r).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32)})
            print(" "*4, ">>> train loss: {} acc: {}".format(cost/len(input_l), train_acc))
        accuracy = []
        for val_l, val_r, val_y in batch_iter(data["dev"], batch_size=128, shuffle=False):
            accuracy.append(sess.run(acc, feed_dict={test_l: bert.to_vector(val_l).astype(np.float32), 
                                                     test_r: bert.to_vector(val_r).astype(np.float32), 
                                                     test_y: np.array(val_y).astype(np.float32)}))
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), np.mean(accuracy)))

     >>> train loss: 1.7201642990112305 acc: 0.515625
     >>> train loss: 1.9415602684020996 acc: 0.515625
     >>> train loss: 1.8373547792434692 acc: 0.5234375
     >>> train loss: 1.7114359140396118 acc: 0.515625
     >>> train loss: 1.4612948894500732 acc: 0.546875
     >>> train loss: 1.8392605781555176 acc: 0.53125
     >>> train loss: 1.887251615524292 acc: 0.5390625
     >>> train loss: 1.895876407623291 acc: 0.5390625
     >>> train loss: 1.8766218423843384 acc: 0.5
     >>> train loss: 2.2388193607330322 acc: 0.46875
     >>> train loss: 1.8130422830581665 acc: 0.546875
     >>> train loss: 2.2557578086853027 acc: 0.484375
     >>> train loss: 1.2660441398620605 acc: 0.671875
     >>> train loss: 1.8164525032043457 acc: 0.5703125
     >>> train loss: 1.917902946472168 acc: 0.53125
     >>> train loss: 2.5921597480773926 acc: 0.390625
     >>> train loss: 1.9619215726852417 acc: 0.5234375
     >>> train loss: 1.8564125299453735 acc: 0.5390625
     >>> train loss: 1.5110725164

     >>> train loss: 1.1254023313522339 acc: 0.5546875
     >>> train loss: 1.734575867652893 acc: 0.421875
     >>> train loss: 1.3168156147003174 acc: 0.53125
     >>> train loss: 1.1347332000732422 acc: 0.5859375
     >>> train loss: 1.212856411933899 acc: 0.53125
     >>> train loss: 1.093843936920166 acc: 0.5078125
     >>> train loss: 1.3223791122436523 acc: 0.5078125
epoch 1 train loss: 1.322379 | accuracy on validation data : 0.504297
     >>> train loss: 1.5741267204284668 acc: 0.421875
     >>> train loss: 1.634793758392334 acc: 0.4296875
     >>> train loss: 1.145598292350769 acc: 0.515625
     >>> train loss: 1.2950990200042725 acc: 0.53125
     >>> train loss: 1.3575127124786377 acc: 0.484375
     >>> train loss: 1.453697681427002 acc: 0.4765625
     >>> train loss: 1.2504031658172607 acc: 0.5
     >>> train loss: 1.3305144309997559 acc: 0.453125
     >>> train loss: 1.3266551494598389 acc: 0.5390625
     >>> train loss: 1.7012150287628174 acc: 0.4453125
     >>> train los

KeyboardInterrupt: 

In [21]:
bert = BERT(output_index=-1)
tf.reset_default_graph()
emb_dim = 768
net = MlpMatch(emb_size=emb_dim)

test_l = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, emb_dim], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(data["train"], batch_size=128, shuffle=True):
            cost, _, train_acc = sess.run([loss, optimizer, acc], feed_dict={test_l: bert.to_vector(input_l).astype(np.float32), 
                                                             test_r: bert.to_vector(input_r).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32)})
            print(" "*4, ">>> train loss: {} acc: {}".format(cost/len(input_l), train_acc))
        accuracy = []
        for val_l, val_r, val_y in batch_iter(data["dev"], batch_size=128, shuffle=False):
            accuracy.append(sess.run(acc, feed_dict={test_l: bert.to_vector(val_l).astype(np.float32), 
                                                     test_r: bert.to_vector(val_r).astype(np.float32), 
                                                     test_y: np.array(val_y).astype(np.float32)}))
        print("epoch %d train loss: %f | accuracy on validation data : %f" % (e+1, cost/len(input_l), np.mean(accuracy)))

     >>> train loss: 1.3572967052459717 acc: 0.4375
     >>> train loss: 1.0427614450454712 acc: 0.5
     >>> train loss: 1.408369541168213 acc: 0.4609375
     >>> train loss: 1.393079400062561 acc: 0.4453125
     >>> train loss: 1.0551095008850098 acc: 0.484375
     >>> train loss: 1.5600337982177734 acc: 0.40625
     >>> train loss: 1.0152246952056885 acc: 0.546875
     >>> train loss: 1.1719434261322021 acc: 0.4609375
     >>> train loss: 0.7460293769836426 acc: 0.5546875
     >>> train loss: 0.8678512573242188 acc: 0.484375
     >>> train loss: 0.8931200504302979 acc: 0.4921875
     >>> train loss: 1.3132646083831787 acc: 0.4296875
     >>> train loss: 1.0457582473754883 acc: 0.5
     >>> train loss: 0.9181177616119385 acc: 0.484375
     >>> train loss: 0.9810527563095093 acc: 0.4765625
     >>> train loss: 1.023012638092041 acc: 0.515625
     >>> train loss: 0.866935133934021 acc: 0.5078125
     >>> train loss: 0.9800042510032654 acc: 0.4609375
     >>> train loss: 1.0225468873977

     >>> train loss: 0.578159749507904 acc: 0.5703125
     >>> train loss: 0.5104390382766724 acc: 0.609375
     >>> train loss: 0.5361513495445251 acc: 0.6015625
     >>> train loss: 0.3780931234359741 acc: 0.640625
     >>> train loss: 0.5171853303909302 acc: 0.5546875
     >>> train loss: 0.4536614716053009 acc: 0.6640625
     >>> train loss: 0.5215556621551514 acc: 0.625
     >>> train loss: 0.4195683002471924 acc: 0.609375
epoch 1 train loss: 0.419568 | accuracy on validation data : 0.609961
     >>> train loss: 0.4715961217880249 acc: 0.625
     >>> train loss: 0.3821600675582886 acc: 0.625
     >>> train loss: 0.462408185005188 acc: 0.578125
     >>> train loss: 0.37534183263778687 acc: 0.65625
     >>> train loss: 0.4852742850780487 acc: 0.65625
     >>> train loss: 0.4135369062423706 acc: 0.6171875
     >>> train loss: 0.4838082492351532 acc: 0.59375
     >>> train loss: 0.39961037039756775 acc: 0.6171875
     >>> train loss: 0.37637391686439514 acc: 0.6796875
     >>> train l

     >>> train loss: 0.427790105342865 acc: 0.609375
     >>> train loss: 0.3242745101451874 acc: 0.671875
     >>> train loss: 0.4000222384929657 acc: 0.5859375
     >>> train loss: 0.31905630230903625 acc: 0.6953125
     >>> train loss: 0.24604183435440063 acc: 0.65625
     >>> train loss: 0.28437623381614685 acc: 0.6875
     >>> train loss: 0.3176792860031128 acc: 0.65625
     >>> train loss: 0.24916258454322815 acc: 0.7421875
     >>> train loss: 0.4361753463745117 acc: 0.59375
     >>> train loss: 0.33698830008506775 acc: 0.640625
     >>> train loss: 0.4201204776763916 acc: 0.625
     >>> train loss: 0.4589358866214752 acc: 0.53125
     >>> train loss: 0.3544965386390686 acc: 0.640625
     >>> train loss: 0.35143589973449707 acc: 0.6796875
     >>> train loss: 0.2812773287296295 acc: 0.65625
     >>> train loss: 0.25590282678604126 acc: 0.703125
     >>> train loss: 0.31656551361083984 acc: 0.6953125
     >>> train loss: 0.32046687602996826 acc: 0.671875
epoch 2 train loss: 0.320

     >>> train loss: 0.27392178773880005 acc: 0.671875
     >>> train loss: 0.20665235817432404 acc: 0.703125
     >>> train loss: 0.2349272519350052 acc: 0.6640625
     >>> train loss: 0.2621561884880066 acc: 0.65625
     >>> train loss: 0.21637172996997833 acc: 0.7578125
     >>> train loss: 0.43335819244384766 acc: 0.5546875
     >>> train loss: 0.34849268198013306 acc: 0.6015625
     >>> train loss: 0.3513830304145813 acc: 0.625
     >>> train loss: 0.28657829761505127 acc: 0.65625
     >>> train loss: 0.2797906696796417 acc: 0.6484375
     >>> train loss: 0.3281814157962799 acc: 0.6171875
     >>> train loss: 0.24615536630153656 acc: 0.6640625
     >>> train loss: 0.26944559812545776 acc: 0.65625
     >>> train loss: 0.26157844066619873 acc: 0.6640625
     >>> train loss: 0.3734705448150635 acc: 0.609375
     >>> train loss: 0.304768443107605 acc: 0.609375
     >>> train loss: 0.20549462735652924 acc: 0.65625
     >>> train loss: 0.2748517394065857 acc: 0.578125
     >>> train los

     >>> train loss: 0.22842390835285187 acc: 0.640625
     >>> train loss: 0.20622192323207855 acc: 0.5859375
     >>> train loss: 0.19639378786087036 acc: 0.640625
     >>> train loss: 0.21235992014408112 acc: 0.671875
     >>> train loss: 0.28287196159362793 acc: 0.6171875
     >>> train loss: 0.2521624267101288 acc: 0.625
     >>> train loss: 0.20457719266414642 acc: 0.703125
     >>> train loss: 0.2738567888736725 acc: 0.6171875
     >>> train loss: 0.23163625597953796 acc: 0.6171875
     >>> train loss: 0.2759988307952881 acc: 0.609375
     >>> train loss: 0.26168644428253174 acc: 0.6015625
     >>> train loss: 0.21951285004615784 acc: 0.6484375
     >>> train loss: 0.21036547422409058 acc: 0.6796875
     >>> train loss: 0.23631441593170166 acc: 0.6640625
     >>> train loss: 0.24560105800628662 acc: 0.6171875
     >>> train loss: 0.24085575342178345 acc: 0.671875
     >>> train loss: 0.3140782117843628 acc: 0.59375
     >>> train loss: 0.18900534510612488 acc: 0.6953125
     >>>

     >>> train loss: 0.27399972081184387 acc: 0.6171875
     >>> train loss: 0.1456325501203537 acc: 0.6953125
     >>> train loss: 0.18738839030265808 acc: 0.671875
     >>> train loss: 0.20596134662628174 acc: 0.640625
     >>> train loss: 0.21315470337867737 acc: 0.6328125
     >>> train loss: 0.15729324519634247 acc: 0.6875
     >>> train loss: 0.20872901380062103 acc: 0.703125
     >>> train loss: 0.22602644562721252 acc: 0.6796875
     >>> train loss: 0.20998001098632812 acc: 0.671875
     >>> train loss: 0.23780739307403564 acc: 0.6015625
     >>> train loss: 0.16803495585918427 acc: 0.71875
     >>> train loss: 0.27070531249046326 acc: 0.5625
     >>> train loss: 0.22116674482822418 acc: 0.6796875
     >>> train loss: 0.20916321873664856 acc: 0.6875
     >>> train loss: 0.1917409598827362 acc: 0.640625
     >>> train loss: 0.19084787368774414 acc: 0.671875
     >>> train loss: 0.16922080516815186 acc: 0.6484375
     >>> train loss: 0.23541522026062012 acc: 0.609375
     >>> tra

     >>> train loss: 0.19386018812656403 acc: 0.640625
     >>> train loss: 0.11203819513320923 acc: 0.7265625
     >>> train loss: 0.17598867416381836 acc: 0.6171875
     >>> train loss: 0.13642632961273193 acc: 0.6796875
     >>> train loss: 0.17101867496967316 acc: 0.71875
     >>> train loss: 0.16311441361904144 acc: 0.671875
     >>> train loss: 0.18506930768489838 acc: 0.671875
     >>> train loss: 0.15989848971366882 acc: 0.6875
     >>> train loss: 0.23664017021656036 acc: 0.6328125
     >>> train loss: 0.1348080039024353 acc: 0.6328125
     >>> train loss: 0.13514986634254456 acc: 0.6640625
     >>> train loss: 0.16310903429985046 acc: 0.71875
     >>> train loss: 0.2670969069004059 acc: 0.6328125
     >>> train loss: 0.19049504399299622 acc: 0.7421875
     >>> train loss: 0.18568724393844604 acc: 0.6796875
     >>> train loss: 0.13116005063056946 acc: 0.6953125
     >>> train loss: 0.20696237683296204 acc: 0.609375
     >>> train loss: 0.12729309499263763 acc: 0.703125
     >

KeyboardInterrupt: 

关于向量输出的测试

In [31]:
a = bert.to_vector(["明明我还是单身↑"], text_len=512)
a[0][1][0:9]

origin input: [<tf.Tensor 'strided_slice:0' shape=() dtype=int32>, 512]
['[CLS]', '明', '明', '我', '还', '是', '单', '身', '↑', '[SEP]']


[0.324718,
 0.372894,
 0.087528,
 0.38593,
 -0.269046,
 -0.922128,
 -0.300606,
 -0.201306,
 -0.409154]

In [19]:
len(a[0][0])

768

In [2]:
import json
with open("data/output.jsonl", "r") as fp:
    eg = json.load(fp)

In [3]:
eg

{'linex_index': 0,
 'features': [{'token': '[CLS]',
   'layers': [{'index': -1,
     'values': [0.629183,
      -0.191103,
      -0.26365,
      0.161589,
      1.073498,
      -0.745127,
      1.063478,
      -0.914787,
      -1.393338,
      0.247532,
      -0.400075,
      0.441474,
      -0.191388,
      0.652307,
      1.507912,
      -0.712661,
      0.845644,
      -1.181462,
      -1.183759,
      -1.075841,
      0.285883,
      -0.01798,
      -0.699328,
      0.36628,
      -0.333964,
      0.082447,
      -0.474392,
      0.129567,
      0.566385,
      0.358034,
      0.494514,
      0.183854,
      -0.460347,
      0.447424,
      0.742525,
      0.034597,
      0.456384,
      -0.129924,
      0.068336,
      -0.235207,
      -0.477076,
      -0.152544,
      -0.55563,
      2.156748,
      0.882817,
      -0.258834,
      -0.037365,
      1.053524,
      -0.91791,
      0.584426,
      0.165221,
      6.820019,
      1.782693,
      0.562387,
      -1.037156,
      0.32

In [5]:
len(eg['features'][0]['layers'][0]['values'])

768