## ELMO效果测试

In [1]:
import tensorflow as tf
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from bilm import TokenBatcher, BidirectionalLanguageModel, weight_layers


class ELMO(object):
    def __init__(self, character=False, model_path="./"):
        vocab_file = os.path.join("./", 'seg_words.txt')
        options_file = os.path.join(model_path, 'options.json')
        weight_file = os.path.join(model_path,'weights.hdf5')
        token_embedding_file = os.path.join(model_path, 'vocab_embedding.hdf5')
        self.batcher = TokenBatcher(vocab_file)
        self.bilm = BidirectionalLanguageModel(options_file, weight_file, use_character_inputs=character, 
                                               embedding_weight_file=token_embedding_file)
        self.context_token_ids = tf.placeholder('int32', shape=(None, None))
        self.context_embeddings_op = self.bilm(self.context_token_ids)
        self.elmo_context_input = weight_layers('input', self.context_embeddings_op, l2_coef=0.0)
        self.elmo_context_output = weight_layers('output', self.context_embeddings_op, l2_coef=0.0)
        self.ses = tf.Session()
        self.ses.run(tf.global_variables_initializer())
        
    def to_vector(self, text_list):
        context_ids = self.batcher.batch_sentences(text_list)
        return self.ses.run(self.elmo_context_input['weighted_op'], feed_dict={self.context_token_ids: context_ids})

In [2]:
import time
t0 = time.time()
a = model.to_vector([list('这是啥'), ])
print(time.time()-t0)
a.shape

0.020879745483398438


(1, 3, 1024)

In [4]:
a.sum(axis=1).shape
a

array([[[-0.00932189, -0.59012043, -0.17174587, ..., -0.39263338,
         -0.6241608 ,  0.1475308 ],
        [-0.04369204,  0.07538365, -0.5509269 , ...,  0.35087165,
         -0.29011977,  0.03631983],
        [ 0.44123632, -0.06315079, -0.29441255, ...,  0.36990762,
         -0.24927339, -0.06850408]]], dtype=float32)

In [2]:
import pandas as pd
data = {"train": None, "dev": None}
def format_str(s):
    if len(s) < 30:
        return list(s)+['<UNK>']*(30-len(s))
    else:
        return list(s[0:30])
def format_label(yy):
    origin = [0, 0]
    origin[yy] += 1
    return origin
for k in data:
    data[k] = pd.read_csv("../data/za_data/kd_{}.csv".format(k), header=None, sep="\t", names=("a", "b", "y"))
    data[k]["y"] = data[k]["y"].apply(format_label)
    for c in ["a", "b"]:
        data[k][c] = data[k][c].apply(format_str)
        print("{} {} maxlen {}".format(k, c, data[k][c].apply(len).max()))

train a maxlen 30
train b maxlen 30
dev a maxlen 30
dev b maxlen 30


In [5]:
data["dev"].head()

Unnamed: 0,a,b,y
0,"[嗯, ，, 然, 后, 要, 交, 钱, 是, 吗, ，, <UNK>, <UNK>, <...","[我, 那, 个, 珍, 爱, 网, ，, 那, 是, 别, 人, 瞎, 给, 我, 整, ...","[1, 0]"
1,"[嗯, 你, 这, 是, 你, 到, 哪, 边, 的, <UNK>, <UNK>, <UNK...","[我, 我, 我, 这, 好, 像, 是, 我, 朋, 友, 的, 帮, 我, 讲, 的, ...","[1, 0]"
2,"[我, 想, 就, 是, 网, 上, 太, 假, 了, <UNK>, <UNK>, <UNK...","[你, ，, 你, 能, 看, 出, 我, 需, 要, 什, 么, 样, 的, 对, 象, ...","[1, 0]"
3,"[您, 来, 电, 话, 的, 话, 收, 费, 吗, ，, <UNK>, <UNK>, <...","[你, 啊, ，, 还, 是, 单, 身, 了, 你, ，, 你, 在, 这, 里, 什, ...","[1, 0]"
4,"[那, 你, 们, 这, 边, 要, 收, 费, 吗, ，, 这, 个, 收, 费, 怎, ...","[嗯, 你, 是, 哪, 个, 哪, 个, 平, 台, <UNK>, <UNK>, <UNK...","[1, 0]"


In [6]:
def batch_iter(df_gen, batch_size, shuffle=True):
    obs = len(df_gen)
    if shuffle:
        data_gen = df_gen.sample(frac=1).reset_index(drop=True)
    else:
        data_gen = df_gen.copy()
    batch_num = int(obs/batch_size)
    for j in range(batch_num):
        yield (data_gen["a"].iloc[j*batch_size:min(obs, j*batch_size+batch_size)].tolist(),
        data_gen["b"].iloc[j*batch_size:min(obs, j*batch_size+batch_size)].tolist(),
        data_gen["y"].iloc[j*batch_size:min(obs, j*batch_size+batch_size)].tolist())

In [7]:
class FCLayer(object):
    def __init__(self, num_in, num_out):
        self.num_in = num_in
        self.num_out = num_out
        self.weight = tf.Variable(tf.random_normal([num_in, num_out]))
        self.bias = tf.Variable(tf.random_normal([num_out]))

    def ops(self, input_x):
        out_without_bias = tf.matmul(input_x, self.weight)
        output = tf.nn.bias_add(out_without_bias, self.bias)
        return output
    
class MlpMatch(object):
    def __init__(self):
        self.n_class = 2
        self.emb_size = 1024
        self.hidden_size = 128
        self.bow_layer = FCLayer(self.emb_size, self.hidden_size)
        self.fc_layer = FCLayer(2 * self.hidden_size, self.n_class)
     
    def predict(self, left_slots, right_slots):
        left, right = left_slots, right_slots
        left_bow = self.bow_layer.ops(tf.nn.softsign(tf.reduce_sum(left, axis=1)))
        right_bow = self.bow_layer.ops(tf.nn.softsign(tf.reduce_sum(right, axis=1)))
        concat = tf.concat([left_bow, right_bow], -1)
        pred = self.fc_layer.ops(concat)
        return pred

In [8]:
import numpy as np
model = ELMO(character=False, model_path='/data/zhangminchao/new_elmo')
tf.reset_default_graph()
net = MlpMatch()

test_l = tf.placeholder(tf.float32, [None, 30, 1024], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, 1024], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(data["train"], batch_size=128, shuffle=True):
            cost, _ = sess.run([loss, optimizer], feed_dict={test_l: model.to_vector(input_l).astype(np.float32), 
                                                             test_r:  model.to_vector(input_r).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32)})
        accuracy = []
        for val_l, val_r, val_y in batch_iter(data["dev"], batch_size=128, shuffle=False):
            accuracy.append(sess.run(acc, feed_dict={test_l:  model.to_vector(val_l).astype(np.float32), 
                                                     test_r: model.to_vector(val_r).astype(np.float32), 
                                                     test_y: np.array(val_y).astype(np.float32)}))
        print(" "*4, "train loss: %f" % (cost/len(input_l)))
        print(" "*4, "epoch %d accuracy on validation data : %f" % (e+1, np.mean(accuracy)))

USING SKIP CONNECTIONS
Instructions for updating:
Use the `axis` argument instead
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

     train loss: 0.367286
     epoch 1 accuracy on validation data : 0.589648
     train loss: 0.255453
     epoch 2 accuracy on validation data : 0.616016
     train loss: 0.126862
     epoch 3 accuracy on validation data : 0.623438
     train loss: 0.158700
     epoch 4 accuracy on validation data : 0.623047
     train loss: 0.125178
     epoch 5 accuracy on validation data : 0.624609
     train loss: 0.162531
     epoch 6 accuracy on validation data : 0.623438
     train loss: 0.141452
     epoch 7 accuracy on validation data : 0.630469
     train loss: 0.092454
     epoch 8 accuracy on validation data : 0.632422
     train loss: 0.128707
     epoch 9 accuracy on validation data : 0.627148
     train loss: 0.058363

```
train loss: 0.498080
     epoch 1 accuracy on validation data : 0.510352
     train loss: 0.464507
     epoch 2 accuracy on validation data : 0.516797
     train loss: 0.448489
     epoch 3 accuracy on validation data : 0.519336
     train loss: 0.433034
     epoch 4 accuracy on validation data : 0.516406
     train loss: 0.394427
     epoch 5 accuracy on validation data : 0.517383
     train loss: 0.390801
     epoch 6 accuracy on validation data : 0.520117
     train loss: 0.261450
     epoch 7 accuracy on validation data : 0.521680
     train loss: 0.328833
     epoch 8 accuracy on validation data : 0.523633
     train loss: 0.306248
     epoch 9 accuracy on validation data : 0.529492
     train loss: 0.310917
     epoch 10 accuracy on validation data : 0.534180
     train loss: 0.341402
     epoch 11 accuracy on validation data : 0.540625
     train loss: 0.291676
     epoch 12 accuracy on validation data : 0.541602
```

In [10]:
import numpy as np
model = ELMO(character=False, model_path='./')
tf.reset_default_graph()
net = MlpMatch()
test_l = tf.placeholder(tf.float32, [None, 30, 1024], name="input_left")
test_r = tf.placeholder(tf.float32, [None, 30, 1024], name="input_right")
test_y = tf.placeholder(tf.float32, [None, 2], name="input_label")

pred = net.predict(test_l, test_r)
pred_prob = tf.nn.softmax(pred, -1)
pred_index = tf.argmax(pred_prob, 1)
correct_pred = tf.equal(pred_index, tf.argmax(test_y, 1))
acc = tf.reduce_mean(tf.cast(correct_pred, "float"))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=test_y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(12):
        for input_l, input_r, input_y in batch_iter(data["train"], batch_size=128, shuffle=True):
            cost, _ = sess.run([loss, optimizer], feed_dict={test_l: model.to_vector(input_l).astype(np.float32), 
                                                             test_r:  model.to_vector(input_r).astype(np.float32), 
                                                             test_y: np.array(input_y).astype(np.float32)})
        accuracy = []
        for val_l, val_r, val_y in batch_iter(data["dev"], batch_size=128, shuffle=False):
            accuracy.append(sess.run(acc, feed_dict={test_l:  model.to_vector(val_l).astype(np.float32), 
                                                     test_r: model.to_vector(val_r).astype(np.float32), 
                                                     test_y: np.array(val_y).astype(np.float32)}))
        print(" "*4, "train loss: %f" % (cost/len(input_l)))
        print(" "*4, "epoch %d accuracy on validation data : %f" % (e+1, np.mean(accuracy)))

USING SKIP CONNECTIONS
     train loss: 0.285930
     epoch 1 accuracy on validation data : 0.568945
     train loss: 0.306973
     epoch 2 accuracy on validation data : 0.583789
     train loss: 0.153048
     epoch 3 accuracy on validation data : 0.600977
     train loss: 0.158269
     epoch 4 accuracy on validation data : 0.604883
     train loss: 0.150963
     epoch 5 accuracy on validation data : 0.611523
     train loss: 0.148603
     epoch 6 accuracy on validation data : 0.569727
     train loss: 0.098603
     epoch 7 accuracy on validation data : 0.623633
     train loss: 0.103967
     epoch 8 accuracy on validation data : 0.615430
     train loss: 0.085192
     epoch 9 accuracy on validation data : 0.628711
     train loss: 0.082350
     epoch 10 accuracy on validation data : 0.633398
     train loss: 0.114665
     epoch 11 accuracy on validation data : 0.637109
     train loss: 0.062814
     epoch 12 accuracy on validation data : 0.635156


```
train loss: 0.783122
     epoch 1 accuracy on validation data : 0.526953
     train loss: 0.612946
     epoch 2 accuracy on validation data : 0.532227
     train loss: 0.706031
     epoch 3 accuracy on validation data : 0.531641
     train loss: 0.736148
     epoch 4 accuracy on validation data : 0.537305
     train loss: 0.615426
     epoch 5 accuracy on validation data : 0.540039
     train loss: 0.522318
     epoch 6 accuracy on validation data : 0.541016
     train loss: 0.454612
     epoch 7 accuracy on validation data : 0.543164
     train loss: 0.543438
     epoch 8 accuracy on validation data : 0.545703
     train loss: 0.563003
     epoch 9 accuracy on validation data : 0.546289
     train loss: 0.514223
     epoch 10 accuracy on validation data : 0.551562
     train loss: 0.615924
     epoch 11 accuracy on validation data : 0.552344
     train loss: 0.498719
     epoch 12 accuracy on validation data : 0.555078
```

### 测试样例

In [2]:
raw_context = ['这 是 测试 .', '好的 .']
tokenized_context = [sentence.split() for sentence in raw_context]
tokenized_question = [['这', '是', '什么'],]
tokenized_context

[['这', '是', '测试', '.'], ['好的', '.']]

In [1]:
context_token_ids = tf.placeholder('int32', shape=(None, None))
question_token_ids = tf.placeholder('int32', shape=(None, None))

context_embeddings_op = bilm(context_token_ids)
# question_embeddgins_op = bilm(question_token_ids)

elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
elmo_context_output = weight_layers('output', context_embeddings_op, l2_coef=0.0)

# with tf.variable_scope('', reuse=True):
#     elmo_question_input = weight_layers('input', question_embeddgins_op, l2_coef=0.0)
# with tf.variable_scope('', reuse=True):
#     elmo_question_output = weight_layers('output', question_embeddgins_op, l2_coef=0.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    context_ids = batcher.batch_sentences(tokenized_context)
#     question_ids = batcher.batch_sentences(tokenized_question)
# elmo_question_input_
    elmo_context_input_ = sess.run(
        [
            elmo_context_input['weighted_op']
#             elmo_question_input['weighted_op']
        ],
        feed_dict={context_token_ids: context_ids, question_token_ids: question_ids})

# print(elmo_context_input_, elmo_question_input_)


USING SKIP CONNECTIONS
USING SKIP CONNECTIONS
[[[-0.0494836   0.15072705  0.02371956 ...  0.01728119  0.15322769
   -0.2924335 ]
  [-0.03865705  0.0443635   0.194944   ...  0.20702595 -0.17606495
   -0.15220994]
  [-0.02532118  0.07337871  0.08422874 ...  0.1530999   0.12955403
   -0.05277611]
  [-0.03031924  0.03041445  0.11669184 ...  0.06387962  0.06061445
   -0.00702057]]

 [[ 0.37764782 -0.00799875  0.166332   ...  0.1530999   0.12955403
   -0.05277611]
  [ 0.21324293  0.02273425  0.21007645 ...  0.06387962  0.06061445
   -0.00702057]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]]] [[[-0.04948363  0.15072708  0.02371956 ...  0.02699655  0.21408397
   -0.1234289 ]
  [-0.03865702  0.04436348  0.194944   ...  0.21258295 -0.12991495
    0.07850938]
  [-0.02532118  0.07337872  0.08422876 ...  0.06387964  0.06061444
   -0.00702058]]]


In [3]:
elmo_context_input_.shape

(2, 4, 1024)

In [4]:
elmo_question_input_.shape

(1, 3, 1024)

In [5]:
tf.reset_default_graph()

print(elmo_context_input_)

USING SKIP CONNECTIONS
[array([[[-0.0494836 ,  0.15072705,  0.02371956, ...,  0.01728119,
          0.15322769, -0.2924335 ],
        [-0.03865705,  0.0443635 ,  0.194944  , ...,  0.20702595,
         -0.17606495, -0.15220994],
        [-0.02532118,  0.07337871,  0.08422874, ...,  0.1530999 ,
          0.12955403, -0.05277611],
        [-0.03031924,  0.03041445,  0.11669184, ...,  0.06387962,
          0.06061445, -0.00702057]],

       [[ 0.37764782, -0.00799875,  0.166332  , ...,  0.1530999 ,
          0.12955403, -0.05277611],
        [ 0.21324293,  0.02273425,  0.21007645, ...,  0.06387962,
          0.06061445, -0.00702057],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]]], dtype=float32)]
