# bi-LSTM+viterbi分词
* 使用TensorFlow大家bi-LSTM神经网络，实现文本序列输入-》标记序列输出
* 使用viterbi对标记序列规范化
* 初始状态：人为设定'b','s'为有效，'e'，'m'无效
* 状态转移矩阵：人为设定，符合规则的设置成0.5，不符合规则的设置成0.0

* 导入需要的包

In [1]:
import tensorflow as tf
import re
import pickle
import random
import numpy as np

* 定义训练样本处理方法

In [2]:
def read_data(data_path):
    """
    读取语料文件，将字和标记分开以list形式存储，形式[[字],[标记]]
    :param data_path:语料文件路径，文件格式：字1/标记1  字2/标记2  
    :return content:  [[字1,标记1],[字2,标记2],...]
    """
    with open(data_path, 'r', encoding='utf-8') as f:
        word_list = f.read().split()
    all_list = [[word.split('/')[0], word.split('/')[1]] for word in word_list]
    return all_list

# 制作word2id,id2word,tag2id
def make_dict(all_list):
    """
    统计语料中字和标记，生成字和标记的编码
    :param all_list:输入数据，格式：[[字1,标记1],[字2,标记2],...]
    :return word2id, id2word, tag2id: 字：列表值，列表值：字，标记：列表值
    """
    ##统计字和标记
    all_char = [] #以hash的形式存储字符
    all_tag = [] #以hash的形式存储标记
    for i in all_list:
        if i[0] not in all_char:
            all_char.append(i[0])
        if i[1] not in all_tag:
            all_tag.append(i[1])

    #添加不认识的字，或者pad的字
    all_char.append('<UNK>')
    all_char.append('<PAD>')
    #添加非字对应的标记
    all_tag.append('x')
    print(all_tag)
    
    ##为字和标记编码成列表
    word2id = {}#字：列表值
    id2word = {}#列表值：字
    tag2id = {}#标记：列表值
 
    for index, char in enumerate(all_char):
        word2id[char] = index
        id2word[index] = char
    for index, char in enumerate(all_tag):
        tag2id[char] = index
    return word2id, id2word, tag2id


def data_util(data_path,word2id, tag2id):
    """
    将样本转换成字和标记的列表值
    :param data_path:语料文件路径
    :param word2id:字：列表值
    :param tag2id:标记：列表值
    :return all_list: 一条样本格式：[[字1,标记1],[字2,标记2],...,[字n,标记n],句子长度]
    """
    with open(data_path, "r", encoding="utf8") as f:
        data = f.read()
    rr = re.compile(r'[,，。、“”‘’－》《（）●：！;…？]/s')
    sentences = rr.split(data)#以上式作为句子的分隔符，分隔出字符句子
    sentences = list(filter(lambda x: x.strip(), sentences))#过滤掉空句子
    sentences = list(map(lambda x: x.strip(), sentences))#将字符串句子的前后空格去掉

    all_list = []
    for i in sentences:
        word_list = i.split()#默认以空格分隔
        one_list = [[word2id[word.split('/')[0]], tag2id[word.split('/')[1]]] for word in word_list]
        one_list.append(len(word_list))#计算记录样本的长度
        all_list.append(one_list)
    return all_list

# 产生随机的embedding矩阵
def random_embedding(word2id, embedding_dim):
    """
    生成嵌入层的矩阵，初始化成随机值。
    :param id2word:列表：字-》统计所有字个数
    :param embedding_dim:词向量维度
    :return:
    """
    embedding_mat = np.random.uniform(-0.25, 0.25, (len(word2id), embedding_dim))
    embedding_mat = np.float32(embedding_mat)
    return embedding_mat

### 处理训练数据

In [3]:
data_path = 'zu_data\data.txt'#训练样本路径
all_list = read_data(data_path)#将样本处理成[字，标]类型

In [4]:
word2id, id2word, tag2id = make_dict(all_list)

['b', 'e', 's', 'm', 'x']


In [5]:
data = data_util(data_path, word2id, tag2id)

#拆分数据为训练集和验证集
train_set = data[:-1000]
test_set = data[-1000:]

### 构建bi-LSTM神经网络

* 定义模型参数

In [11]:
# 所有参数
hidden_size = 128
batch_size = 512
cell_nums = 2#LSTM的层数
epoch_num = 1
optimizer = 'Adam'#定义优化器
lr = 0.001
clip = 5.0
dropout = 1
num_tags = 5#标签维度
update_embedding = True
embedding_dim = 200
shuffle = False
isTrain = True

* 定义batch获取方法
* 定义标签处理方法

In [13]:
def to_one_hot(labels, tag_nums):
    """
    将2维的tag转化为one-hot形式，return结果为3维
    :param labels:标签格式：[[1,2,3],[句子的标记序列2],..]
    :param tag_nums:标签种类个数
    :return:标签格式：[[[0,1,0,0,0],[0,0,1,0,0],[0,0,0,1,0]],[[],[],...,[]],..]
    """
    length = len(labels)#batch的样本个数
    len_lab = len(labels[0])#第一条样本中字的个数
    res = np.zeros((length, len_lab, tag_nums), dtype=np.float32)
    for i in range(length):
        for j in range(len_lab):
            res[i][j][labels[i][j]] = 1.
    return np.array(res)


def get_batch(data, batch_size, word2id, tag2id, shuffle=False):
    """
    将数据pad，生成batch数据返回，这里没有取余数。pad长度是batch中最大句子的长度。
    :param data:格式：[[字1,标记1],[字2,标记2],...,[字n,标记n],句子长度]
    :param batch_size:
    :param vocab:
    :param shuffle:
    :return:
    """
    # 乱序没有加
    if shuffle:
        random.shuffle(data)
    pad = word2id['<PAD>']
    tag_pad = tag2id["x"]
    
    for i in range(len(data) // batch_size):
        data_size = data[i * batch_size: (i + 1) * batch_size]
        seqs, labels, sentence_legth = [], [], []
        for i in data_size:
            one_line = np.array(i[:-1])  #去除数据
            seqs.append(one_line[:,0])   #获取字序列
            labels.append(one_line[:,1]) #获取标签序列
            sentence_legth.append(i[-1]) #获取句子长度
        max_l = max(sentence_legth)      #句子长度最大的值作为pad的维度

        res_seq = []#pad后的字序列
        for sent in seqs:
            sent_new = np.concatenate((sent, np.tile(pad, max_l - len(sent))), axis=0)  #以pad的形式补充成等长的帧数
            res_seq.append(sent_new)

        res_labels = []#pad后的标记序列
        for label in labels:
            label_new = np.concatenate((label, np.tile(tag_pad, max_l - len(label))), axis=0)  #以pad的形式补充成等长的帧数
            res_labels.append(label_new)

        res_labels = to_one_hot(res_labels, 5)#将标记序列one-hot处理生成最终训练标签
        yield np.array(res_seq), res_labels, sentence_legth

def get_batch1(data, batch_size, word2id, tag2id, shuffle=False):
    """
    将数据pad，生成batch数据返回，这里没有取余数。pad长度是batch中指定的句子长度。
    :param data:格式：[[字1,标记1],[字2,标记2],...,[字n,标记n],句子长度]
    :param batch_size:
    :param vocab:
    :param shuffle:
    :return:
    """
    # 乱序没有加
    if shuffle:
        random.shuffle(data)
    pad = word2id['<PAD>']
    tag_pad = tag2id["x"]
    for i in range(len(data) // batch_size):
        data_size = data[i * batch_size: (i + 1) * batch_size]
        seqs, labels, sentence_legth = [], [], []
        for i in data_size:
            one_line = np.array(i[:-1])
            seqs.append(one_line[:, 0])
            labels.append(one_line[:, 1])
            sentence_legth.append(i[-1])
        max_l = max(sentence_legth)
        res_seq = []
        for sent in seqs:
            if len(sent)>=32:
                sent_new = sent[:32]
            else:
                sent_new = np.concatenate((sent, np.tile(pad, 32 - len(sent))), axis=0)  # 以pad的形式补充成等长的帧数
            res_seq.append(sent_new)
        res_labels = []
        for label in labels:
            if len(label)>=32:
                label_new = label[:32]
            else:
                label_new = np.concatenate((label, np.tile(tag_pad, 32 - len(label))), axis=0)  # 以pad的形式补充成等长的帧数
            res_labels.append(label_new)
        res_labels = to_one_hot(res_labels, 5)
        yield np.array(res_seq), res_labels, sentence_legth

* 定义embedding层

In [12]:
embeddings = random_embedding(word2id, embedding_dim)

* 构建神经网络结构图

In [14]:
graph = tf.Graph()
with graph.as_default():
    """
    构建神经网络的结构、损失、优化方法和评估方法
    """
        
    #模型搭建
    
    # shape[batch_size, sentences]
    word_ids = tf.placeholder(tf.int32, shape=[None, None], name="word_ids")
    # shape[batch_size, sentences, labels]
    labels = tf.placeholder(tf.int32, shape=[None, None, num_tags], name="labels")
    # 真实序列长度：shape[batch_size,1]
    sequence_lengths = tf.placeholder(tf.int32, shape=[None,], name="sequence_lengths")
    #dropout keep_prob
    dropout_pl = tf.placeholder(dtype=tf.float32, shape=(), name="dropout")

    with tf.variable_scope("words"):#命名空间
        _word_embeddings = tf.Variable(embeddings,#shape[len_words,200]
                                       dtype=tf.float32,
                                       trainable=update_embedding,#嵌入层是否可以训练
                                       name="_word_embeddings")
        word_embeddings = tf.nn.embedding_lookup(params=_word_embeddings,ids=word_ids,name="word_embeddings")
        word_embeddings = tf.nn.dropout(word_embeddings, dropout_pl)

    with tf.variable_scope("fb-lstm"):
        cell_fw = [tf.nn.rnn_cell.LSTMCell(hidden_size) for _ in range(cell_nums)]
        cell_bw = [tf.nn.rnn_cell.LSTMCell(hidden_size) for _ in range(cell_nums)]
        rnn_cell_fw = tf.nn.rnn_cell.MultiRNNCell(cell_fw)
        rnn_cell_bw = tf.nn.rnn_cell.MultiRNNCell(cell_bw)
        (output_fw_seq, output_bw_seq), states = tf.nn.bidirectional_dynamic_rnn(rnn_cell_fw, rnn_cell_bw, word_embeddings,
                                                          sequence_length=sequence_lengths, dtype=tf.float32)
        # output的shape是[batch_size, sentences, hidden_size*2]
        output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)
        output = tf.nn.dropout(output, dropout_pl)

    with tf.variable_scope("classification"):
        # logits:shape[batch_size, sentences, num_tags]
        logits = tf.layers.dense(output, num_tags)
    #计算损失
    
    with tf.variable_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels)
        # mask的功能是产生True、False矩阵，根据最长的序列产生。类似[Ture,Ture,Ture,Ture,Ture,Ture,Ture,Ture,Ture,False]
        mask = tf.sequence_mask(sequence_lengths)
        # boolean_mask的作用将loss里面超过真实长度的loss去掉
        # 如果你这样做了，写评价函数时，也需要将pad的部分去掉。
        losses = tf.boolean_mask(losses, mask)
        loss = tf.reduce_mean(losses)

    #选择优化器
        
    with tf.variable_scope("train_step"):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        global_add = global_step.assign_add(1)#用于计数
        
        if optimizer == 'Adam':
            optim = tf.train.AdamOptimizer(learning_rate=lr)
        elif optimizer == 'Adadelta':
            optim = tf.train.AdadeltaOptimizer(learning_rate=lr)
        elif optimizer == 'Adagrad':
            optim = tf.train.AdagradOptimizer(learning_rate=lr)
        elif optimizer == 'RMSProp':
            optim = tf.train.RMSPropOptimizer(learning_rate=lr)
        elif optimizer == 'Momentum':
            optim = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)
        elif optimizer == 'SGD':
            optim = tf.train.GradientDescentOptimizer(learning_rate=lr)
        else:
            optim = tf.train.GradientDescentOptimizer(learning_rate=lr)

        grads_and_vars = optim.compute_gradients(loss)
        
        # 对梯度gradients进行裁剪，保证在[-clip, clip]之间。
        grads_and_vars_clip = [[tf.clip_by_value(g, -clip, clip), v] for g, v in grads_and_vars]
        
        train_op = optim.apply_gradients(grads_and_vars_clip, global_step=global_step)

    #准确率计算
        
    with tf.variable_scope("evaluation"):
        true_ = tf.cast(tf.argmax(labels, axis=-1), tf.float32)#真实序列的值
        labels_softmax = tf.nn.softmax(logits)
        print(labels_softmax)
        labels_softmax_ = tf.argmax(logits, axis=-1)
        pred_ = tf.cast(labels_softmax_, tf.float32)#预测序列的值
        print(pred_)
        zeros_like_actuals = tf.zeros_like(true_)#生成相同shape的全0的tensor
        four_like_actuals = tf.ones_like(true_) * 4#生成相同shape的全4的tensor
        
        mask1 = tf.equal(tf.cast(tf.equal(four_like_actuals, true_), tf.float32), zeros_like_actuals)
        
        true = tf.boolean_mask(true_, mask1)
        pred = tf.boolean_mask(pred_, mask1)
        
        accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, true), tf.float32))


Tensor("evaluation/Softmax:0", shape=(?, ?, 5), dtype=float32)
Tensor("evaluation/Cast_1:0", shape=(?, ?), dtype=float32)


* 运行图

In [15]:
with tf.Session(graph=graph) as sess:
    if isTrain:
        saver = tf.train.Saver(tf.global_variables())
        try:
            ckpt_path = tf.train.latest_checkpoint('checkpoint/')
            saver.restore(sess, ckpt_path)
        except ValueError:
            init = tf.global_variables_initializer()
            sess.run(init)
        for epoch in range(epoch_num):
            for res_seq, res_labels, sentence_legth in get_batch(train_set, batch_size, word2id, tag2id, shuffle=shuffle):
                _, l, acc, global_nums, logits_, labels_ = sess.run([train_op, loss, accuracy, global_add,pred_, true_], {
                    word_ids: res_seq,
                    labels: res_labels,
                    sequence_lengths: sentence_legth,
                    dropout_pl:dropout
                })
                if global_nums % 80 == 0:#每20个batch保存一次
                    saver.save(sess, './checkpoint/model.ckpt', global_step=global_nums)
                    print('epoch {}, global_step {}, loss: {:.4}, accuracy: {:.4} '.format(epoch + 1, global_nums + 1, l, acc))
        #验证集上测试效果
        
        nums = 0
        for res_seq, res_labels, sentence_legth in get_batch(test_set, batch_size,word2id, tag2id,shuffle=shuffle):
            l, acc = sess.run([loss, accuracy], {
                    word_ids: res_seq,
                    labels: res_labels,
                    sequence_lengths: sentence_legth,
                    dropout_pl: dropout
                })
            nums += 1
            if nums % 1 == 0:
                print('global_step {}, loss: {:.4}, accuracy: {:.4} '.format(nums + 1, l, acc))

epoch 1, global_step 81, loss: 1.23, accuracy: 0.4292 
epoch 1, global_step 161, loss: 1.021, accuracy: 0.5607 
epoch 1, global_step 241, loss: 0.7721, accuracy: 0.7121 
epoch 1, global_step 321, loss: 0.6177, accuracy: 0.7803 
epoch 1, global_step 401, loss: 0.5184, accuracy: 0.8202 
Instructions for updating:
Use standard file APIs to delete files with this prefix.
epoch 1, global_step 481, loss: 0.4949, accuracy: 0.8297 
epoch 1, global_step 561, loss: 0.5764, accuracy: 0.783 
epoch 1, global_step 641, loss: 0.4563, accuracy: 0.8413 
epoch 1, global_step 721, loss: 0.4548, accuracy: 0.8375 
epoch 1, global_step 801, loss: 0.4366, accuracy: 0.8399 
epoch 1, global_step 881, loss: 0.4149, accuracy: 0.85 
epoch 1, global_step 961, loss: 0.3645, accuracy: 0.8726 
epoch 1, global_step 1041, loss: 0.3687, accuracy: 0.8712 
epoch 1, global_step 1121, loss: 0.2997, accuracy: 0.9018 
epoch 1, global_step 1201, loss: 0.3866, accuracy: 0.8654 
epoch 1, global_step 1281, loss: 0.363, accuracy: 

* 加载一个预训练的模型 

In [63]:
def run_model(all_seq, seq_length):
    """
    加载训练好的模型，获取模型的输入和输出节点。
    :param all_seq:格式：输入序列
    :param seq_length:输入序列长度
    :return pred1,pred2:输出序列tf.argmax只有的标签；输出序列softmax之后的标签
    """
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        # 加载存储路径
        check_point_path = 'checkpoint/'
        # 找到模型文件名
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir=check_point_path)

        #ckpt有model_checkpoint_path和all_model_checkpoint_paths两个属性，分别是最新的模型和所有的模型
        last_but_one_ckpt = ckpt.all_model_checkpoint_paths[-2]#选择倒数第二个模型 # 20190517 选择最好的模型
        
        
        #import_meta_graph将保存在.meta文件中的图添加到当前的图中
        saver = tf.train.import_meta_graph(last_but_one_ckpt+'.meta')
        
        #从模型中恢复参数
        saver.restore(sess, last_but_one_ckpt)
        
        input_placeholder = tf.get_default_graph().get_tensor_by_name("word_ids:0")  # [batch_size, 200, 200, 3]

        keep_prob_placeholder = tf.get_default_graph().get_tensor_by_name("dropout:0")  # [batch_size, 200, 200, 3]
        
        sequence_lengths = tf.get_default_graph().get_tensor_by_name("sequence_lengths:0")  # [batch_size, 200, 200, 3]
        
        output_1 = tf.get_default_graph().get_tensor_by_name("evaluation/Cast_1:0")
        
        output_2 = tf.get_default_graph().get_tensor_by_name("evaluation/Softmax:0")

        pred1,pred2 = sess.run([output_1,output_2], 
                               feed_dict={input_placeholder: all_seq, sequence_lengths:seq_length, keep_prob_placeholder:1.0})
        
        return pred1,pred2

def make_inputs(words):
    """
    将数据pad固定长度。
    :param words:预测样本
    :return sent_new:padding后的输入序列
    """
    pad = word2id['<PAD>']
    all_seq = [word2id[word] for word in words]
    sent_new = np.concatenate((all_seq, np.tile(pad, 32 - len(all_seq))), axis=0)
    sent_new = np.reshape(sent_new, (1, 32))
    return sent_new
    

def prediction_res(words,pred):
    """
    打印分词语句序列，及其对应的标注序列
    :param words:预测样本
    :param pred:预测标注序列
    :return tag_res,new_sentence:标记序列，分词后的句子
    """
    id2tag = {tag2id[key]:key for key in tag2id}
    tag_res = [id2tag[pred[0][i]] for i in range(len(words))]
    
    new_sentence = ""
    index = 0
    for i in range(len(tag_res)):
        if tag_res[i] == "s":
            new_sentence += words[i]
            new_sentence += "  "
        elif tag_res[i] == "b":
            index = i
        elif tag_res[i] == "e":
            new_sentence += words[index:i+1]
            new_sentence += "  "
        else:
            pass
    return (tag_res,new_sentence)



# words没有做新词，默认所有词都有，不能写空格
# words = '我家住在北京天安门'
# words = '我爱你中国'
# words = '王军虎去广州了'
# words = '在北京大学生活区喝进口红酒'
# words = '学生会宣传部'
# words = '沿海南方向逃跑'
# words = '这样的人才能经受住考验'
# words = '网曝徐峥夜会美女'
words = "谢娜与子怡正面较量，国外谁的名气更大，看到老外的反应就知道了"
sent_new = make_inputs(words)
pred1,pred2 = run_model(sent_new,[32])
tag_res,new_sentence = prediction_res(words, pred1)

print(words)
print(new_sentence)
print(tag_res)


INFO:tensorflow:Restoring parameters from checkpoint/model.ckpt-1200
谢娜与子怡正面较量，国外谁的名气更大，看到老外的反应就知道了
谢娜  与  谢娜与子怡  正面  较量  ，国  ，国外  谁  的  名气  更  大，  看到  老外  的  反应  就  知道  了  
['b', 'e', 's', 'm', 'e', 'b', 'e', 'b', 'e', 'b', 'e', 'e', 's', 's', 'b', 'e', 's', 'b', 'e', 'b', 'e', 'b', 'e', 's', 'b', 'e', 's', 'b', 'e', 's']


* 可以发现具有不符合逻辑的预测，'s'后面跟着'm'，'e'后面跟着'e'等，导致分词出现上述异常。

### viterbi规范化输出标记序列

* 转移概率，单纯用等概率

In [46]:
#状态转移矩阵
trans_p = {'b': {'b': 0.0, 'm': 0.5, 'e': 0.5, 's': 0.0,'x':0.0},
           'm': {'b': 0.0, 'm': 0.5, 'e': 0.5, 's': 0.0,'x':0.0},
           'e': {'b': 0.5, 'm': 0.0, 'e': 0.0, 's': 0.5,'x':0.5},
           's': {'b': 0.5, 'm': 0.0, 'e': 0.0, 's': 0.5,'x':0.5},
           'x': {'b': 0.0, 'm': 0.0, 'e': 0.0, 's': 0.0,'x':0.5}}
#初始状态
start_p = {'b': 1.0, 'm': 0.0, 'e': 0.0, 's': 1.0,'x':0.0}#初始状态

In [62]:
def viterbi(pred,states, start_p, trans_p):
    """
    改造的viterbi解码，寻找最优且合法路径
    :param pred:神经网络模型预测的状态序列
    :param states:状态集合
    :param start_p:初始状态
    :param trans_p:状态转移矩阵
    :return prob, path[state]:最优概率，最优合法路径
    """
    V = [{}]#存储状态对应的概率值
    path = {}#保存总路径
    
    #初始状态
    for y in range(len(states)):#['b','e','s','m','x']
        V[0][states[y]] = start_p[states[y]] * pred[0][y]
        path[states[y]] = [states[y]]
    #寻找最优路径    
    for t in range(1,len(pred)):
        V.append({})
        newpath = {}#保存当前层的全部最优路径
        
        for i in range(len(states)):#遍历当前层[0,1,2,3,4]
            prob = 0.0
            state = []
            for s in states:#遍历前一层['b','e','s','m','x']
                if V[t-1][s]>0:
                    if prob < V[t-1][s] * trans_p[s][states[i]] * pred[t][i]:#选择当前节点与前面节点最优的概率及路径
                        prob , state = V[t-1][s] * trans_p[s][states[i]] * pred[t][i] , s#保存当前节点与前面节点最优的概率及路径
            V[t][states[i]] = prob
            newpath[states[i]] = path[state] + [states[i]]
        path = newpath

    (prob, state) = max([(V[len(pred) - 1][y], y) for y in states])
    return (prob, path[state])
     

def cut1(sentence,pred2):
    """
    改造的viterbi解码，寻找最优且合法路径
    :param pred:神经网络模型预测的状态序列
    :param states:状态集合
    :param start_p:初始状态
    :param trans_p:状态转移矩阵
    :return prob, path[state]:最优概率，最优合法路径
    """
    prob, pos_list =  viterbi(pred2[0],['b','e','s','m','x'],start_p, trans_p)
    pos_list = pos_list[0:len(sentence)]#预测序列中截取真是输入序列的长度
    new_sentence = ""
    index = 0
    for i in range(len(pos_list)):
        if pos_list[i] == "s":
            new_sentence += sentence[i]
            new_sentence += "  "
        elif pos_list[i] == "b":
            index = i
        elif pos_list[i] == "e":
            new_sentence += sentence[index:i+1]
            new_sentence += "  "
        else:
            pass
 
    return (prob,pos_list,new_sentence)


words = "谢娜与子怡正面较量，国外谁的名气更大，看到老外的反应就知道了"
prob,pos_list,new_sentence = cut1(words,pred2)

print(words)
print(new_sentence)
print(pos_list)

谢娜与子怡正面较量，国外谁的名气更大，看到老外的反应就知道了
谢娜  与子怡  正面  较量  ，国外  谁  的  名气  更  大，  看到  老外  的  反应  就  知道  了  
['b', 'e', 'b', 'm', 'e', 'b', 'e', 'b', 'e', 'b', 'm', 'e', 's', 's', 'b', 'e', 's', 'b', 'e', 'b', 'e', 'b', 'e', 's', 'b', 'e', 's', 'b', 'e', 's']


* 可以发现已经没有不符合逻辑的预测了，例如：'s'后面跟着'm'，'e'后面跟着'e'等。
* 虽然分词效果还不理想，由于训练轮次紧训练了1轮，准确率在86%。在GPU上测试训练两轮就可以到90%，同样分词效果也会有所提升。

['b', 'e', 's', 'm', 'e', 'b', 'e', 'b', 'e', 'b', 'e', 'e', 's', 's', 'b', 'e', 's', 'b', 'e', 'b', 'e', 'b', 'e', 's', 'b', 'e', 's', 'b', 'e', 's']


['b', 'e', 'b', 'm', 'e', 'b', 'e', 'b', 'e', 'b', 'm', 'e', 's', 's', 'b', 'e', 's', 'b', 'e', 'b', 'e', 'b', 'e', 's', 'b', 'e', 's', 'b', 'e', 's']