In [1]:
import os
import json
import codecs
import numpy as np
import tensorflow as tf
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [2]:
params = {
    'batch_size': 64,
    'lr' : 0.001,
    'max_sent_len': 20,
    'epochs': 50,
    'drops' : [0.1]
         }

In [3]:
def extract_data(data_path):
    """
    意图识别抽取出label
    槽位识别与填充作为命名实体识别问题，对每一个字进行实体标注, ate_time', 'B-target', 'I-date_time', 'I-date_time', 'I-operation', 'I-date_time', 'I-date_time']
[ ]:
￼
​B E I O S
    """
    with codecs.open(data_path,"r",encoding="utf-8") as fp:
        data = json.load(fp)
    texts = [example['text'].replace(" ","") for example in data]
    intent_labels = [example['intent'] for example in data]
    
    slots_ners = []
    count = 0
    for example in data:
        if 'entities' in example.keys():
            text = example['text']
            ner = ['O'] * len(text)
            slots = example['entities']
            for key,val in slots.items():
                start_idx = text.find(val)
                end_idx = start_idx + len(val) -1
                if len(val) == 1:
                    ner[start_idx] = 'S-' + key
                else:
                    ner[start_idx] = 'B-' + key
                    ner[end_idx] = 'E-'+ key
                    for idx in range(start_idx+1, end_idx):
                        ner[idx] = 'I-' + key
        else:
            text = example['text']
            ner = ['O'] * len(text)
        slots_ners.append(ner)
    print('texts len: ', len(texts))
    print('intent_lables len: ',len(intent_labels))
    print('slots_ners len: ', len(slots_ners))
    return texts, intent_labels, slots_ners        

In [4]:
data_path ="../dataset/data_v2.json"
max_sent_len = params["max_sent_len"]
texts, intent_labels, slots_ners = extract_data(data_path)
# l = len(texts) // params['batch_size']
# texts = texts[:l*params['batch_size']]
# intent =  intent[:l*params['batch_size']]
# slots_ners = slots_ners[:l*params['batch_size']]
train_text = [d for i , d in enumerate(texts) if i % 10 != 0]
train_l = len(train_text) // params['batch_size']
train_text = train_text[:train_l*params['batch_size']]
valid_text = [d for i , d in enumerate(texts) if i % 10 == 0]
valid_l = len(valid_text) // params['batch_size']
valid_text = valid_text[:valid_l*params['batch_size']]

train_intent = [d for i , d in enumerate(intent_labels) if i % 10 != 0]
train_intent = train_intent[:train_l*params['batch_size']]
valid_intent = [d for i , d in enumerate(intent_labels) if i % 10 == 0]
valid_intent = valid_intent[:valid_l*params['batch_size']]

train_ner = [d for i , d in enumerate(slots_ners) if i % 10 != 0]
train_ner = train_ner[:train_l*params['batch_size']]
valid_ner = [d for i , d in enumerate(slots_ners) if i % 10 == 0]
valid_ner =valid_ner[:valid_l*params['batch_size']]

texts len:  2542
intent_lables len:  2542
slots_ners len:  2542


In [5]:
with open('../char_6.17.json', mode='r', encoding='utf-8') as f:
    dicts = json.load(f)

In [6]:
char2id = dicts['char2id']
id2char = dicts['id2char']
intent2id = dicts['intent2id']
id2intent = dicts['id2intent']
slot2id = dicts['slot2id']
id2slot = dicts['id2slot']

In [7]:
params['intent_num'] = len(intent2id)
params['slot_num'] = len(slot2id)
params['id2intent'] = id2intent
params['id2slot'] = id2slot

In [8]:
def trans2labelid(vocab, labels, max_sent_len):
    labels = [vocab[label] for label in labels]
    if len(labels) < max_sent_len:
        labels += [0] * (max_sent_len - len(labels))
    else:
        labels = labels[:max_sent_len]
    return labels

In [9]:
def read_data(txt_seqs, intent_labels, slot_ners,char2id,intent2id,slot2id,max_sent_len):
    dataset_text_labels = []
    dataset_intent_labels = []
    dataset_ner_labels = []
    
    for index in range(len(txt_seqs)):
        dataset_text_labels.append(trans2labelid(char2id,txt_seqs[index],max_sent_len))
        dataset_intent_labels.append([intent2id[intent_labels[index]]])
        dataset_ner_labels.append(trans2labelid(slot2id,slot_ners[index],max_sent_len))
    dataset_text_labels = np.array(dataset_text_labels)
    dataset_intent_labels = np.array(dataset_intent_labels)
    dataset_ner_labels = np.array(dataset_ner_labels)
    
    return dataset_text_labels, dataset_intent_labels, dataset_ner_labels 

In [10]:
tarin_seq, train_intent, train_ner =  read_data(train_text, train_intent, train_ner,char2id,intent2id,slot2id,max_sent_len) 

In [11]:
valid_seq, valid_intent, valid_ner =  read_data(valid_text, valid_intent, valid_ner,char2id,intent2id,slot2id,max_sent_len) 

In [12]:
def Dataset(txt_seqs, dataset_intent_labels, dataset_ner_labels):
    dataset = tf.data.Dataset.from_tensor_slices(({
    "Input" : txt_seqs
    },
    {
        "pre_intent":dataset_intent_labels,
        
        "pre_ner":dataset_ner_labels
    }))
    dataset = dataset.batch(params['batch_size'])
    return dataset

In [13]:
train_dataset = Dataset(tarin_seq, train_intent, train_ner)
valid_dataset = Dataset(valid_seq, valid_intent, valid_ner)

In [14]:
from tensorflow.keras.layers import Layer

class MultiHeadAttention(Layer):
    def __init__(
            self,
            heads,
            head_size,
            out_dim=None,
            use_bias=True,
#             max_value = 1,
#             min_value = -1
            **kwargs
    ):
        super(MultiHeadAttention, self).__init__(**kwargs)
        self.heads = heads
        self.head_size = head_size
        self.out_dim = out_dim 
        self.use_bias = use_bias

    def build(self, input_shape):
        super(MultiHeadAttention, self).build(input_shape)
        self.q_dense = tf.keras.layers.Dense(
            units=self.head_size * self.heads,
            use_bias=self.use_bias,
            kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name = 'q'
            
        )
        self.k_dense = tf.keras.layers.Dense(
            units=self.head_size * self.heads,
            use_bias=self.use_bias,
            kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name = 'k'
        )
        self.v_dense = tf.keras.layers.Dense(
            units=self.head_size * self.heads,
            use_bias=self.use_bias,
            kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name = 'v'
        )
        self.o_dense = tf.keras.layers.Dense( 
            units=self.out_dim,
            use_bias=self.use_bias,
            kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name = 'o'
        )

    def call(self, inputs):
        q = inputs
        k = inputs
        v = inputs
        # 线性变化
        qw = self.q_dense(q)
        kw = self.k_dense(k)
        vw = self.v_dense(v)
        # 形状变换
        qw = tf.reshape(qw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        kw = tf.reshape(kw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        vw = tf.reshape(vw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        # attention
        qkv_inputs = [qw, kw, vw]
        o = self.pay_attention_to(qkv_inputs)
        o = tf.reshape(o, (-1, tf.shape(o)[1], self.head_size * self.heads))
        o = self.o_dense(o)
        return o

    def pay_attention_to(self, inputs):
        (qw, kw, vw) = inputs[:3]
        a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
        a = a / self.head_size ** 0.5
        A = tf.nn.softmax(a)
        o = tf.einsum('bhjk,bkhd -> bjhd', A, vw)
#         print(o)
        return o

In [15]:
from tensorflow.keras.layers import concatenate, Dropout,LayerNormalization, Dense, add

In [16]:
class Encoder(tf.keras.models.Model):
    def __init__(
        self,
        layer_count,
        **kwargs
    ):
        super(Encoder, self).__init__(**kwargs)
        self.layer_count = layer_count
        
    def build(self,input_shape):
        self.MultiHeadAttention =  MultiHeadAttention(heads=16,head_size=4,out_dim=32)
        self.dropout_1 = Dropout(0.1)
        self.l1 =  LayerNormalization(name='normal')
        self.feed1 = Dense(32,name='feed')
        self.dropout1 = Dropout(0.1)
        self.l_1 =  LayerNormalization(name='normal1')
        
    def call(self,inputs):
        state = inputs
        for i in range(self.layer_count):
#             print('state: ',i)
            att1 = self.MultiHeadAttention(state)
            att_1 = add([att1,state])
#             dropout1  = self.dropout_1(att_1)
            l1 = self.l1(att_1)
            feed1 =self.feed1(l1)
#             dropout_1  = self.dropout1(feed1)
            l_1 = self.l_1(feed1)
            state = l_1
        return state

In [17]:
tf.keras.backend.clear_session()
text_inputs = tf.keras.layers.Input(shape=(20,),name='Input')
embed = tf.keras.layers.Embedding(500,32)(text_inputs)

l_1 = Encoder(layer_count=1)(embed)

conv = tf.keras.layers.GlobalAveragePooling1D()(l_1)
pre_intent = tf.keras.layers.Dense(params['intent_num'],activation='sigmoid',name = 'pre_intent',kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0))(conv)
pre_slot = tf.keras.layers.Dense(params['slot_num'],activation='sigmoid',name = 'pre_ner',kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0))(l_1)
model = tf.keras.Model(text_inputs,[pre_intent,pre_slot])
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input (InputLayer)              [(None, 20)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 20, 32)       16000       Input[0][0]                      
__________________________________________________________________________________________________
encoder (Encoder)               (None, 20, 32)       9600        embedding[0][0]                  
__________________________________________________________________________________________________
global_average_pooling1d (Globa (None, 32)           0           encoder[0][0]                    
_______________________________________________________________________________________

In [18]:
losses = {'pre_intent':'sparse_categorical_crossentropy','pre_ner':'sparse_categorical_crossentropy'}
metrics = { 'pre_intent': ['accuracy'],'pre_ner': ['accuracy']}
optimizer = tf.keras.optimizers.Adam(params['lr'])
model.compile(optimizer, loss=losses, metrics=metrics)

In [19]:
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='../ner_model_weight/model_encoder_714.h5',save_weights_only=True,save_best_only=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(patience=20,factor=0.8)

In [20]:
model.fit(train_dataset,epochs=params['epochs'],validation_data=valid_dataset,callbacks=[checkpoint,reduce_lr])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7ff34c4fbe50>

In [26]:
model.save_weights('../ner_model_weight/model_encoder_714.h5')

In [22]:
model.load_weights('../ner_model_weight/model_encoder_714.h5')

In [23]:
pre_intent,pre_slot = model.predict([[111, 196, 182, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [24]:
pre_intent

array([[2.9494049e-05, 5.3530930e-05, 8.8762766e-04, 1.3331781e-03,
        8.2310438e-03, 2.2651965e-04, 6.9630548e-02, 1.3630442e-02,
        1.6755251e-03, 6.0249095e-05, 1.6953712e-04, 8.2841929e-04,
        1.7529672e-03, 1.6962040e-04, 8.7594418e-03, 1.6872374e-03,
        8.2052469e-02, 3.5339527e-04, 3.5789917e-04, 9.6443466e-05,
        1.8358817e-02, 1.3664477e-03, 3.3313471e-03, 2.9194590e-03,
        5.9483160e-04, 8.1444468e-04, 8.0008281e-04, 6.7297084e-04,
        1.0471063e-03, 4.7507658e-04, 6.4713242e-03, 9.1811264e-05,
        2.4078325e-03, 2.3891955e-05, 1.8400388e-03, 5.8634733e-03,
        1.7386231e-04, 8.7405009e-05, 5.2923884e-04, 6.7850437e-05,
        4.5236826e-04, 1.2026340e-03, 4.8980106e-05, 2.4951095e-04,
        3.3015964e-04, 2.8099646e-04, 2.6102316e-01, 1.0366692e-03,
        5.1147066e-04, 5.6982944e-03, 3.2021280e-04, 6.5410853e-04,
        1.4881464e-04, 3.6891075e-03, 6.1456543e-05]], dtype=float32)

In [25]:
pre_slot

array([[[1.41103761e-02, 1.25193343e-04, 5.41495217e-04, 9.07725375e-03,
         6.08455914e-04, 3.31322923e-02, 1.71928643e-03, 1.30036788e-04,
         2.08800237e-04, 1.61213626e-03, 3.38990561e-04, 5.23552124e-04,
         1.68756989e-03, 1.57017086e-04, 2.48261523e-02, 8.47355742e-03,
         1.30898831e-03, 2.78503197e-04, 2.69482918e-02, 3.11168318e-04,
         3.42854677e-04, 2.83676032e-02, 5.70192351e-04, 1.13033201e-03,
         8.25727533e-04, 3.43502918e-03, 7.18734682e-01, 2.02587224e-03,
         5.22225630e-04, 1.63231033e-03, 1.07515727e-04, 1.60128140e-04,
         1.52094217e-04, 5.78077976e-04, 1.70798413e-02, 4.30360157e-03],
        [9.36264638e-03, 2.61177862e-04, 5.07572899e-04, 6.82142470e-03,
         1.46548473e-03, 3.72575037e-02, 5.16129285e-03, 1.45172686e-04,
         4.16871655e-04, 9.11060488e-05, 5.35520900e-04, 5.96660539e-04,
         5.58356522e-03, 2.08530459e-04, 6.41897172e-02, 7.81682506e-03,
         1.67384301e-03, 1.25431130e-03, 1.4214117