In [1]:
import os
import json
import codecs
import numpy as np
import tensorflow as tf
# Expose only the CUDA device with index 1 to this process.
# NOTE(review): this is assigned after `import tensorflow`; it is usually set before the
# import so it is guaranteed to take effect — confirm it is honoured in this environment.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    # Enable memory growth on every visible GPU instead of the default allocation policy.
    tf.config.experimental.set_memory_growth(gpu, True)

In [22]:
# Hyper-parameters shared by the cells below.
params = {
    'batch_size': 64,  # batch size for tf.data batching; also used to truncate the splits
    'lr' : 0.001,  # learning rate handed to the Adam optimizer
    'max_sent_len': 20,  # every sequence is padded/truncated to this many positions
    'epochs': 200,  # number of epochs passed to model.fit
    'drops' : [0.1]  # NOTE(review): not read anywhere in the visible notebook
         }

In [3]:
def extract_data(data_path):
    """
    Load the annotated corpus and derive intent labels and per-character slot tags.

    Intent recognition uses each example's ``intent`` field as the label.
    Slot filling is treated as a named-entity-recognition problem: every
    character of the text receives one tag from the B/I/E/O/S scheme
    (``B-<slot>`` begin, ``I-<slot>`` inside, ``E-<slot>`` end,
    ``S-<slot>`` single-character slot, ``O`` outside any slot).

    Spaces are removed from the text before tagging so that each returned text
    and its tag sequence always have the same length. Slot values are
    space-stripped the same way, and a slot whose value is empty or does not
    occur in the text is skipped instead of writing tags at negative indices.

    Args:
        data_path: Path to a UTF-8 JSON file containing a list of examples.
            Each example provides ``text`` and ``intent`` and may provide
            ``entities``, a mapping of slot name -> slot value.

    Returns:
        A tuple ``(texts, intent_labels, slots_ners)`` of three equally long
        lists: the space-stripped texts, the intent label of each example, and
        one list of tags per example (one tag per character).
    """
    with codecs.open(data_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    texts = [example['text'].replace(" ", "") for example in data]
    intent_labels = [example['intent'] for example in data]

    slots_ners = []
    for text, example in zip(texts, data):
        # Tag the same space-stripped string that is returned in `texts`.
        ner = ['O'] * len(text)
        for key, val in (example.get('entities') or {}).items():
            val = val.replace(" ", "")
            # str.find returns -1 when the value is absent; an empty value would
            # "match" at 0 and yield a negative end index. Skip both cases.
            start_idx = text.find(val) if val else -1
            if start_idx < 0:
                continue
            end_idx = start_idx + len(val) - 1
            if start_idx == end_idx:
                ner[start_idx] = 'S-' + key
            else:
                ner[start_idx] = 'B-' + key
                for idx in range(start_idx + 1, end_idx):
                    ner[idx] = 'I-' + key
                ner[end_idx] = 'E-' + key
        slots_ners.append(ner)

    print('texts len: ', len(texts))
    print('intent_lables len: ',len(intent_labels))
    print('slots_ners len: ', len(slots_ners))
    return texts, intent_labels, slots_ners

In [4]:
data_path ="../dataset/data_v2.json"
max_sent_len = params["max_sent_len"]
texts, intent_labels, slots_ners = extract_data(data_path)

# Hold-out split: examples at indices 0, 10, 20, ... form the validation set and all
# other examples form the training set. Each split is then cut down to a whole number
# of batches (train_l / valid_l are the batch counts).
valid_text = texts[::10]
train_text = [sample for pos, sample in enumerate(texts) if pos % 10]
valid_l = len(valid_text) // params['batch_size']
train_l = len(train_text) // params['batch_size']
valid_text = valid_text[:valid_l * params['batch_size']]
train_text = train_text[:train_l * params['batch_size']]

# Intent labels, split and truncated exactly like the texts.
valid_intent = intent_labels[::10][:valid_l * params['batch_size']]
train_intent = [label for pos, label in enumerate(intent_labels) if pos % 10][:train_l * params['batch_size']]

# Slot tag sequences, split and truncated exactly like the texts.
valid_ner = slots_ners[::10][:valid_l * params['batch_size']]
train_ner = [tags for pos, tags in enumerate(slots_ners) if pos % 10][:train_l * params['batch_size']]

texts len:  2542
intent_lables len:  2542
slots_ners len:  2542


In [5]:
# Load the JSON file that holds the lookup tables unpacked in the next cell.
with open('../char_6.17.json', mode='r', encoding='utf-8') as f:
    dicts = json.load(f)

In [6]:
# Unpack the forward (x2id) and reverse (id2x) lookup tables for characters,
# intent labels and slot tags.
# NOTE(review): if the id2* tables were serialized as JSON objects, their keys come
# back from json.load as strings rather than ints — verify before indexing them.
char2id = dicts['char2id']
id2char = dicts['id2char']
intent2id = dicts['intent2id']
id2intent = dicts['id2intent']
slot2id = dicts['slot2id']
id2slot = dicts['id2slot']

In [7]:
# Record the number of intent / slot classes (used as the output layer widths)
# and keep the reverse lookup tables alongside the other settings.
params['intent_num'] = len(intent2id)
params['slot_num'] = len(slot2id)
params['id2intent'] = id2intent
params['id2slot'] = id2slot

In [8]:
def trans2labelid(vocab, labels, max_sent_len):
    """Map a sequence of tokens to ids and force it to a fixed length.

    Args:
        vocab: Mapping from token (character or tag) to integer id.
        labels: Iterable of tokens; every token must be a key of ``vocab``
            (a missing token raises ``KeyError``).
        max_sent_len: Length of the returned list.

    Returns:
        A new list of ids: right-padded with 0 when the input is shorter than
        ``max_sent_len``, otherwise cut to the first ``max_sent_len`` ids.
    """
    # Every token is looked up before any truncation happens.
    encoded = [vocab[token] for token in labels]
    shortfall = max_sent_len - len(encoded)
    if shortfall > 0:
        return encoded + [0] * shortfall
    return encoded[:max_sent_len]

In [9]:
def read_data(txt_seqs, intent_labels, slot_ners,char2id,intent2id,slot2id,max_sent_len):
    """Encode texts, intents and slot tags as numpy id arrays.

    Args:
        txt_seqs: Sequence of texts; its length decides how many examples are read.
        intent_labels: Intent label of each example, indexed like ``txt_seqs``.
        slot_ners: Tag sequence of each example, indexed like ``txt_seqs``.
        char2id: Mapping from character to id.
        intent2id: Mapping from intent label to id.
        slot2id: Mapping from slot tag to id.
        max_sent_len: Fixed length that text and tag sequences are padded/truncated to.

    Returns:
        Three numpy arrays with one row per example: character ids
        (``max_sent_len`` per row), intent ids (a single id per row) and
        slot-tag ids (``max_sent_len`` per row).
    """
    # Encode one example at a time: text first, then intent, then tags.
    encoded_rows = [
        (
            trans2labelid(char2id, txt_seqs[pos], max_sent_len),
            [intent2id[intent_labels[pos]]],
            trans2labelid(slot2id, slot_ners[pos], max_sent_len),
        )
        for pos in range(len(txt_seqs))
    ]

    text_ids = np.array([row[0] for row in encoded_rows])
    intent_ids = np.array([row[1] for row in encoded_rows])
    ner_ids = np.array([row[2] for row in encoded_rows])
    return text_ids, intent_ids, ner_ids

In [10]:
# Encode the training split into id arrays.
# NOTE(review): `tarin_seq` looks like a typo for `train_seq`, but the later Dataset(...)
# cell reads it under this exact name, so both places must be renamed together.
# NOTE(review): train_intent / train_ner are rebound here from raw labels to encoded
# arrays, so this cell presumably cannot be re-run without re-running the split cell first.
tarin_seq, train_intent, train_ner =  read_data(train_text, train_intent, train_ner,char2id,intent2id,slot2id,max_sent_len) 

In [11]:
# Encode the validation split into id arrays.
# NOTE(review): valid_intent / valid_ner are rebound here from raw labels to encoded
# arrays, so this cell presumably cannot be re-run without re-running the split cell first.
valid_seq, valid_intent, valid_ner =  read_data(valid_text, valid_intent, valid_ner,char2id,intent2id,slot2id,max_sent_len) 

In [12]:
def Dataset(txt_seqs, dataset_intent_labels, dataset_ner_labels):
    """Build a batched ``tf.data.Dataset`` of (features, targets) pairs.

    Features are keyed by "Input"; targets are keyed by "pre_intent" and
    "pre_ner". The batch size is read from the global ``params['batch_size']``.

    Args:
        txt_seqs: Encoded character ids, one row per example.
        dataset_intent_labels: Encoded intent ids, one row per example.
        dataset_ner_labels: Encoded slot-tag ids, one row per example.

    Returns:
        The batched dataset.
    """
    features = {"Input": txt_seqs}
    targets = {
        "pre_intent": dataset_intent_labels,
        "pre_ner": dataset_ner_labels,
    }
    sliced = tf.data.Dataset.from_tensor_slices((features, targets))
    return sliced.batch(params['batch_size'])

In [13]:
# Wrap the encoded train / validation arrays in batched tf.data datasets.
# `tarin_seq` (sic) is the name under which the encoded training texts were stored.
train_dataset = Dataset(tarin_seq, train_intent, train_ner)
valid_dataset = Dataset(valid_seq, valid_intent, valid_ner)

In [14]:
from tensorflow.keras.layers import Layer

class MultiHeadAttention(Layer):
    """Multi-head scaled dot-product self-attention.

    The same input tensor is used as query, key and value. It is projected by
    three Dense layers, split into ``heads`` heads of width ``head_size``,
    attended, merged again and projected to ``out_dim`` features.

    Args:
        heads: Number of attention heads.
        head_size: Feature width of each head.
        out_dim: Width of the output projection. Defaults to
            ``heads * head_size`` when left as ``None``.
        use_bias: Whether the four Dense projections use a bias term.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(
            self,
            heads,
            head_size,
            out_dim=None,
            use_bias=True,
            **kwargs
    ):
        super(MultiHeadAttention, self).__init__(**kwargs)
        self.heads = heads
        self.head_size = head_size
        # Dense(units=None) is not valid, so an unspecified out_dim falls back to
        # the width of the concatenated heads.
        self.out_dim = heads * head_size if out_dim is None else out_dim
        self.use_bias = use_bias

    def _projection(self, units, name):
        """Create one Dense projection with the constraints shared by q/k/v/o."""
        # With Keras's default max_value of 1.0 and a non-negative norm, the
        # negative min_value never binds: the constraint acts as a norm cap.
        return tf.keras.layers.Dense(
            units=units,
            use_bias=self.use_bias,
            kernel_constraint=tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint=tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name=name
        )

    def build(self, input_shape):
        """Create the query/key/value/output projections (in that order)."""
        super(MultiHeadAttention, self).build(input_shape)
        inner_dim = self.head_size * self.heads
        self.q_dense = self._projection(inner_dim, 'q')
        self.k_dense = self._projection(inner_dim, 'k')
        self.v_dense = self._projection(inner_dim, 'v')
        self.o_dense = self._projection(self.out_dim, 'o')

    def call(self, inputs):
        """Apply self-attention to ``inputs`` and return the projected result."""
        q = inputs
        k = inputs
        v = inputs
        # Linear projections.
        qw = self.q_dense(q)
        kw = self.k_dense(k)
        vw = self.v_dense(v)
        # Split the feature axis into (heads, head_size); axis 1 of the input is kept.
        qw = tf.reshape(qw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        kw = tf.reshape(kw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        vw = tf.reshape(vw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        # Attention.
        qkv_inputs = [qw, kw, vw]
        o = self.pay_attention_to(qkv_inputs)
        # Merge the heads back into a single feature axis and project.
        o = tf.reshape(o, (-1, tf.shape(o)[1], self.head_size * self.heads))
        o = self.o_dense(o)
        return o

    def pay_attention_to(self, inputs):
        """Scaled dot-product attention over per-head tensors.

        Args:
            inputs: Sequence whose first three items are the query, key and
                value tensors after the head split.

        Returns:
            The attended values, laid out like the query tensor.
        """
        (qw, kw, vw) = inputs[:3]
        # Scores between every query position j and key position k, per head.
        a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
        a = a / self.head_size ** 0.5
        # Softmax over the last axis, i.e. over key positions.
        A = tf.nn.softmax(a)
        o = tf.einsum('bhjk,bkhd -> bjhd', A, vw)
        return o

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(MultiHeadAttention, self).get_config()
        config.update({
            'heads': self.heads,
            'head_size': self.head_size,
            'out_dim': self.out_dim,
            'use_bias': self.use_bias,
        })
        return config

In [15]:
from tensorflow.keras.layers import concatenate, Dropout,LayerNormalization, Dense, add

In [17]:
class Encoder(tf.keras.models.Model):
    """Stack of attention + feed-forward steps that reuses one set of weights.

    The sub-layers are created once in ``build`` and applied ``layer_count``
    times in ``call``, so every iteration shares the same parameters.

    Args:
        layer_count: Number of times the attention/feed-forward step is applied.
        **kwargs: Forwarded to ``tf.keras.models.Model``.
    """

    def __init__(
        self,
        layer_count,
        **kwargs
    ):
        super(Encoder, self).__init__(**kwargs)
        self.layer_count = layer_count

    def build(self,input_shape):
        """Create the shared sub-layers.

        The attention output width is 32 and is added to the layer input, so
        the last axis of the input must also be 32.
        """
        self.MultiHeadAttention =  MultiHeadAttention(heads=16,head_size=4,out_dim=32)
        self.dropout_1 = Dropout(0.1)
        self.l1 =  LayerNormalization(name='normal')
        self.feed1 = Dense(32,name='feed')
        self.dropout1 = Dropout(0.1)
        self.l_1 =  LayerNormalization(name='normal1')

    def call(self,inputs, training=None):
        """Run the shared step ``layer_count`` times.

        Args:
            inputs: Input tensor whose last axis has 32 features.
            training: Optional training flag, passed to the Dropout layers.

        Returns:
            The final state, with the same layout as ``inputs``.
        """
        state = inputs
        for _ in range(self.layer_count):
            # Self-attention with a residual connection.
            attended = add([self.MultiHeadAttention(state), state])
            # The dropout results used to be discarded (normalization read the
            # pre-dropout tensor), so dropout never took effect; feed them forward.
            attended = self.dropout_1(attended, training=training)
            normed = self.l1(attended)
            # Position-wise projection, again followed by dropout and normalization.
            projected = self.dropout1(self.feed1(normed), training=training)
            state = self.l_1(projected)
        return state

In [18]:
# Start from a clean Keras graph so that re-running this cell does not accumulate state.
tf.keras.backend.clear_session()

# Character-id input; its length follows the padding length used when encoding the data.
text_inputs = tf.keras.layers.Input(shape=(params['max_sent_len'],),name='Input')
# NOTE(review): the vocabulary size is hard-coded to 500 — every id in char2id must be
# below 500; confirm against the vocabulary file.
embed = tf.keras.layers.Embedding(500,32)(text_inputs)

l_1 = Encoder(layer_count=3)(embed)

# Sentence representation for the intent head: mean over all positions.
conv = tf.keras.layers.GlobalAveragePooling1D()(l_1)

# Both heads are trained with sparse categorical cross-entropy over mutually exclusive
# classes, so they emit a softmax distribution (previously sigmoid). softmax and sigmoid
# are both monotone in the logits, so the arg-max prediction of a trained model is the same.
pre_intent = tf.keras.layers.Dense(params['intent_num'],activation='softmax',name = 'pre_intent',kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0))(conv)
# The slot head is applied to every position of the encoder output.
pre_slot = tf.keras.layers.Dense(params['slot_num'],activation='softmax',name = 'pre_ner',kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0))(l_1)
model = tf.keras.Model(text_inputs,[pre_intent,pre_slot])
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input (InputLayer)              [(None, 20)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 20, 32)       16000       Input[0][0]                      
__________________________________________________________________________________________________
encoder (Encoder)               (None, 20, 32)       9600        embedding[0][0]                  
__________________________________________________________________________________________________
global_average_pooling1d (Globa (None, 32)           0           encoder[0][0]                    
_______________________________________________________________________________________

In [19]:
# Per-output loss and metric, keyed by the output layer names ('pre_intent', 'pre_ner').
# The sparse loss takes the integer class ids built by read_data as targets.
losses = {'pre_intent':'sparse_categorical_crossentropy','pre_ner':'sparse_categorical_crossentropy'}
metrics = { 'pre_intent': ['accuracy'],'pre_ner': ['accuracy']}
# Adam with the learning rate from params['lr'].
optimizer = tf.keras.optimizers.Adam(params['lr'])
model.compile(optimizer, loss=losses, metrics=metrics)

In [20]:
# Write weights only, and only when the monitored metric improves
# (no `monitor` is given, so Keras's default 'val_loss' applies).
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='../ner_model_weight/model_encoder_714.h5',save_weights_only=True,save_best_only=True)
# Multiply the learning rate by 0.8 after 20 epochs without improvement
# of the monitored metric (Keras's default 'val_loss').
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(patience=20,factor=0.8)

In [23]:
# Train for params['epochs'] epochs, validating on valid_dataset each epoch and
# running the checkpoint and learning-rate callbacks.
model.fit(train_dataset,epochs=params['epochs'],validation_data=valid_dataset,callbacks=[checkpoint,reduce_lr])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fbf307dc910>

In [24]:
# Save the final-epoch weights under their own file name. Writing them to
# '../ner_model_weight/model_encoder_714.h5' would overwrite the best-on-validation
# weights that the ModelCheckpoint callback (save_best_only=True) stored at that path.
model.save_weights('../ner_model_weight/model_encoder_714_last.h5')