# model_encoder forward

In [1]:
import numpy as np
import h5py
import json

In [2]:
params = {
    'model_weight' : '../ner_model_weight/model_encoder_714.h5',
    'embed_size' : 500,
    'max_sent_len': 20,
    'heads':16,
    'head_size':4,
    'batch_size': 64,
    'lr' : 0.001,
    'max_sent_len': 20,
    'epochs': 500,
    'drops' : [0.1]
}

In [3]:
heads=params['heads']
head_size=params['head_size']

In [4]:
with open('../char_6.17.json', mode='r', encoding='utf-8') as f:
    dicts = json.load(f)
char2id = dicts['char2id']
id2char = dicts['id2char']
intent2id = dicts['intent2id']
id2intent = dicts['id2intent']
slot2id = dicts['slot2id']
id2slot = dicts['id2slot']

In [5]:
def sigmoid(x):
    y = 1 / (1 + np.exp(-x))
    return y

def softmax(x):
    exp_x = np.exp(x)
    sum_exp_x = np.sum(exp_x,axis=-1,keepdims=True)
    y = exp_x / sum_exp_x
    return y

def embedding(x,embed_size,embed):
    x_one= np.zeros((len(x),embed_size))
    x_one[range(len(x)), x] = 1
    x_embed = np.dot(x_one, embed)
    return x_embed

def GlobalAveragePooling1D(x,step_axis=0):
    return np.mean(x,axis=step_axis)

def LayerNormalization(x,gamma,beta,step_axis = -1,epsilon=1e-3):
    mean = np.mean(x,axis = step_axis)
    mean = np.expand_dims(mean,axis=1)
    variance = np.var(x,axis = step_axis)
    variance = np.expand_dims(variance,axis=1)
    inv = 1.0 / np.sqrt(variance + epsilon)
#     print(np.shape(inv))
    gamma = np.expand_dims(gamma,axis=0)
    beta = np.expand_dims(beta,axis=0)
    inv = gamma *inv
    return x * inv + (beta - mean * inv)

def dense(x,gamma, bias):
#     print(np.shape(x))
    y = np.matmul(x,gamma)
    y = np.add(y,bias)
    return y

In [6]:
def MultiHeadAttention(x,heads,head_size,atten_q_bias,atten_q_kernel,atten_k_bias,atten_k_kernel,atten_v_bias,atten_v_kernel,atten_o_bias,atten_o_kernel):
    q = dense(x,atten_q_kernel,atten_q_bias)
#     print('q: ',q)
    k = dense(x,atten_k_kernel,atten_k_bias)
#     print('k: ',k)
    v = dense(x,atten_v_kernel,atten_v_bias)
#     print('v: ',v )model_encoder
    
    qw = np.reshape(q,(-1,heads,head_size))
    kw = np.reshape(k,(-1,heads,head_size))
    vw = np.reshape(v,(-1,heads,head_size))
    print(np.shape(qw))
    
    a = np.einsum('jhd,khd->hjk', qw, kw)
    a = a / head_size ** 0.5
    A = softmax(a)
    o = np.einsum('hjk,khd -> jhd', A, vw)
    
    print(np.shape(o))
    o = np.reshape(o,(-1,heads*head_size))
    print(np.shape(o))
    o = dense(o,atten_o_kernel,atten_o_bias)
    return o

In [7]:
file_path = '../ner_model_weight/model_encoder_714.h5'
f = h5py.File(file_path, 'r')

In [12]:
def get_weight(file_path):
    f = h5py.File(file_path,'r')
    
    embed = f['embedding']['embedding']['embeddings:0'][:]
    
    encoder_bias = f['encoder']['encoder']['feed']['bias:0'][:]
    encoder_kernel = f['encoder']['encoder']['feed']['kernel:0'][:]
    
    encoder_normal_beta = f['encoder']['encoder']['normal']['beta:0'][:]
    encode_normal_gamma = f['encoder']['encoder']['normal']['gamma:0'][:]
    
    encoder_normal_beta1 = f['encoder']['encoder']['normal1']['beta:0'][:]
    encoder_normal_gamma1 = f['encoder']['encoder']['normal1']['gamma:0'][:]
    
    atten_q_bias = f['encoder']['encoder']['multi_head_attention']['q']['bias:0'][:]
    atten_q_kernel = f['encoder']['encoder']['multi_head_attention']['q']['kernel:0'][:]
    
    atten_k_bias = f['encoder']['encoder']['multi_head_attention']['k']['bias:0'][:]
    atten_k_kernel = f['encoder']['encoder']['multi_head_attention']['k']['kernel:0'][:]
    
    atten_v_bias = f['encoder']['encoder']['multi_head_attention']['v']['bias:0'][:]
    atten_v_kernel = f['encoder']['encoder']['multi_head_attention']['v']['kernel:0'][:]
    
    atten_o_bias = f['encoder']['encoder']['multi_head_attention']['o']['bias:0'][:]
    atten_o_kernel = f['encoder']['encoder']['multi_head_attention']['o']['kernel:0'][:]
    
    pre_intent_bias = f['pre_intent']['pre_intent']['bias:0'][:]
    pre_intent_kernel = f['pre_intent']['pre_intent']['kernel:0'][:]
    
    pre_ner_bias = f['pre_ner']['pre_ner']['bias:0'][:]
    pre_ner_kernel = f['pre_ner']['pre_ner']['kernel:0'][:]
    
    return embed,encoder_bias,encoder_kernel,encoder_normal_beta,encode_normal_gamma,encoder_normal_beta1,encoder_normal_gamma1,\
            atten_q_bias,atten_q_kernel,atten_k_bias,atten_k_kernel,atten_v_bias,atten_v_kernel,atten_o_bias,atten_o_kernel,\
            pre_intent_bias,pre_intent_kernel,pre_ner_bias,pre_ner_kernel

In [13]:
def trans2labelid(vocab, labels, max_sent_len):
    labels = [vocab[label] for label in labels]
    if len(labels) < max_sent_len:
        labels += [0] * (max_sent_len - len(labels))
    else:
        labels = labels[:max_sent_len]
    return labels

In [14]:
def test(inputs,h5file_path):
    embed,encoder_bias,encoder_kernel,encoder_normal_beta,encode_normal_gamma,encoder_normal_beta1,encoder_normal_gamma1,\
            atten_q_bias,atten_q_kernel,atten_k_bias,atten_k_kernel,atten_v_bias,atten_v_kernel,atten_o_bias,atten_o_kernel,\
            pre_intent_bias,pre_intent_kernel,pre_ner_bias,pre_ner_kernel = get_weight(file_path)
    
    x = trans2labelid(char2id,inputs,params['max_sent_len'])
    embed = embedding(x,params['embed_size'], embed)
    state = embed
    for i in range(3):
        att = MultiHeadAttention(state,heads,head_size,atten_q_bias,atten_q_kernel,atten_k_bias,\
                                 atten_k_kernel,atten_v_bias,atten_v_kernel,atten_o_bias,atten_o_kernel)
        att_1 = np.add(att,state)
        l = LayerNormalization(att_1,encode_normal_gamma,encoder_normal_beta)
        feed1 = dense(l,encoder_kernel,encoder_bias)
        l1 = LayerNormalization(feed1,encoder_normal_gamma1,encoder_normal_beta1)
        state = l1
    conv = GlobalAveragePooling1D(state)
    
    pre_intent = dense(conv,pre_intent_kernel,pre_intent_bias)
    pre_intent = sigmoid(pre_intent)
    
    pre_slot = dense(state,pre_ner_kernel,pre_ner_bias)
    pre_slot = sigmoid(pre_slot)
    
    return pre_intent, pre_slot 

In [15]:
inputs = '打开空调'
np_pre_intent, np_pre_slot  = test(inputs,file_path) 

(20, 16, 4)
(20, 16, 4)
(20, 64)
(20, 16, 4)
(20, 16, 4)
(20, 64)
(20, 16, 4)
(20, 16, 4)
(20, 64)


In [16]:
np_pre_intent

array([1.95904586e-09, 2.04386779e-05, 1.66595914e-04, 9.81184326e-06,
       1.24571212e-04, 5.37920218e-06, 2.57746185e-04, 1.79327330e-04,
       6.70664053e-06, 4.14397897e-06, 1.61926877e-05, 6.62351278e-09,
       6.09047303e-05, 6.30200950e-08, 5.87803221e-05, 1.04552780e-05,
       5.56724286e-05, 1.74767173e-06, 5.83089042e-06, 1.70645293e-05,
       1.26748242e-06, 2.43458928e-06, 1.42205218e-04, 1.67237164e-05,
       4.44439417e-06, 1.15679629e-04, 1.75295308e-06, 1.68209113e-05,
       9.88302989e-05, 3.50426021e-06, 4.88769135e-05, 3.19596877e-05,
       1.85471583e-06, 1.43897584e-09, 2.72267536e-08, 3.83518695e-05,
       9.27672908e-06, 1.60597614e-06, 1.57129852e-05, 9.12048096e-06,
       1.67779951e-07, 4.94079045e-05, 7.63037671e-07, 4.46608465e-05,
       9.90124232e-08, 5.94714345e-06, 3.40175920e-01, 1.54744429e-04,
       8.20822876e-06, 1.23600913e-04, 1.37792943e-06, 3.70436733e-06,
       1.23600317e-07, 4.13216434e-06, 9.34163704e-06])

In [17]:
np_pre_slot

array([[1.12049938e-04, 1.43518947e-06, 2.20492084e-05, 8.24728068e-07,
        5.63667548e-07, 1.36262937e-05, 4.87082576e-07, 4.25770958e-07,
        3.53080060e-06, 1.87324678e-06, 3.25442955e-06, 8.77282563e-07,
        1.05962736e-05, 4.88877307e-08, 2.21502381e-05, 4.42867319e-07,
        6.00204142e-07, 6.67324142e-06, 1.08494573e-04, 2.67930951e-06,
        3.50265651e-06, 1.85574161e-02, 1.97405918e-06, 1.75415477e-08,
        2.73151924e-06, 5.80408137e-08, 1.72910285e-01, 1.71942884e-05,
        1.02581654e-04, 2.05186997e-04, 9.74425699e-07, 5.51951045e-06,
        1.81612423e-06, 1.71990945e-04, 2.42315763e-03, 1.32583144e-03],
       [2.73026992e-04, 8.18767088e-06, 1.43162994e-06, 4.41072472e-07,
        1.75200975e-06, 7.03978053e-05, 2.41747974e-06, 4.74158778e-06,
        9.20071206e-06, 1.03604179e-05, 2.03574631e-05, 3.57565832e-06,
        3.78581066e-04, 1.81650827e-07, 9.71770970e-06, 8.16040008e-08,
        9.71164197e-07, 8.76409435e-05, 9.10042021e-04, 1.88063

# encoder模型验证

In [18]:
from tensorflow.keras.layers import concatenate, Dropout,LayerNormalization, Dense, add
import tensorflow as tf
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')

NameError: name 'os' is not defined

In [None]:
from tensorflow.keras.layers import Layer

class MultiHeadAttention(Layer):
    def __init__(
            self,
            heads,
            head_size,
            out_dim=None,
            use_bias=True,
#             max_value = 1,
#             min_value = -1l1
            **kwargs
    ):
        super(MultiHeadAttention, self).__init__(**kwargs)
        self.heads = heads
        self.head_size = head_size
        self.out_dim = out_dim 
        self.use_bias = use_bias

    def build(self, input_shape):
        super(MultiHeadAttention, self).build(input_shape)
        self.q_dense = tf.keras.layers.Dense(
            units=self.head_size * self.heads,
            use_bias=self.use_bias,
            kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name = 'q'
            
        )
        self.k_dense = tf.keras.layers.Dense(
            units=self.head_size * self.heads,
            use_bias=self.use_bias,
            kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name = 'k'
        )
        self.v_dense = tf.keras.layers.Dense(
            units=self.head_size * self.heads,
            use_bias=self.use_bias,
            kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name = 'v'
        )
        self.o_dense = tf.keras.layers.Dense( 
            units=self.out_dim,
            use_bias=self.use_bias,
            kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            name = 'o'
        )

    def call(self, inputs):
        q = inputs
        k = inputs
        v = inputs
        # 线性变化
        qw = self.q_dense(q)
        kw = self.k_dense(k)
        vw = self.v_dense(v)
        # 形状变换
        qw = tf.reshape(qw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        kw = tf.reshape(kw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        vw = tf.reshape(vw, (-1, tf.shape(q)[1], self.heads, self.head_size))
        # attention
        qkv_inputs = [qw, kw, vw]
        o = self.pay_attention_to(qkv_inputs)
        o = tf.reshape(o, (-1, tf.shape(o)[1], self.head_size * self.heads))
        o = self.o_dense(o)
        return o

    def pay_attention_to(self, inputs):
        (qw, kw, vw) = inputs[:3]
        a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
        a = a / self.head_size ** 0.5
        A = tf.nn.softmax(a)
        o = tf.einsum('bhjk,bkhd -> bjhd', A, vw)
        return o

In [19]:
class Encoder(tf.keras.models.Model):
    def __init__(
        self,
        layer_count,
        **kwargs
    ):
        super(Encoder, self).__init__(**kwargs)
        self.layer_count = layer_count
        
    def build(self,input_shape):
        self.MultiHeadAttention =  MultiHeadAttention(heads=16,head_size=4,out_dim=32)
        self.dropout_1 = Dropout(0.1)
        self.l1 =  LayerNormalization(name='normal')
        self.feed1 = Dense(32,name='feed')
        self.dropout1 = Dropout(0.1)
        self.l_1 =  LayerNormalization(name='normal1')
        
    def call(self,inputs):
        state = inputs
        for _ in range(self.layer_count):
            att1 = self.MultiHeadAttention(state)
            att_1 = add([att1,state])
            dropout1  = self.dropout_1(att_1)
            l1 = self.l1(dropout1)
            feed1 =self.feed1(l1)
            dropout_1  = self.dropout1(feed1)
            l_1 = self.l_1(dropout_1)
            state = l_1
        return state

In [20]:
params['intent_num'] = len(intent2id)
params['slot_num'] = len(slot2id)
params['id2intent'] = id2intent
params['id2slot'] = id2slot

In [21]:
tf.keras.backend.clear_session()
text_inputs = tf.keras.layers.Input(shape=(20,),name='Input')
embed = tf.keras.layers.Embedding(500,32)(text_inputs)

l_1 = Encoder(layer_count=3)(embed)

conv = tf.keras.layers.GlobalAveragePooling1D()(l_1)
pre_intent = tf.keras.layers.Dense(params['intent_num'],activation='sigmoid',name = 'pre_intent',kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0))(conv)
pre_slot = tf.keras.layers.Dense(params['slot_num'],activation='sigmoid',name = 'pre_ner',kernel_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0),
            bias_constraint = tf.keras.constraints.MinMaxNorm(min_value=-1.0))(l_1)
model = tf.keras.Model(text_inputs,[pre_intent,pre_slot])
model.summary()

TypeError: MultiHeadAttention() got an unexpected keyword argument 'out_dim'

In [None]:
model.load_weights('../ner_model_weight/model_encoder_714.h5')

In [None]:
x = trans2labelid(char2id,inputs,params['max_sent_len'])

In [88]:
pre_intent,pre_slot = model.predict([x])

In [89]:
pre_intent

array([[1.9590465e-09, 2.0438689e-05, 1.6659604e-04, 9.8118408e-06,
        1.2457115e-04, 5.3791969e-06, 2.5774655e-04, 1.7932746e-04,
        6.7066326e-06, 4.1439694e-06, 1.6192693e-05, 6.6235017e-09,
        6.0904629e-05, 6.3020160e-08, 5.8780417e-05, 1.0455287e-05,
        5.5672532e-05, 1.7476705e-06, 5.8308751e-06, 1.7064507e-05,
        1.2674833e-06, 2.4345902e-06, 1.4220510e-04, 1.6723672e-05,
        4.4443959e-06, 1.1567961e-04, 1.7529503e-06, 1.6820906e-05,
        9.8830460e-05, 3.5042683e-06, 4.8876893e-05, 3.1959706e-05,
        1.8547166e-06, 1.4389775e-09, 2.7226763e-08, 3.8351842e-05,
        9.2767259e-06, 1.6059797e-06, 1.5712985e-05, 9.1204747e-06,
        1.6777959e-07, 4.9407943e-05, 7.6303849e-07, 4.4660752e-05,
        9.9012532e-08, 5.9471431e-06, 3.4017593e-01, 1.5474478e-04,
        8.2082279e-06, 1.2360090e-04, 1.3779290e-06, 3.7043731e-06,
        1.2360017e-07, 4.1321618e-06, 9.3416484e-06]], dtype=float32)

In [91]:
pre_slot

array([[[1.12049856e-04, 1.43518957e-06, 2.20493130e-05, 8.24729000e-07,
         5.63667527e-07, 1.36263034e-05, 4.87082389e-07, 4.25770082e-07,
         3.53079258e-06, 1.87324451e-06, 3.25443239e-06, 8.77283298e-07,
         1.05962690e-05, 4.88877845e-08, 2.21502014e-05, 4.42867645e-07,
         6.00205851e-07, 6.67324593e-06, 1.08494336e-04, 2.67931819e-06,
         3.50265464e-06, 1.85574424e-02, 1.97405666e-06, 1.75415060e-08,
         2.73151932e-06, 5.80408397e-08, 1.72910050e-01, 1.71943102e-05,
         1.02581580e-04, 2.05187214e-04, 9.74423187e-07, 5.51951598e-06,
         1.81612199e-06, 1.71990541e-04, 2.42315349e-03, 1.32583210e-03],
        [2.73026555e-04, 8.18765784e-06, 1.43163254e-06, 4.41073752e-07,
         1.75201092e-06, 7.03979240e-05, 2.41747853e-06, 4.74157605e-06,
         9.20070761e-06, 1.03604179e-05, 2.03575146e-05, 3.57565705e-06,
         3.78580677e-04, 1.81650677e-07, 9.71773079e-06, 8.16037797e-08,
         9.71164923e-07, 8.76408740e-05, 9.1004301