In [1]:
import os
import json
import codecs
import numpy as np
import tensorflow as tf
import yaml
import DM_process_v1 as DM_process

# Expose only the GPU with index 1 to this process; set before devices are listed.
os.environ.update(CUDA_VISIBLE_DEVICES='1')
gpus = tf.config.experimental.list_physical_devices('GPU')
# Switch on memory growth for every GPU TensorFlow can see.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)
# Last expression of the cell: shows the installed TensorFlow version.
tf.__version__

'2.3.0'

In [18]:
# Hyper-parameters shared by the dataset builder (batch_size), the encoder
# (heads, head_size, out_dim) and the training call (epochs).
params = dict(
    batch_size=16,
    epochs=300,
    drops=[0.1],
    heads=16,
    head_size=8,
    out_dim=64,
)

In [3]:
# Parse the JSON file that holds the lookup tables read in the following cells
# (keys such as 'action2id', 'intent2id', 'entities2id' and their reverses).
with open('./DM_char.json', encoding='utf-8') as f:
    dicts = json.loads(f.read())

In [4]:
path = '../stories/all_stories.yml'
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so it must only
# be used on trusted files. If the stories file is plain YAML, yaml.safe_load would
# be sufficient — confirm before switching.
with open(path, encoding='utf-8') as f:
    dataset = yaml.load(f, Loader=yaml.Loader)

In [5]:
# Tables stored under the '*2id' keys of the loaded JSON.
action2id, intent2id, slots2id = (
    dicts[key] for key in ('action2id', 'intent2id', 'entities2id')
)
# Tables stored under the 'id2*' keys (presumably the reverse mappings).
id2action, id2intent, id2slots = (
    dicts[key] for key in ('id2action', 'id2intent', 'id2entities')
)

In [6]:
# Sizes of the three lookup tables; they set the width of the model's input
# layers and of its softmax output.
previous_action_len, slots_len, user_intent_len = map(
    len, (action2id, slots2id, intent2id)
)

In [7]:
# Run the parsed YAML content through DM_process.split_data; the result is the
# input of DM_process.extract_conv_data in the next cell.
# NOTE(review): what the split produces is defined in DM_process_v1, not here.
data_set = DM_process.split_data(dataset)

In [8]:
# Derive the three model inputs (previous_action, slots, user_intent) and the
# training target (action) from the split stories, using the '*2id' tables.
# NOTE(review): shapes and encodings of the returned arrays are defined in
# DM_process_v1 — presumably one row per dialogue turn; confirm there.
previous_action, slots, user_intent, action = DM_process.extract_conv_data(data_set,action2id,slots2id,intent2id)

In [9]:
def Dataset(previous_action_inputs, slots_inputs, user_intent_inputs, pre_action,
            batch_size=None, shuffle_buffer=None):
    """Wrap feature and label arrays in a batched ``tf.data.Dataset``.

    Every element of the dataset is a ``(features, labels)`` pair of dicts.
    The feature keys (``previous_action_inputs``, ``slots_inputs``,
    ``user_intent_inputs``) and the label key (``pre_action``) are the names
    given to the model's input and output layers.

    Args:
        previous_action_inputs: Array-like sliced along its first axis.
        slots_inputs: Array-like sliced along its first axis.
        user_intent_inputs: Array-like sliced along its first axis.
        pre_action: Array-like of targets, sliced along its first axis.
        batch_size: Examples per batch. ``None`` (default) falls back to
            ``params['batch_size']``, which is the original behaviour.
        shuffle_buffer: When given, examples are shuffled with a buffer of
            this size before batching. ``None`` (default) keeps the original
            order, which is the original behaviour.

    Returns:
        A ``tf.data.Dataset`` yielding batched ``(features, labels)`` dicts.
    """
    if batch_size is None:
        batch_size = params['batch_size']

    features = {
        "previous_action_inputs": previous_action_inputs,
        "slots_inputs": slots_inputs,
        "user_intent_inputs": user_intent_inputs,
    }
    labels = {"pre_action": pre_action}

    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if shuffle_buffer is not None:
        # Shuffle before batching so that batch composition changes each epoch.
        dataset = dataset.shuffle(shuffle_buffer)
    return dataset.batch(batch_size)

In [10]:
# Batched training set built from all extracted arrays; no validation split is
# taken here.
train_dataset =  Dataset(previous_action, slots, user_intent, action)

In [11]:
from tensorflow.keras.layers import Layer

class MultiHeadAttention(Layer):
    """Multi-head scaled dot-product self-attention.

    The layer takes a single rank-3 tensor ``(batch, seq_len, dim)`` and uses
    it as query, key and value. Each is projected to ``heads * head_size``
    features, split into heads, attended, merged again and passed through an
    output projection.

    Args:
        heads: Number of attention heads.
        head_size: Feature size of each head.
        out_dim: Width of the output projection. ``None`` (default) uses
            ``heads * head_size``.
        use_bias: Whether the four Dense projections carry a bias term.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(
            self,
            heads,
            head_size,
            out_dim=None,
            use_bias=True,
            **kwargs
    ):
        super(MultiHeadAttention, self).__init__(**kwargs)
        self.heads = heads
        self.head_size = head_size
        self.out_dim = out_dim
        self.use_bias = use_bias

    def build(self, input_shape):
        """Create the query/key/value/output projections."""
        super(MultiHeadAttention, self).build(input_shape)
        inner_dim = self.head_size * self.heads
        # Dense(units=None) is invalid, so resolve the documented default here.
        # self.out_dim is left as passed so get_config() round-trips.
        out_dim = inner_dim if self.out_dim is None else self.out_dim
        self.q_dense = tf.keras.layers.Dense(
            units=inner_dim,
            use_bias=self.use_bias,
            name='q'
        )
        self.k_dense = tf.keras.layers.Dense(
            units=inner_dim,
            use_bias=self.use_bias,
            name='k'
        )
        self.v_dense = tf.keras.layers.Dense(
            units=inner_dim,
            use_bias=self.use_bias,
            name='v'
        )
        self.o_dense = tf.keras.layers.Dense(
            units=out_dim,
            use_bias=self.use_bias,
            name='o'
        )

    def call(self, inputs):
        """Apply self-attention to ``inputs`` and return ``(batch, seq_len, out_dim)``."""
        q = inputs
        k = inputs
        v = inputs
        # Linear projections.
        qw = self.q_dense(q)
        kw = self.k_dense(k)
        vw = self.v_dense(v)
        # Split the feature axis into (heads, head_size).
        seq_len = tf.shape(q)[1]
        qw = tf.reshape(qw, (-1, seq_len, self.heads, self.head_size))
        kw = tf.reshape(kw, (-1, seq_len, self.heads, self.head_size))
        vw = tf.reshape(vw, (-1, seq_len, self.heads, self.head_size))
        # Attention.
        qkv_inputs = [qw, kw, vw]
        o = self.pay_attention_to(qkv_inputs)
        # Merge the heads back into one feature axis.
        o = tf.reshape(o, (-1, tf.shape(o)[1], self.head_size * self.heads))
        o = self.o_dense(o)
        return o

    def pay_attention_to(self, inputs):
        """Scaled dot-product attention on per-head tensors.

        Args:
            inputs: Sequence whose first three items are the query, key and
                value tensors shaped ``(batch, seq_len, heads, head_size)``.

        Returns:
            Tensor shaped ``(batch, seq_len, heads, head_size)``.
        """
        (qw, kw, vw) = inputs[:3]
        # Scores for every (query position j, key position k) pair per head.
        a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
        a = a / self.head_size ** 0.5
        # Softmax over the last axis, i.e. over key positions.
        A = tf.nn.softmax(a)
        o = tf.einsum('bhjk,bkhd->bjhd', A, vw)
        return o

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super(MultiHeadAttention, self).get_config()
        config.update({
            'heads': self.heads,
            'head_size': self.head_size,
            'out_dim': self.out_dim,
            'use_bias': self.use_bias,
        })
        return config

In [12]:
from tensorflow.keras.layers import concatenate, Dropout,LayerNormalization, Dense, add

class Encoder(tf.keras.models.Model):
    """Transformer-style encoder applied ``layer_count`` times.

    One pass is: self-attention -> dropout -> residual add -> layer norm ->
    Dense(relu) -> Dense -> dropout -> residual add -> layer norm.

    The sub-layers are created once in ``build`` and the same instances are
    reused on every pass, so all ``layer_count`` passes share their weights.
    NOTE(review): if independent weights per pass were intended, the
    sub-layers would have to be created per pass — confirm.

    Attention and feed-forward sizes are read from the module-level ``params``
    dict when the model is built. Because of the residual additions, the last
    dimension of the input must equal ``params['out_dim']``.

    Args:
        layer_count: Number of times the encoder pass is applied.
        **kwargs: Forwarded to ``tf.keras.models.Model``.
    """

    def __init__(
            self,
            layer_count,
            **kwargs
    ):
        super(Encoder, self).__init__(**kwargs)
        self.layer_count = layer_count

    def build(self, input_shape):
        """Create the (shared) sub-layers of one encoder pass."""
        self.MultiHeadAttention = MultiHeadAttention(heads=params['heads'], head_size=params['head_size'], out_dim=params['out_dim'])
        self.dropout= Dropout(0.1)
        self.l1 = LayerNormalization(name='normal')
        self.feed1 = Dense(params['out_dim'], activation='relu',name='feed')
        self.feed2 = Dense(params['out_dim'],name='feed1')
        self.dropout1 = Dropout(0.1)
        self.l_1 = LayerNormalization(name='normal1')

    def call(self, inputs):
        """Run ``layer_count`` encoder passes over ``inputs`` and return the result."""
        state = inputs
        for i in range(self.layer_count):
            # Attention sub-block with residual connection and normalisation.
            att = self.MultiHeadAttention(state)
            att1 = self.dropout(att)
            ad = add([att1, state])
            l1 = self.l1(ad)
            # Feed-forward sub-block with residual connection and normalisation.
            feed1 = self.feed1(l1)
            feed2 = self.feed2(feed1)
            dropout_1 = self.dropout1(feed2)
            ad1 = add([l1,dropout_1])
            l_1 = self.l_1(ad1)
            state = l_1
        return state

    def get_config(self):
        """Return the constructor arguments.

        A subclassed Model has no usable default config, and a functional
        model that contains this encoder needs one to be saved as a whole.
        """
        return {'name': self.name, 'layer_count': self.layer_count}

    @classmethod
    def from_config(cls, config, custom_objects=None):
        """Rebuild the encoder from the dict produced by ``get_config``."""
        return cls(**config)

In [21]:
tf.keras.backend.clear_session()

# The encoder adds its input to the attention output (residual connection), so
# the embedding width has to equal params['out_dim']. Derive it from params
# instead of repeating the literal 64, so the two cannot drift apart.
embed_dim = params['out_dim']
# NOTE(review): 128 is the embedding table size; it must be larger than the
# biggest integer fed to each input — confirm against DM_process.extract_conv_data.
embed_vocab = 128

# One input per feature group; the names match the keys of the dataset's feature dict.
previous_action_inputs = tf.keras.layers.Input(shape=(previous_action_len,), name = 'previous_action_inputs')
slots_inputs = tf.keras.layers.Input(shape = (slots_len,), name = 'slots_inputs')
user_intent_inputs = tf.keras.layers.Input(shape = (user_intent_len,), name = 'user_intent_inputs')

# Separate embedding table per input (created in the same order as before so
# the auto-generated layer names are unchanged).
previous_action_embed = tf.keras.layers.Embedding(embed_vocab, embed_dim)(previous_action_inputs)
slots_embed = tf.keras.layers.Embedding(embed_vocab, embed_dim)(slots_inputs)
user_intent_embed = tf.keras.layers.Embedding(embed_vocab, embed_dim)(user_intent_inputs)

# Join the three embedded sequences along the sequence axis.
utter_inputs = tf.keras.layers.concatenate([previous_action_embed,slots_embed,user_intent_embed],axis=1)
atten = Encoder(layer_count=3)(utter_inputs)
# Despite the variable name, this is a bidirectional GRU, not an LSTM.
bilstm = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(128,return_sequences=True))(atten)
# Average over the sequence axis (global average pooling, no convolution involved).
x_conv = tf.keras.layers.GlobalAveragePooling1D()(bilstm)
# Softmax over all actions; the layer name matches the label key of the dataset.
pre_action = tf.keras.layers.Dense(previous_action_len, activation='softmax',name = 'pre_action')(x_conv)
model = tf.keras.Model([previous_action_inputs,slots_inputs,user_intent_inputs],pre_action)
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
previous_action_inputs (InputLa [(None, 35)]         0                                            
__________________________________________________________________________________________________
slots_inputs (InputLayer)       [(None, 13)]         0                                            
__________________________________________________________________________________________________
user_intent_inputs (InputLayer) [(None, 43)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 35, 64)       8192        previous_action_inputs[0][0]     
_______________________________________________________________________________________

In [22]:
# Loss and metric are keyed by the name of the output layer ('pre_action').
losses = dict(pre_action='categorical_crossentropy')
metrics = dict(pre_action=['accuracy'])
optimizer = tf.keras.optimizers.Nadam()
model.compile(optimizer=optimizer, loss=losses, metrics=metrics)

In [23]:
file_path = '../DM_model_weight/DM_weight_encoder.h5'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(file_path,
#                                                         save_weights_only=False, save_best_only=True)
# Training in this notebook runs without validation data, so the default monitor
# ('val_loss') is never logged and the callback could never trigger. Watch the
# training loss instead: halve the learning rate after 50 epochs without improvement.
learning_rate_reduction = tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=50, factor=0.5)
callbacks_list = [learning_rate_reduction]

In [24]:
# Hand the prepared callbacks to fit(); without this argument callbacks_list is
# built but never takes part in training.
model.fit(train_dataset, epochs=params['epochs'], callbacks=callbacks_list)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f1264764dd0>

In [15]:
# model.save_weights('../DM_model_weight/DM_weight_629.h5')

In [16]:
# Write the whole model to file_path. The path ends in '.h5', so Keras uses the
# HDF5 format.
# NOTE(review): HDF5 whole-model saving serialises every layer through
# get_config(); custom layers and subclassed models must implement it — confirm
# this cell runs on a fresh kernel, or use model.save_weights(file_path).
model.save(file_path)