In [1]:
import tensorflow as tf
import numpy as np
import datetime
from sklearn.metrics import average_precision_score


In [2]:
class Settings:
    def __init__(self):
        self.vocab_size = 299529
        self.num_steps = 70
        self.num_epochs = 300
        self.num_classes = 12
        self.gru_size = 340
        self.keep_prob = 0.5
        self.num_layers = 1
        self.pos_size = 1
        self.pos_num = 141
        self.big_num = 50


In [3]:
class GRU:
    def __init__(self, is_training, word_embeddings, settings):

        self.num_steps = num_steps = settings.num_steps
        self.vocab_size = vocab_size = settings.vocab_size
        self.num_classes = num_classes = settings.num_classes
        self.gru_size = settings.gru_size
        self.big_num = big_num = settings.big_num
        
        self.input_word = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_wod')
        self.input_pos1 = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_pos1')
        self.input_pos2 = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_pos2')
        self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name='input_y')
        self.total_shape = tf.placeholder(dtype=tf.int32, shape=[big_num + 1], name='total_shape')
        total_num = self.total_shape[-1]
        
        word_embedding = tf.get_variable(initializer=word_embeddings, name='word_embedding')
        pos1_embedding = tf.get_variable('pos1_embedding', [settings.pos_num, settings.pos_size])
        pos2_embedding = tf.get_variable('pos2_embedding', [settings.pos_num, settings.pos_size])
        
        atteintion_w = tf.get_variable('attention_omega', [self.gru_size, 1])
        sen_a = tf.get_variable('attention_A', [self.gru_size])
        sen_r = tf.get_variable('query_r', [self.gru_size, 1])
        relation_embedding = tf.get_variable('relation_embedding', [self.num_classes, self.gru_size])
        sen_d = tf.get_variable('bias_d', [self.num_classes])

        gru_cell_forward = tf.contrib.rnn.GRUCell(self.gru_size)
        gru_cell_backward = tf.contrib.rnn.GRUCell(self.gru_size)
        # settings.keep_prob使用dropout降低过拟合的问题
        if is_training and settings.keep_prob < 1:
            gru_cell_forward = tf.contrib.rnn.DropoutWrapper(gru_cell_forward, output_keep_prob=settings.keep_prob)
            gru_cell_backward = tf.contrib.rnn.DropoutWrapper(gru_cell_backward, output_keep_prob=settings.keep_prob)

        cell_forward = tf.contrib.rnn.MultiRNNCell([gru_cell_forward] * settings.num_layers)
        cell_backward = tf.contrib.rnn.MultiRNNCell([gru_cell_backward] * settings.num_layers)

        sen_repre = []
        sen_alpha = []
        sen_s = []
        sen_out = []
        self.prob = []
        self.predictions = []
        self.loss = []
        self.accuracy = []
        self.total_loss = 0.0

        self._initial_state_forward = cell_forward.zero_state(total_num, tf.float32)
        self._initial_state_backward = cell_backward.zero_state(total_num, tf.float32)

        # embedding layer
        # 实际上tf.nn.embedding_lookup的作用就是找到要寻找的embedding data中的对应的行下的vector。
        # 輸入的shape=[句子数，字数，200词向量+实体1位置编码+实体2位置编码]
        inputs_forward = tf.concat(axis=2, values=[tf.nn.embedding_lookup(word_embedding, self.input_word),
                                                   tf.nn.embedding_lookup(pos1_embedding, self.input_pos1),
                                                   tf.nn.embedding_lookup(pos2_embedding, self.input_pos2)])
        inputs_backward = tf.concat(axis=2,
                                    values=[tf.nn.embedding_lookup(word_embedding, tf.reverse(self.input_word, [1])),
                                            tf.nn.embedding_lookup(pos1_embedding, tf.reverse(self.input_pos1, [1])),
                                            tf.nn.embedding_lookup(pos2_embedding, tf.reverse(self.input_pos2, [1]))])

        outputs_forward = []

        state_forward = self._initial_state_forward

        # Bi-GRU layer
        with tf.variable_scope('GRU_FORWARD') as scope:
            for step in range(num_steps):
                if step > 0:
                    scope.reuse_variables()
                (cell_output_forward, state_forward) = cell_forward(inputs_forward[:, step, :], state_forward)
                outputs_forward.append(cell_output_forward)

        outputs_backward = []

        state_backward = self._initial_state_backward
        with tf.variable_scope('GRU_BACKWARD') as scope:
            for step in range(num_steps):
                if step > 0:
                    scope.reuse_variables()
                (cell_output_backward, state_backward) = cell_backward(inputs_backward[:, step, :], state_backward)
                outputs_backward.append(cell_output_backward)
        # tf.concat(axis=1,  按列拼接
        output_forward = tf.reshape(tf.concat(axis=1, values=outputs_forward), [total_num, num_steps, self.gru_size])
        output_backward = tf.reverse(
            tf.reshape(tf.concat(axis=1, values=outputs_backward), [total_num, num_steps, self.gru_size]),
            [1])

        output_h = tf.add(output_forward, output_backward)
        output_h = tf.reshape(tf.tanh(output_h), [total_num * num_steps, self.gru_size])
        
        # sentence-level attention layer
        for i in range(big_num):
            sen_input_1 = output_h[self.total_shape[i]:self.total_shape[i + 1]]
            sen_input_2 = tf.tanh(sen_input_1)
            sen_repre.append(sen_input_2)

            batch_size = self.total_shape[i + 1] - self.total_shape[i]

            sen_alpha_1 = tf.matmul(sen_input_2, sen_r)
            sen_alpha_2 = tf.reshape(sen_alpha_1, [batch_size])
            sen_alpha_3 = tf.nn.softmax(sen_alpha_2)
            sen_alpha_4 = tf.reshape(sen_alpha_3, [1, batch_size])
            sen_alpha.append(sen_alpha_4)
            # sen_alpha.append(
            #     tf.reshape(tf.nn.softmax(tf.reshape(tf.matmul(tf.multiply(sen_repre[i], sen_a), sen_r), [batch_size])),
            #                [1, batch_size]))

            sen_s_1 = tf.matmul(sen_alpha[i], sen_repre[i])
            sen_s_2 = tf.reshape(sen_s_1, [self.gru_size, 1])
            sen_s.append(sen_s_2)
            # sen_s.append(tf.reshape(tf.matmul(sen_alpha[i], sen_repre[i]), [self.gru_size, 1]))

            sen_out_1 = tf.matmul(relation_embedding, sen_s[i])
            sen_out_2 = tf.reshape(sen_out_1, [self.num_classes])
            sen_out_3 = tf.add(sen_out_2, sen_d)
            sen_out.append(sen_out_3)
            # sen_out.append(tf.add(tf.reshape(tf.matmul(relation_embedding, sen_s[i]), [self.num_classes]), sen_d))

            self.prob.append(tf.nn.softmax(sen_out[i]))

            with tf.name_scope("output"):
                # tf.argmax 最大值的索引值
                self.predictions.append(tf.argmax(self.prob[i], 0, name="predictions"))

            with tf.name_scope("loss"):
                # sen_out[i] 输出结果，input_y[i]实际的分类标签
                self.loss.append(
                    tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=sen_out[i], labels=self.input_y[i])))
                if i == 0:
                    self.total_loss = self.loss[i]
                else:
                    self.total_loss += self.loss[i]

            with tf.name_scope("accuracy"):
                self.accuracy.append(
                    tf.reduce_mean(tf.cast(tf.equal(self.predictions[i], tf.argmax(self.input_y[i], 0)), "float"),
                                   name="accuracy"))

        # tf.summary.scalar('loss',self.total_loss)
        tf.summary.scalar('loss', self.total_loss)
        # regularization 在损失函数上加上正则项是防止过拟合的一个重要方法
        self.l2_loss = tf.contrib.layers.apply_regularization(regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                                                              weights_list=tf.trainable_variables())
        
        self.final_loss = self.total_loss + self.l2_loss
        tf.summary.scalar('l2_loss', self.l2_loss)
        tf.summary.scalar('final_loss', self.final_loss)


In [6]:
def main():
    # the path to save models
    save_path = './实体关系/jupyter_BGRU_2ATT/model/'

    print('reading wordembedding')
    wordembedding = np.load('./实体关系/jupyter_BGRU_2ATT/data/vec.npy')

    print('reading training data')
    train_y = np.load('./实体关系/jupyter_BGRU_2ATT/data/train_y.npy')
    train_word = np.load('./实体关系/jupyter_BGRU_2ATT/data/train_word.npy')
    train_pos1 = np.load('./实体关系/jupyter_BGRU_2ATT/data/train_pos1.npy')
    train_pos2 = np.load('./实体关系/jupyter_BGRU_2ATT/data/train_pos2.npy')

    settings = Settings()
    settings.vocab_size = len(wordembedding)
    settings.num_classes = len(train_y[0])

    big_num = settings.big_num

    with tf.Graph().as_default():

        sess = tf.Session()
        with sess.as_default():
            # 初始化的权重参数
            initializer = tf.contrib.layers.xavier_initializer()
            with tf.variable_scope("model", reuse=None, initializer=initializer):
                m = GRU(is_training=True, word_embeddings=wordembedding, settings=settings)
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(0.0005)

            train_op = optimizer.minimize(m.final_loss, global_step=global_step)
            # 初始化模型的参数
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(max_to_keep=None)

            merged_summary = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter('./train_loss', graph=sess.graph)

            def train_step(word_batch, pos1_batch, pos2_batch, y_batch, big_num):

                feed_dict = {}
                total_shape = []
                total_num = 0
                total_word = []
                total_pos1 = []
                total_pos2 = []
                for i in range(len(word_batch)):
                    total_shape.append(total_num)
                    total_num += len(word_batch[i])
                    for word in word_batch[i]:
                        total_word.append(word)
                    for pos1 in pos1_batch[i]:
                        total_pos1.append(pos1)
                    for pos2 in pos2_batch[i]:
                        total_pos2.append(pos2)
                total_shape.append(total_num)
                total_shape = np.array(total_shape)
                total_word = np.array(total_word)
                total_pos1 = np.array(total_pos1)
                total_pos2 = np.array(total_pos2)

                feed_dict[m.total_shape] = total_shape
                feed_dict[m.input_word] = total_word
                feed_dict[m.input_pos1] = total_pos1
                feed_dict[m.input_pos2] = total_pos2
                feed_dict[m.input_y] = y_batch

                temp, step, loss, accuracy, summary, l2_loss, final_loss = sess.run(
                    [train_op, global_step, m.total_loss, m.accuracy, merged_summary, m.l2_loss, m.final_loss],feed_dict)
                # accuracy为50个句子能够正确预测的数据
                time_str = datetime.datetime.now().isoformat()
                accuracy = np.reshape(np.array(accuracy), (big_num))
                acc = np.mean(accuracy)
                summary_writer.add_summary(summary, step)

                if step % 50 == 0:
                    tempstr = "{}: step {}, softmax_loss {:g}, acc {:g}".format(time_str, step, loss, acc)
                    print(tempstr)

            for one_epoch in range(settings.num_epochs):
                temp_order = list(range(len(train_word)))
                np.random.shuffle(temp_order)
                for i in range(int(len(temp_order) / float(settings.big_num))):

                    temp_word = []
                    temp_pos1 = []
                    temp_pos2 = []
                    temp_y = []

                    temp_input = temp_order[i * settings.big_num:(i + 1) * settings.big_num]
                    for k in temp_input:
                        temp_word.append(train_word[k])
                        temp_pos1.append(train_pos1[k])
                        temp_pos2.append(train_pos2[k])
                        temp_y.append(train_y[k])
                    num = 0
                    for single_word in temp_word:
                        num += len(single_word)
                        if len(single_word) != 1:
                            test = single_word

                    if num > 1500:
                        print('out of range')
                        continue

                    temp_word = np.array(temp_word)
                    temp_pos1 = np.array(temp_pos1)
                    temp_pos2 = np.array(temp_pos2)
                    temp_y = np.array(temp_y)

                    train_step(temp_word, temp_pos1, temp_pos2, temp_y, settings.big_num)

                    current_step = tf.train.global_step(sess, global_step)
                    if current_step > 200 and current_step % 100 == 0:
                        print('saving model')
                        path = saver.save(sess, save_path + 'GRU_new_cw2vec_ATT_GRU_model', global_step=current_step)
                        tempstr = 'have saved model to ' + path
                        print(tempstr)
                        return 0



In [7]:
main()


reading wordembedding
reading training data
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Instructions for updating:
Use tf.cast instead.
2019-12-12T21:41:23.054946: step 50, softmax_loss 102.297, acc 0.34
2019-12-12T21:45:51.352347: step 100, softmax_loss 118.591, acc 0.22
2019-12-12T21:50:22.578549: step 150, softmax_loss 101.286, acc 0.24
2019-12-12T21:54:42.767551: step 200, softmax_loss 110.657, 

0