In [1]:
import tensorflow as tf
import os
import numpy as np
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn
from tensorflow.contrib.rnn import GRUCell
import math
tf.logging.set_verbosity(tf.logging.INFO)


In [2]:
def get_default_params():
    return tf.contrib.training.HParams(
        num_embedding_size = 32,
        num_timesteps = 120,
        num_gru_size = 64,
        batch_size = 100,
        clip_lstm_grads = 1.0,
        num_fc_nodes = 64,
        learning_rate = 0.001,
        num_word_threshold = 10,
        num_attention_size = 50,

    )

hps = get_default_params()

train_file = r'.\deep_learn\jd_deep_learn\train_data.tsv'
test_file = r'.\deep_learn\jd_deep_learn\test_data.tsv'

seg_train_file = r'.\deep_learn\jd_deep_learn\seg_train_data.txt'
seg_test_file = r'.\deep_learn\jd_deep_learn\seg_test_data.txt'

vocab_padding_file = r'.\deep_learn\jd_deep_learn\jd_padding_vocab.txt'
category_file = r'.\deep_learn\jd_deep_learn\jd_category.txt'
output_dir = r'.\deep_learn\jd_deep_learn\jd_attention_gru_runout'

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

In [3]:
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """
    Attention mechanism layer which reduces RNN/Bi-RNN outputs with Attention vector.

    The idea was proposed in the article by Z. Yang et al., "Hierarchical Attention Networks
     for Document Classification", 2016: http://www.aclweb.org/anthology/N16-1174.
    Variables notation is also inherited from the article
    
    Args:
        inputs: The Attention inputs.
            Matches outputs of RNN/Bi-RNN layer (not final state):
                In case of RNN, this must be RNN outputs `Tensor`:
                    If time_major == False (default), this must be a tensor of shape:
                        `[batch_size, max_time, cell.output_size]`.
                    If time_major == True, this must be a tensor of shape:
                        `[max_time, batch_size, cell.output_size]`.
                In case of Bidirectional RNN, this must be a tuple (outputs_fw, outputs_bw) containing the forward and
                the backward RNN outputs `Tensor`.
                    If time_major == False (default),
                        outputs_fw is a `Tensor` shaped:
                        `[batch_size, max_time, cell_fw.output_size]`
                        and outputs_bw is a `Tensor` shaped:
                        `[batch_size, max_time, cell_bw.output_size]`.
                    If time_major == True,
                        outputs_fw is a `Tensor` shaped:
                        `[max_time, batch_size, cell_fw.output_size]`
                        and outputs_bw is a `Tensor` shaped:
                        `[max_time, batch_size, cell_bw.output_size]`.
        attention_size: Linear size of the Attention weights.
        time_major: The shape format of the `inputs` Tensors.
            If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
            If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
            Using `time_major = True` is a bit more efficient because it avoids
            transposes at the beginning and end of the RNN calculation.  However,
            most TensorFlow data is batch-major, so by default this function
            accepts input and emits output in batch-major form.
        return_alphas: Whether to return attention coefficients variable along with layer's output.
            Used for visualization purpose.
    Returns:
        The Attention output `Tensor`.
        In case of RNN, this will be a `Tensor` shaped:
            `[batch_size, cell.output_size]`.
        In case of Bidirectional RNN, this will be a `Tensor` shaped:
            `[batch_size, cell_fw.output_size + cell_bw.output_size]`.
    """

    if isinstance(inputs, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        inputs = tf.array_ops.transpose(inputs, [1, 0, 2])

    hidden_size = inputs.shape[2].value  # D value - hidden size of the RNN layer

    # Trainable parameters
    w_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    with tf.name_scope('v'):
        # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
        #  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
        v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)

    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    alphas = tf.nn.softmax(vu, name='alphas')         # (B,T) shape

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)

    if not return_alphas:
        return output
    else:
        return output, alphas



class Vocab:
    def __init__(self, filename, num_word_threshold):
        self._word_to_id = {}
        self._unk = -1
        self._padding = 1
        self._num_word_threshold = num_word_threshold
        self._read_dict(filename)
        
    def _read_dict(self, filename):
        with open(filename, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        for line in lines:
            word, frequency = line.strip('\r\n').split('\t')
            frequency = int(frequency)
            if frequency < self._num_word_threshold:
                continue
            idx = len(self._word_to_id)
            if word == '<UNK>':
                self._unk = idx
            self._word_to_id[word] = idx
    def word_to_id(self, word):
        return self._word_to_id.get(word, self._unk)
    
    @property
    def unk(self):
        return self._unk
    @property
    def padding(self):
        return self._padding
    
    def size(self):
        return len(self._word_to_id)
    
    def sentence_to_id(self, sentence):
        
        word_ids = [self.word_to_id(cur_word) for cur_word in sentence.split()]
        return word_ids
class CategoryDict:
    def __init__(self, filename):
        self._category_to_id = {}
        with open(filename, 'r',encoding='utf-8') as f:
            lines = f.readlines()
        for line in lines:
            category = line.strip('\r\n')
            idx = len(self._category_to_id)
            self._category_to_id[category] = idx
    def size(self):
        print('category_to_id dict:{}'.format(self._category_to_id))
        return len(self._category_to_id)
    
    def category_to_id(self, category):
        if not category in self._category_to_id:
            raise Exception("{} is not in our category".format(category))
        
        return self._category_to_id[category]
    
          
vocab = Vocab(vocab_padding_file, hps.num_word_threshold)
vocab_size = vocab.size()

tf.logging.info('vocab_size: {}'.format(vocab_size))
category_vocab = CategoryDict(category_file)
num_classes = category_vocab.size()
tf.logging.info('category_size: {}'.format(num_classes))
test_str = '5'
tf.logging.info('id:{}'.format(category_vocab.category_to_id(test_str)))

INFO:tensorflow:vocab_size: 4762
category_to_id dict:{'5': 0, '1': 1, '2': 2, '3': 3}
INFO:tensorflow:category_size: 4
INFO:tensorflow:id:0


In [4]:
class TextDataSet:
    def __init__(self, filename, vocab, category_vocab, num_timesteps):
        self._vocab = vocab
        self._category_vocab = category_vocab
        self._num_timesteps = num_timesteps
        
        self._inputs = []
        self._outputs = []
        
        self._indicator = 0
        self._parse_file(filename)
        
    def _parse_file(self, filename):
        tf.logging.info('Loading data from {}'.format(filename))
        import csv
        csv_reader = csv.reader(open(filename, encoding='utf-8'))
        for row in csv_reader:
            label, content = row[0].replace('\n', '').split('\t')
            id_label = self._category_vocab.category_to_id(label) # 标签转number
            # print(id_label)
            id_words = self._vocab.sentence_to_id(content) # 文字转number
            # print(id_words)
            
            id_words = id_words[0: self._num_timesteps] # 过长截断
            padding_num = self._num_timesteps - len(id_words) # 过短padding
            id_words = id_words + [self._vocab.padding for _ in range(padding_num)]
            self._inputs.append(id_words)
            self._outputs.append(id_label)
            
        self._inputs = np.asarray(self._inputs, dtype=np.int32)
        self._outputs = np.asarray(self._outputs, dtype=np.int32)
        self._random_shuffle()
        
    def _random_shuffle(self):
        p = np.random.permutation(len(self._inputs))
        self._inputs = self._inputs[p]
        self._outputs = self._outputs[p]
    
    def next_batch(self, batch_size):
        end_indicator = self._indicator + batch_size
        if end_indicator > len(self._inputs):
            self._random_shuffle()
            self._indicator = 0
            end_indicator = batch_size
        if end_indicator > len(self._inputs):
            raise Exception("batch size: {} is too large".format(batch_size))
        
        batch_inputs = self._inputs[self._indicator: end_indicator]
        batch_output = self._outputs[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_inputs, batch_output

train_dataset = TextDataSet(seg_train_file, vocab, category_vocab, hps.num_timesteps)
test_dataset = TextDataSet(seg_test_file, vocab, category_vocab, hps.num_timesteps)

INFO:tensorflow:Loading data from .\deep_learn\jd_deep_learn\seg_train_data.txt
INFO:tensorflow:Loading data from .\deep_learn\jd_deep_learn\seg_test_data.txt


In [5]:
# print(train_dataset.next_batch(3)[0])
for i in train_dataset.next_batch(3)[0].tolist():
    
    if 1 in i:
        print(i.index(1))
    else:
        print(120)

# print(np.array([list(x).index(1)+1 if 1 in x else hps.num_timesteps for x in train_dataset.next_batch(3)]))


35
17
33


In [6]:
def create_model(hps, vocab_size, num_classes):
    num_timesteps = hps.num_timesteps
    batch_size = hps.batch_size
    
    inputs = tf.placeholder(tf.int32, (None, num_timesteps), name='inputs')
    outputs = tf.placeholder(tf.int32, (None, ), name='outputs')
    seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name='global_step', trainable=False)
    embedding_initalizer = tf.random_uniform_initializer(-1.0, 1.0)
    with tf.variable_scope(
        'embedding', initializer=embedding_initalizer
    ):
        embeddings = tf.get_variable(
            'embedding', [vocab_size+2, hps.num_embedding_size],
            tf.float32
        )
        tf.summary.histogram('embeddings', embeddings)
        embed_inputs = tf.nn.embedding_lookup(embeddings, inputs)
    scale = 1.0 / math.sqrt(hps.num_embedding_size+hps.num_gru_size) / 3.0
    gru_init = tf.random_uniform_initializer(-scale, scale)
    with tf.variable_scope('gru', initializer=gru_init):
        rnn_outputs, _ = bi_rnn(GRUCell(hps.num_gru_size), 
                                GRUCell(hps.num_gru_size),
                                inputs=embed_inputs, 
                                sequence_length=seq_len_ph, 
                                dtype=tf.float32)
        
        tf.summary.histogram('rnn_outputs', rnn_outputs)
    attention_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    
    with tf.variable_scope('attention', initializer=attention_init):
        attention_output, alphas = \
            attention(rnn_outputs, 
                      hps.num_embedding_size, 
                      return_alphas=True)
        tf.summary.histogram('alphas', alphas)
        
    drop = tf.nn.dropout(attention_output, keep_prob)
    fc_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    with tf.variable_scope('fc', initializer=fc_init):
        fc1 = tf.layers.dense(drop,
                              hps.num_fc_nodes,
                              activation=tf.nn.relu,
                              name='fc1')
        fc1_droput = tf.layers.dropout(fc1, keep_prob)
        logits = tf.layers.dense(fc1_droput,
                                 num_classes,
                                 name='fc2')
    with tf.name_scope('metrics'):
        sofmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=outputs
        )
        loss = tf.reduce_mean(sofmax_loss)
        
        y_pre = tf.arg_max(tf.nn.softmax(logits=logits),
                           1,
                           output_type=tf.int32, name='y_pre')
        correct_pred = tf.equal(outputs, y_pre)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
    with tf.name_scope('train_op'):
        tvars = tf.trainable_variables()
        for var in tvars:
            tf.logging.info('variable name: {}'.format(var.name))
            
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, tvars), hps.clip_lstm_grads
        )
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)
    
    return ((inputs, outputs, keep_prob, seq_len_ph),
            (loss, accuracy),
            (train_op, global_step)
            )

placeholders, metrics, others = create_model(
    hps, vocab_size, num_classes
)
inputs, outputs, keep_prod, seq_len_ph = placeholders
loss, accuracy = metrics
train_op, global_step = others

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Use `tf.math.argmax` instead
INFO:tensorflow:variable name: embedding/embedding:0
INFO:tensorflow:variable name: gru/bidirectional_rnn/fw/gru_ce

In [7]:

loss_summary = tf.summary.scalar('loss', loss)
accuracy_summary = tf.summary.scalar('accuracy', accuracy)


megred_summary = tf.summary.merge_all()
megred_summary_test = tf.summary.merge([loss_summary, accuracy_summary])

LOG_DIR = r'.\deep_learn\jd_deep_learn\jd_attention_gru_runout'
run_label = 'run_jd_comment_attention_tensorboard'
run_dir = os.path.join(LOG_DIR, run_label)
if not os.path.exists(run_dir):
    os.mkdir(run_dir)
train_log_dir = os.path.join(run_dir, 'train')
test_log_dir = os.path.join(run_dir, 'test')

if not os.path.exists(train_log_dir):
    os.mkdir(train_log_dir)

if not os.path.exists(test_log_dir):
    os.mkdir(test_log_dir)


In [8]:
init_op = tf.global_variables_initializer()
train_keep_prob_value = 0.8
test_keep_prob_value = 1.0

test_steps = 100
num_train_steps = 10000
output_summary_every_steps = 100


with tf.Session() as sess:
    sess.run(init_op)
    train_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    test_writer = tf.summary.FileWriter(test_log_dir)
    
    for i in range(num_train_steps):
        batch_inputs, batch_labels = train_dataset.next_batch(
            hps.batch_size
        )
        eval_ops = [loss, accuracy, train_op, global_step]
        should_output_summary = ((i+1) % output_summary_every_steps == 0)
        if should_output_summary:
            eval_ops.append(megred_summary)
        # print(batch_inputs)
        # print(batch_inputs.tolist())
        seq_len = np.array([x.index(1)+1 if 1 in x else hps.num_timesteps for x in batch_inputs.tolist()])
        # print(seq_len.shape)
        # [loss, accuracy, train_op, global_step, mergred_summary]
        outputs_val = sess.run(eval_ops,
                               feed_dict = {
                                inputs: batch_inputs,
                                outputs: batch_labels,
                                   keep_prod: train_keep_prob_value,
                                   seq_len_ph: seq_len
                               })
        loss_val, accuracy_val, = outputs_val[0:2]
        if should_output_summary:
            train_summary_str = outputs_val[-1]
            train_writer.add_summary(train_summary_str, i+1)
            test_summarys_str = sess.run([megred_summary_test],
                                         feed_dict={
                                             inputs: batch_inputs,
                                            outputs: batch_labels,
                                            keep_prod: train_keep_prob_value,
                                                seq_len_ph: seq_len 
                                         })[0]
            test_writer.add_summary(test_summarys_str, i+1)
        if (i+1) % 100 == 0:
            tf.logging.info("Train Step: {}, loss: {}, accuracy: {}".format(i, loss_val, accuracy_val))

        if (i+1) % 1000 == 0:
            all_test_acc_cal = []
            for j in range(test_steps):
                test_inputs, test_labels = test_dataset.next_batch(hps.batch_size)
                seq_len = np.array([list(x).index(1)+1 if 1 in x else hps.num_timesteps for x in test_inputs])
                test_val = sess.run([loss, accuracy, train_op, global_step],
                # test_val = sess.run([loss, accuracy, optimzer],
                                    feed_dict= {
                                        inputs: test_inputs,
                                        outputs: test_labels,
                                        seq_len_ph: seq_len,
                                        keep_prod: test_keep_prob_value,
                                    })
                test_loss_val, test_accuarcy_val, _, test_step_val = test_val
                # test_loss_val, test_accuarcy_val, optimzer = test_val
                all_test_acc_cal.append(test_accuarcy_val)
            test_acc = np.mean(all_test_acc_cal)
            tf.logging.info("Test Step: {}, loss: {}, accuracy: {}".format(i, test_loss_val, test_acc))
    builder = tf.saved_model.builder.SavedModelBuilder(r'.\deep_learn\jd_deep_learn\jd_attention_gru_runout\1')
    input = {
        'inputs': tf.saved_model.utils.build_tensor_info(inputs), 
            'keep_prob': tf.saved_model.utils.build_tensor_info(keep_prod),
        'seq_len_ph': tf.saved_model.utils.build_tensor_info(seq_len_ph)
             }
    for j in sess.graph.as_graph_def().node:
        print(j.name)
    output = {'outputs': tf.saved_model.utils.build_tensor_info(sess.graph.get_tensor_by_name('y_pre'))}
    # signature = tf.saved_model.signature_def_utils.build_signature_def(input, output, 'jd_comment_lstm')
    signature_def_map = {'jd_comment_attention_gru_predict':tf.saved_model.signature_def_utils.build_signature_def(
        inputs=input, outputs=output,method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
    )}
    builder.add_meta_graph_and_variables(sess, tags=[tf.saved_model.tag_constants.SERVING],
                                         signature_def_map=signature_def_map)
    builder.save()
    print('builder done')

INFO:tensorflow:Train Step: 99, loss: 1.116432785987854, accuracy: 0.5299999713897705
INFO:tensorflow:Train Step: 199, loss: 1.0858169794082642, accuracy: 0.5699999928474426
INFO:tensorflow:Train Step: 299, loss: 1.0871084928512573, accuracy: 0.4399999976158142
INFO:tensorflow:Train Step: 399, loss: 0.9055508375167847, accuracy: 0.5699999928474426
INFO:tensorflow:Train Step: 499, loss: 1.0280667543411255, accuracy: 0.6200000047683716
INFO:tensorflow:Train Step: 599, loss: 0.8829067945480347, accuracy: 0.6200000047683716
INFO:tensorflow:Train Step: 699, loss: 0.8730707764625549, accuracy: 0.6100000143051147
INFO:tensorflow:Train Step: 799, loss: 0.8226014971733093, accuracy: 0.6000000238418579
INFO:tensorflow:Train Step: 899, loss: 0.8157306909561157, accuracy: 0.6800000071525574
INFO:tensorflow:Train Step: 999, loss: 0.807142972946167, accuracy: 0.6100000143051147
INFO:tensorflow:Test Step: 999, loss: 0.8718658685684204, accuracy: 0.6102999448776245
INFO:tensorflow:Train Step: 1099, lo

KeyError: "The name 'y_pre:0' refers to a Tensor which does not exist. The operation, 'y_pre', does not exist in the graph."