# 建立并训练网络

In [1]:
from __future__ import print_function
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from six.moves import cPickle as pickle
from six.moves import range
from RNNCell_diy import diyLSTMCell

%matplotlib inline

读取训练数据

In [2]:
# Load the pickled training set: images, per-position labels and label counts.
pfile = './concat_image_train_data.pickle'
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(pfile, "rb") as f:
    train_data = pickle.load(f)
# Append a trailing axis of length 1 so the images line up with the
# (batch, height, width, channels) placeholder used by the network.
# reshape() is used instead of assigning to `.shape`, which NumPy discourages
# and which would also silently reshape the array stored inside `train_data`.
train_dataset = train_data["train_dataset"]
train_dataset = train_dataset.reshape(train_dataset.shape + (1,))
train_labels = train_data["train_labels"]
train_labels_num = train_data["train_labels_num"]

In [3]:
# Class dictionary: the letters a..j map to ids 0..9, and the extra "remain"
# symbol takes id 10.
cdic = {letter: index for index, letter in enumerate("abcdefghij")}
cdic["remain"] = 10
# Reverse lookup: class id -> symbol.
cdic_r = {index: symbol for symbol, index in cdic.items()}

In [4]:
# Geometry of one input image as fed to the network: height x width x channels.
image_size_h, image_size_w = 56, 112
num_channels = 1

构建网络结构

In [5]:
# ---- Hyper-parameters ----
batch_size = 50
kernel_size = 5        # side length of both square convolution kernels
pooling_size = 2       # window and stride of the second max-pool
pool1_size_h = 2       # window and stride of the first max-pool along the height
pool1_size_w = 4       # window and stride of the first max-pool along the width
channels_one = 12
channels_two = 32
hidden1_size = 300
hidden2_size = 260
dropout = 0.5          # probability of dropping a unit
# Single source of truth for the keep probability. The original code passed
# `dropout` as keep_prob in one place and `1.0 - dropout` in another, which
# only agree when dropout == 0.5.
keep_prob = 1.0 - dropout
lam = 0.01             # weight of the L2 penalty in the loss
max_time_step = 3
rnn_num_nodes = 260
rnn_num_layers = 3


def _valid_output_size(size, window, stride):
    """Length of one axis after an unpadded (VALID) convolution or pooling."""
    return (size - window) // stride + 1


def _conv_feature_size(size, first_pool):
    """Length of one spatial axis at the output of conv_model's second pooling.

    The first convolution uses SAME padding with stride 1 and therefore keeps
    the size; the remaining three stages all use VALID padding.
    """
    size = _valid_output_size(size, first_pool, first_pool)        # first max-pool
    size = _valid_output_size(size, kernel_size, 1)                # second convolution
    return _valid_output_size(size, pooling_size, pooling_size)    # second max-pool


graph = tf.Graph()
with graph.as_default():
    # Input data
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size_h, image_size_w, num_channels))
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, max_time_step))
    tf_train_labels_num = tf.placeholder(tf.int32, shape=(batch_size))

    # Variables.
    # NOTE: the creation order of the unnamed variables determines their
    # auto-generated names ("Variable", "Variable_1", ...), which are the keys
    # used in saved checkpoints; keep the order stable.
    layer_weight1 = tf.Variable(tf.truncated_normal([kernel_size, kernel_size, num_channels, channels_one], stddev=0.1))
    layer_biases1 = tf.Variable(tf.zeros([channels_one]), name="bias1")

    layer_weight3 = tf.Variable(tf.truncated_normal([kernel_size, kernel_size, channels_one, channels_two], stddev=0.1))
    layer_biases3 = tf.Variable(tf.constant(1.0, shape=[channels_two]), name="bias3")

    # Number of features after flattening the second pooling output. Height and
    # width are tracked separately because the first pooling is not square; the
    # previous formula squared the height-only result, which is correct only
    # when both axes happen to end up with the same length.
    neuron_num = (_conv_feature_size(image_size_h, pool1_size_h)
                  * _conv_feature_size(image_size_w, pool1_size_w)
                  * channels_two)
    layer_weight5 = tf.Variable(tf.truncated_normal([neuron_num, hidden1_size], stddev=0.1))
    layer_biases5 = tf.Variable(tf.constant(1.0, shape=[hidden1_size]), name="bias5")

    layer_weight6 = tf.Variable(tf.truncated_normal([hidden1_size, hidden2_size], stddev=0.1))
    layer_biases6 = tf.Variable(tf.constant(1.0, shape=[hidden2_size]), name="bias6")

    # Classifier weights and biases.
    # tf.truncated_normal takes (shape, mean, stddev): the former positional
    # call (shape, -0.1, 0.1) set the *mean* to -0.1 instead of bounding the
    # values. Use a zero-mean initialiser like the other layers.
    w = tf.Variable(tf.truncated_normal([rnn_num_nodes, len(cdic)], stddev=0.1))
    b = tf.Variable(tf.zeros([len(cdic)]), name="bias_o")

    # Model
    def conv_model(data):
        """Convolutional encoder: image batch -> (batch, hidden2_size) features."""
        conv = tf.nn.conv2d(data, layer_weight1, [1, 1, 1, 1], padding="SAME") + layer_biases1
        pooling = tf.nn.relu(tf.nn.max_pool(
            conv, [1, pool1_size_h, pool1_size_w, 1], [1, pool1_size_h, pool1_size_w, 1], padding="VALID"))
        conv = tf.nn.conv2d(pooling, layer_weight3, [1, 1, 1, 1], padding="VALID") + layer_biases3
        pooling = tf.nn.relu(tf.nn.max_pool(
            conv, [1, pooling_size, pooling_size, 1], [1, pooling_size, pooling_size, 1], padding="VALID"))
        shape = pooling.get_shape().as_list()
        reshape = tf.reshape(pooling, [shape[0], shape[1] * shape[2] * shape[3]])
        # NOTE(review): hidden1 has no non-linearity before dropout, so the two
        # dense layers are separated only by dropout - confirm this is intended.
        hidden1 = tf.nn.dropout(tf.matmul(reshape, layer_weight5) + layer_biases5, keep_prob=keep_prob)
        hidden2 = tf.nn.relu(tf.matmul(hidden1, layer_weight6) + layer_biases6)
        return hidden2

    def _build_rnn_cells():
        """Stack rnn_num_layers LSTM cells, each with dropout on its input."""
        cell_list = []
        for _ in range(rnn_num_layers):
            rnn_cell = tf.contrib.rnn.BasicLSTMCell(rnn_num_nodes)
            rnn_cell = tf.contrib.rnn.DropoutWrapper(cell=rnn_cell, input_keep_prob=keep_prob)
            cell_list.append(rnn_cell)
        return tf.contrib.rnn.MultiRNNCell(cell_list)

    def rnn_model1(input_data):
        """First RNN (variable scope "rnn1"): runs over the image features.

        Returns the (outputs, final_state) pair of tf.nn.dynamic_rnn.
        """
        rnn_cells = _build_rnn_cells()
        with tf.variable_scope("rnn1"):
            rnn_outputs, rnn_state = tf.nn.dynamic_rnn(
                rnn_cells,
                input_data,
                dtype=tf.float32)
        return rnn_outputs, rnn_state

    def rnn_model2(input_data, init_state):
        """Second RNN (variable scope "rnn2"): starts from init_state.

        Each sequence is processed for tf_train_labels_num + 1 steps.
        Returns the (outputs, final_state) pair of tf.nn.dynamic_rnn.
        """
        rnn_cells = _build_rnn_cells()
        with tf.variable_scope("rnn2"):
            rnn_outputs, rnn_state = tf.nn.dynamic_rnn(
                rnn_cells,
                input_data,
                dtype=tf.float32,
                initial_state=init_state,
                sequence_length=tf_train_labels_num + 1,
                swap_memory=True)
        return rnn_outputs, rnn_state

    # Loss
    conv_out = conv_model(tf_train_dataset)
    # Present the image features to the first RNN as a sequence of length 1.
    conv_out = tf.reshape(conv_out, [batch_size, 1, -1])
    rnn1_output, rnn1_state = rnn_model1(conv_out)
    # Second-RNN input: the labels shifted right, with "remain" as first symbol.
    input_data = tf.concat((tf.constant(cdic["remain"], shape=[batch_size, 1]), tf_train_labels), 1)
    input_data = tf.one_hot(input_data, len(cdic))
    # Targets: the labels followed by "remain", flattened in time-major order so
    # that they line up with the transposed/flattened RNN outputs below.
    tf_train_labels_c = tf.concat((tf_train_labels, tf.constant(cdic["remain"], shape=[batch_size, 1])), 1)
    tf_train_labels_c = tf.reshape(tf.transpose(tf_train_labels_c), [-1])
    tf_train_labels_c = tf.one_hot(tf_train_labels_c, len(cdic))
    rnn2_outputs, _ = rnn_model2(input_data, rnn1_state)
    rnn2_outputs = tf.transpose(rnn2_outputs, [1, 0, 2])
    rnn2_outputs = tf.reshape(rnn2_outputs, [-1, rnn_num_nodes])
    logits = tf.matmul(rnn2_outputs, w) + b

    # L2 penalty on every trainable variable whose name does not contain "bias".
    tv = tf.trainable_variables()
    regularization_cost = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv if not ("bias" in v.name)])
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels_c, logits=logits)) + lam * regularization_cost

    # Optimizer.
    # global_step is a counter, not a model parameter: mark it non-trainable so
    # it is not handed to compute_gradients or listed among trainable variables.
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        1.0, global_step, 100, 0.95, staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    gradients, v = zip(*optimizer.compute_gradients(loss))
    gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
    # `optimizer` is rebound to the training op on purpose: the training cells
    # fetch it under that name.
    optimizer = optimizer.apply_gradients(
        zip(gradients, v), global_step=global_step)

    # Predictions for the training data.
    train_prediction = tf.nn.softmax(logits)

    # Model saving
    print(tv)
    saver = tf.train.Saver()

[<tf.Variable 'Variable:0' shape=(5, 5, 1, 12) dtype=float32_ref>, <tf.Variable 'bias1:0' shape=(12,) dtype=float32_ref>, <tf.Variable 'Variable_1:0' shape=(5, 5, 12, 32) dtype=float32_ref>, <tf.Variable 'bias3:0' shape=(32,) dtype=float32_ref>, <tf.Variable 'Variable_2:0' shape=(4608, 300) dtype=float32_ref>, <tf.Variable 'bias5:0' shape=(300,) dtype=float32_ref>, <tf.Variable 'Variable_3:0' shape=(300, 260) dtype=float32_ref>, <tf.Variable 'bias6:0' shape=(260,) dtype=float32_ref>, <tf.Variable 'Variable_4:0' shape=(260, 11) dtype=float32_ref>, <tf.Variable 'bias_o:0' shape=(11,) dtype=float32_ref>, <tf.Variable 'rnn1/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0' shape=(520, 1040) dtype=float32_ref>, <tf.Variable 'rnn1/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0' shape=(1040,) dtype=float32_ref>, <tf.Variable 'rnn1/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0' shape=(520, 1040) dtype=float32_ref>, <tf.Variable 'rnn1/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0' s

In [6]:
def logprob(predictions, labels):
    """Log-probability of the true labels in a predicted batch.

    Args:
        predictions: array of predicted probabilities, same shape as `labels`.
            It is not modified.
        labels: array of per-class weights for every row of `predictions`.

    Returns:
        The sum of labels * -log(predictions), divided by labels.shape[0].
    """
    # Clip from below so log() never sees zero. np.maximum returns a new array,
    # so the caller's predictions are left untouched (the previous masked
    # assignment overwrote them in place).
    clipped = np.maximum(predictions, 1e-10)
    return np.sum(np.multiply(labels, -np.log(clipped))) / labels.shape[0]

第一次训练

In [7]:
checkpoint_dir = "model"
checkpoint_path = checkpoint_dir + "/model.ckpt"
# The checkpoint is only written after the whole run. Make sure its directory
# exists before training starts, since Saver.save can fail with "Parent
# directory ... doesn't exist" and the trained weights would then be lost.
# MakeDirs succeeds silently if the directory is already there.
tf.gfile.MakeDirs(checkpoint_dir)

with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    mean_loss = 0
    for step in range(7001):
        # Sample a minibatch (with replacement) and feed images, labels and
        # label counts for the same indices.
        batch_index = np.random.randint(0, len(train_dataset), batch_size)
        feed_dict = {
            tf_train_dataset: train_dataset[batch_index],
            tf_train_labels: train_labels[batch_index],
            tf_train_labels_num: train_labels_num[batch_index],
        }
        _, l, predictions, lr, labels = sess.run(
            [optimizer, loss, train_prediction, learning_rate, tf_train_labels_c], feed_dict=feed_dict)
        mean_loss += l
        if step % 100 == 0:
            if step > 0:
                mean_loss = mean_loss / 100
            # The mean loss is an estimate of the loss over the last few batches.
            print(
                'Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            mean_loss = 0
            print('Minibatch perplexity: %.2f' % float(
                np.exp(logprob(predictions, labels))))
    saver.save(sess, checkpoint_path)

Average loss at step 0: 79.039337 learning rate: 1.000000
Minibatch perplexity: 11.00
Average loss at step 100: 34.730603 learning rate: 0.950000
Minibatch perplexity: 5.95
Average loss at step 200: 6.387562 learning rate: 0.902500
Minibatch perplexity: 5.72
Average loss at step 300: 2.497618 learning rate: 0.857375
Minibatch perplexity: 5.87
Average loss at step 400: 1.910515 learning rate: 0.814506
Minibatch perplexity: 5.60
Average loss at step 500: 1.793289 learning rate: 0.773781
Minibatch perplexity: 5.60
Average loss at step 600: 1.779321 learning rate: 0.735092
Minibatch perplexity: 5.46
Average loss at step 700: 1.756707 learning rate: 0.698337
Minibatch perplexity: 4.97
Average loss at step 800: 1.754028 learning rate: 0.663420
Minibatch perplexity: 5.27
Average loss at step 900: 1.741110 learning rate: 0.630249
Minibatch perplexity: 5.08
Average loss at step 1000: 1.741074 learning rate: 0.598737
Minibatch perplexity: 5.20
Average loss at step 1100: 1.735081 learning rate: 0

使用已训练的模型继续训练

In [9]:
with tf.Session(graph=graph) as sess:
    # Resume from the checkpoint written by the previous training run.
    saver.restore(sess, "model/model.ckpt")
    mean_loss = 0
    # The learning-rate schedule is driven by global_step. To restart the
    # schedule, evaluate `global_step.assign(0)` at this point (left disabled).
    for step in range(2001):
        # Draw a random minibatch (with replacement) and feed it to the graph.
        batch_index = np.random.randint(0, len(train_dataset), batch_size)
        feed_dict = {
            tf_train_dataset: train_dataset[batch_index],
            tf_train_labels: train_labels[batch_index],
            tf_train_labels_num: train_labels_num[batch_index],
        }
        fetches = [optimizer, loss, train_prediction, learning_rate, tf_train_labels_c]
        _, l, predictions, lr, labels = sess.run(fetches, feed_dict=feed_dict)
        mean_loss += l
        if step % 100 != 0:
            continue
        # Report every 100 steps: the mean loss is an estimate of the loss over
        # the last few batches (at step 0 it is just the first batch's loss).
        if step > 0:
            mean_loss = mean_loss / 100
        print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
        mean_loss = 0
        perplexity = float(np.exp(logprob(predictions, labels)))
        print('Minibatch perplexity: %.2f' % perplexity)
    saver.save(sess, "model/model.ckpt")

INFO:tensorflow:Restoring parameters from model/model.ckpt
Average loss at step 0: 1.634242 learning rate: 0.009888
Minibatch perplexity: 4.89
Average loss at step 100: 1.679172 learning rate: 0.009394
Minibatch perplexity: 4.98
Average loss at step 200: 1.671983 learning rate: 0.008924
Minibatch perplexity: 5.22
Average loss at step 300: 1.679619 learning rate: 0.008478
Minibatch perplexity: 4.88
Average loss at step 400: 1.677630 learning rate: 0.008054
Minibatch perplexity: 5.17
Average loss at step 500: 1.681681 learning rate: 0.007651
Minibatch perplexity: 5.31
Average loss at step 600: 1.668924 learning rate: 0.007269
Minibatch perplexity: 5.26
Average loss at step 700: 1.678885 learning rate: 0.006905
Minibatch perplexity: 5.31
Average loss at step 800: 1.675388 learning rate: 0.006560
Minibatch perplexity: 5.10
Average loss at step 900: 1.671372 learning rate: 0.006232
Minibatch perplexity: 5.04
Average loss at step 1000: 1.670407 learning rate: 0.005921
Minibatch perplexity: 4