## 卷积神经网络 — 使用Gluon
现在我们使用tensorflow/slim来实现上一章的卷积神经网络。

### 定义模型
下面是LeNet在tensorflow/slim里的实现，注意到我们不再需要实现去计算每层的输入大小，尤其是接在卷积后面的那个全连接层。

### 获取数据和训练
剩下的跟上一章没什么不同。

In [1]:
import tensorflow as tf
slim = tf.contrib.slim

def net(input):
    with tf.name_scope('lenet'):
        conv1 = slim.conv2d(input, 20, [5, 5], scope='conv1_1', weights_initializer=tf.random_normal_initializer(stddev=0.01))
        pool1 = slim.max_pool2d(conv1, [2, 2], scope='pool_1')
        conv2 = slim.conv2d(pool1, 50, [3, 3], scope='conv2_2', weights_initializer=tf.random_normal_initializer(stddev=0.01))
        pool2 = slim.max_pool2d(conv2, [2, 2], scope='pool_2')
        pool2 = slim.flatten(pool2)
        fc1 = slim.fully_connected(pool2, 128, scope='fc1', weights_initializer=tf.random_normal_initializer(stddev=0.01))
        fc2 = slim.fully_connected(fc1, 10, scope='fc2', activation_fn=None, weights_initializer=tf.random_normal_initializer(stddev=0.01))
        return conv1, conv2, fc1, fc2


In [2]:
def variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
    with tf.name_scope(var.op.name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

In [1]:
import sys
import numpy as np

sys.path.append('../../utils')
import utils

data_dir = '../../data/fashion_mnist'
train_images, train_labels, test_images, test_labels = utils.load_data_fashion_mnist(data_dir, one_hot=True)
print train_images.shape
print test_images.shape

from tensorflow.contrib.learn.python.learn.datasets.mnist import DataSet
train_dataset = DataSet(train_images, train_labels, one_hot=True)
test_dataset = DataSet(test_images, test_labels, one_hot=True)

learning_rate = 1e-2
max_steps = 1000
batch_size = 256
height = width = 28
num_channels = 1
num_outputs = 10

#tf.reset_default_graph()

input_placeholder = tf.placeholder(tf.float32, [None, height, width, num_channels])
#input_placeholder = tf.placeholder(tf.float32, [None, height*width*num_channels])

gt_placeholder = tf.placeholder(tf.int64, [None, num_outputs])
conv1, conv2, fc1, logits = net(input_placeholder)
loss = tf.losses.softmax_cross_entropy(logits=logits,  onehot_labels=gt_placeholder)
acc = utils.accuracy(logits, gt_placeholder)
test_images_reshape = np.reshape(np.squeeze(test_images), (test_images.shape[0], height, width))
    
#optimizer = tf.train.AdamOptimizer(learning_rate)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)

train_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)
test_acc = []


with tf.name_scope('output'):
    variable_summaries(conv1)
    variable_summaries(conv2)
    variable_summaries(fc1)
    variable_summaries(logits)
    
var_list = tf.trainable_variables()
for var in var_list:
    variable_summaries(var)
    
with tf.name_scope('gradients'):

    #optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grad_var_list = optimizer.compute_gradients(loss=loss, var_list=var_list)
    grad_list = [grad for (grad, var) in grad_var_list]
    for  grad, var in grad_var_list:
        print var.op.name
        with tf.name_scope(var.op.name):
            variable_summaries(grad)
        
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('log/', sess.graph)


Extracting ../../data/fashion_mnist/train-images-idx3-ubyte.gz
Extracting ../../data/fashion_mnist/train-labels-idx1-ubyte.gz
Extracting ../../data/fashion_mnist/t10k-images-idx3-ubyte.gz
Extracting ../../data/fashion_mnist/t10k-labels-idx1-ubyte.gz
(60000, 28, 28, 1)
(10000, 28, 28, 1)


NameError: name 'tf' is not defined

In [None]:
for step in range(max_steps):
    data, label = train_dataset.next_batch(batch_size)
    data = np.reshape(data, (batch_size, height, width, num_channels))
    feed_dict = {input_placeholder: data, gt_placeholder: label}
    conv1_, summary_, loss_, acc_, _ = sess.run([conv1, merged, loss, acc, train_op], feed_dict=feed_dict)
    train_writer.add_summary(summary_, step)
    if step % 100 == 0:
        print("Batch %d, Loss: %f, Train acc %f " % (step, loss_, acc_))
        for i in range(100):
            test_data, test_label = test_dataset.next_batch(100)
            test_data = np.reshape(test_data, (100, height, width, num_channels))
            test_loss_, test_acc_ = sess.run([loss, acc], feed_dict={input_placeholder: test_data, gt_placeholder: test_label})
            test_acc.append(test_acc_)
        print ("Test Loss: %f, Test acc %f " % (np.mean(test_loss_), np.mean(test_acc_)))

train_writer.close()

Batch 0, Loss: 2.431581, Train acc 0.140625 
Test Loss: 1.541882, Test acc 0.540000 
Batch 100, Loss: 0.707734, Train acc 0.875000 
Test Loss: 0.680130, Test acc 0.870000 
Batch 200, Loss: 0.598317, Train acc 0.878906 
Test Loss: 0.675763, Test acc 0.880000 
Batch 300, Loss: 0.645145, Train acc 0.882812 
Test Loss: 0.550532, Test acc 0.920000 
Batch 400, Loss: 0.623004, Train acc 0.882812 
Test Loss: 0.578785, Test acc 0.880000 
Batch 500, Loss: 0.602844, Train acc 0.875000 
Test Loss: 0.665458, Test acc 0.860000 
Batch 600, Loss: 0.541238, Train acc 0.906250 
Test Loss: 0.559770, Test acc 0.900000 
Batch 700, Loss: 0.569063, Train acc 0.925781 
Test Loss: 0.556408, Test acc 0.930000 
Batch 800, Loss: 0.513513, Train acc 0.914062 
Test Loss: 0.623122, Test acc 0.920000 
Batch 900, Loss: 0.549423, Train acc 0.914062 
Test Loss: 0.613668, Test acc 0.870000 
