# 计划

第一步，先干两个事，第一个把CIFAR10数据导出来，然后可视化，第二个，把VGG16参数导进来，然后看看数据格式。

第二步，试着搭个tensorflow框架，能跑起来，看看还有什么难点没有解决。

第三步，添加记录，能够利用tensorboard查看优化过程。

第四步，调参，看各个超参数如何影响结果。

In [25]:
import numpy as np
import tensorflow as tf
import os
import matplotlib.pyplot as plt
import math

In [2]:
Batch_size = 128
IMG_hight = 32
IMG_width = 32
Channels = 3
Classes = 10
Learning_rate = 0.01
Max_step = 100

# 利用队列从文件中直接读取数据

1 data_dir要严格采用给出格式，'/'不可以

2 这个例程只针对cifar-10-batches-bin二进制文件，py数据集不支持

3 注意函数中对图像数据标准化的指令，会导致看到的图像不正常，暂时没找到替代函数放到train()中去。

In [3]:
def read_cifar10(data_dir, is_train, batch_size):
    Label_bytes = 1
    IMG_hight = 32
    IMG_width = 32
    Channels = 3
    IMG_bytes = IMG_hight * IMG_width * Channels
    Classes = 10
    
    # convert data_dir to tensor
    if is_train:
        filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)for i in range(1, 6)]
    else:
        filenames = [os.path.join(data_dir, 'test_batch.bin')]
    filename_queue = tf.train.string_input_producer(filenames)
    
    # define a reader
    reader = tf.FixedLengthRecordReader(Label_bytes+IMG_bytes)
    key, value = reader.read(filename_queue)
    record_bytes = tf.decode_raw(value, tf.uint8)
    label = tf.slice(record_bytes, [0], [Label_bytes])
    image = tf.slice(record_bytes, [Label_bytes], [IMG_bytes])
    image = tf.reshape(image, [Channels, IMG_hight, IMG_width])
    image = tf.transpose(image, [1, 2, 0])
    image = tf.cast(image, tf.float32)
    #  Linearly scales image to have zero mean and unit norm
    image = tf.image.per_image_standardization(image)
    image, label_batch = tf.train.shuffle_batch([image, label], batch_size=batch_size,
                                                num_threads=64, capacity=2000,
                                                min_after_dequeue=300)
    label_batch = tf.one_hot(label_batch, depth=Classes)
    label_batch = tf.cast(label_batch, tf.int32)
    label_batch = tf.reshape(label_batch, [batch_size, Classes])
    return image, label_batch

In [4]:
def test_read_cifar10():
    data_dir = ".//data//cifar-10-batches-bin//"
    Batch_size = 128
    is_train = True

    img_batch, label_batch = read_cifar10(data_dir=data_dir, is_train=is_train, batch_size=Batch_size)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        images, labels = sess.run([img_batch, label_batch])
        print(images.shape)
        print(labels.shape)
        for i in range(16):
            plt.subplot(4, 4, i+1)
            plt.imshow(images[i].astype('uint8'))
            plt.axis('off')
        plt.show
        coord.request_stop()
        coord.join(threads)
# test_read_cifar10()

# 搭建VGG16网络

使用tensorflow搭建VGG16网络结构，注意tf.layers是tf.nn的高阶封装，不能导入已训练参数，这里选用tf.nn。

In [5]:
def conv(layer_name, x, out_channels, kernel_size=[3,3], strides=[1,1,1,1]):
    in_channels = x.get_shape()[-1]
    with tf.variable_scope(layer_name):
        w = tf.get_variable(name='weights', shape=[kernel_size[0], kernel_size[1], in_channels, out_channels],
                           initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable(name='biases', shape=[out_channels], 
                           initializer=tf.constant_initializer(0.0))
        x = tf.nn.conv2d(x, w, strides, padding='SAME', name='conv')
        x = tf.nn.bias_add(x, b, name='bias_add')
        x = tf.nn.relu(x, name='relu')
    return x


def FC_layer(layer_name, x, out_nodes):
    shape = x.get_shape()
    if len(shape) == 4:
        size = shape[1] * shape[2] * shape[3]
    else:
        size = shape[1]
    with tf.variable_scope(layer_name):
        w = tf.get_variable(name='weights', shape=[size, out_nodes],
                           initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable(name='biases', shape=[out_nodes],
                           initializer=tf.constant_initializer(0.0))
        flat_x = tf.reshape(x, [-1, size])
        x = tf.nn.bias_add(tf.matmul(flat_x, w), b)
        x = tf.nn.relu(x)
    return x

In [6]:
def Net_VGG16(x, Classes):
    with tf.name_scope('VGG16'):
        x = conv('conv1_1', x, 64, kernel_size=[3,3], strides=[1,1,1,1])
        x = conv('conv1_2', x, 64, kernel_size=[3,3], strides=[1,1,1,1])
        x = tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool1')

        x = conv('conv2_1', x, 128, kernel_size=[3,3], strides=[1,1,1,1])
        x = conv('conv2_2', x, 128, kernel_size=[3,3], strides=[1,1,1,1])
        x = tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool2')   

        x = conv('conv3_1', x, 256, kernel_size=[3,3], strides=[1,1,1,1])
        x = conv('conv3_2', x, 256, kernel_size=[3,3], strides=[1,1,1,1])
        x = conv('conv3_3', x, 256, kernel_size=[3,3], strides=[1,1,1,1])    
        x = tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool3')

        x = conv('conv4_1', x, 512, kernel_size=[3,3], strides=[1,1,1,1])
        x = conv('conv4_2', x, 512, kernel_size=[3,3], strides=[1,1,1,1])
        x = conv('conv4_3', x, 512, kernel_size=[3,3], strides=[1,1,1,1])    
        x = tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool4')

        x = conv('conv5_1', x, 512, kernel_size=[3,3], strides=[1,1,1,1])
        x = conv('conv5_2', x, 512, kernel_size=[3,3], strides=[1,1,1,1])
        x = conv('conv5_3', x, 512, kernel_size=[3,3], strides=[1,1,1,1])    
        x = tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool5')

        x = FC_layer('fc6', x, out_nodes=4096)
        x = FC_layer('fc7', x, out_nodes=4096)
        x = FC_layer('fc8', x, out_nodes=Classes)
    return x

# 导入Pre_train参数

注意tf.get_variable的用法，这个的测试函数中，要调用VGG16，先定义好变量，再利用函数加载参数。

In [7]:
def load_with_skip(data_path, session, skip_layer):
    data_dict = np.load(data_path, encoding='latin1').item()
    keys = sorted(data_dict.keys())
    for key in keys:
        if key not in skip_layer:
            with tf.variable_scope(key, reuse=True):
                for subkey, data in zip(('weights', 'biases'), data_dict[key]):
                    session.run(tf.get_variable(subkey).assign(data))

In [8]:
def test_load_with_skip():
    tf.reset_default_graph()
    pre_trained_weights = ".//vgg16_pretrain//vgg16.npy"

    x = tf.placeholder(tf.float32, shape=[Batch_size, IMG_width, IMG_hight, Channels])
    logits = Net_VGG16(x, Classes)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])
    
    data_dict = np.load(pre_trained_weights, encoding='latin1').item()
    keys = sorted(data_dict.keys())
    for layer_name in keys:
        if layer_name not in ['fc6', 'fc7', 'fc8']:
            with tf.variable_scope(layer_name, reuse=True):
                w = tf.get_variable('weights')
                b = tf.get_variable('biases')
                print('\n')
                print(layer_name)
                print('weights shape: ', w.shape)
                print('biases shape', b.shape)
# test_load_with_skip()

# 定义一些函数

In [20]:
def loss_compute(logits, labels):
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels, name='cross-entropy')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        return loss
    
    
def accuracy_compute(logits, labels):
    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct) * 100
        tf.summary.scalar('accuracy', accuracy)
    return accuracy


def num_correct_prediction(logits, labels):
    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        correct = tf.cast(correct, tf.float32)
        n_correct = tf.reduce_mean(correct) * 100
    return n_correct

# 开始训练

In [21]:
def train():
    tf.reset_default_graph()
    pre_trained_file = ".//vgg16_pretrain//vgg16.npy"
    data_dir = ".//data//cifar-10-batches-bin"
    log_dir =  ".//logs"
    
    with tf.name_scope('input'):
        tra_image_batch, tra_label_batch = read_cifar10(data_dir=data_dir, is_train=True, batch_size=Batch_size)
        val_image_batch, val_label_batch = read_cifar10(data_dir=data_dir, is_train=False,batch_size=Batch_size)
        
    x = tf.placeholder(tf.float32, shape=[Batch_size, IMG_width, IMG_hight, Channels])
    tf.summary.image('image', x, 10)  # record the images which are loaded for CIFAR10
    y = tf.placeholder(tf.int32, shape=[Batch_size, Classes])
    logits = Net_VGG16(x, Classes)
    loss = loss_compute(logits, y)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=Learning_rate)
    train_op = optimizer.minimize(loss, name='optimize')
    accuracy = accuracy_compute(logits, y)
    
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver(tf.global_variables())
    
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    init = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init)

    load_with_skip(pre_trained_file, sess, ['fc6', 'fc7', 'fc8'])
    
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    tra_summary_writer = tf.summary.FileWriter(log_dir+'//train', sess.graph)
    
    images, labels = sess.run([tra_image_batch, tra_label_batch])
    
    for step in range(Max_step):
        summary_str, tra_accuracy, _, tra_loss = sess.run([summary_op, accuracy, train_op, loss],
                                             feed_dict={x: images, y: labels})
        if step % 20 == 0:
            print(tra_loss)
            print(tra_accuracy)
            tra_summary_writer.add_summary(summary_str, step)
            checkpoint_path = os.path.join(log_dir+'//train', 'modle.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
    
    coord.request_stop()
    coord.join(threads)
    sess.close()

# train()

In [26]:
def evaluate():
    with tf.Graph().as_default():
        log_dir = './logs/train/'
        test_dir = './data/cifar-10-batches-bin'
        n_test = 5000
        images, labels = read_cifar10(data_dir=test_dir, is_train=False, batch_size=Batch_size)
        logits=Net_VGG16(images, Classes)
        correct = num_correct_prediction(logits, labels)
        
        saver = tf.train.Saver(tf.global_variables())
        
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        init = tf.global_variables_initializer()
        sess = tf.Session(config=config)
        sess.run(init)
        
        print('Reading checkpoints...')
        ckpt = tf.train.get_checkpoint_state(log_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success')
        else:
            print('No checkpoint file found')
            return
        
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        
        print('\nEvaluating...')
        num_step = int(math.floor(n_test/Batch_size))
        num_sample = num_step * Batch_size
        step = 0
        total_correct = 0
        while step < num_step and not coord.should_stop():
            batch_correct = sess.run(correct)
            total_correct += np.sum(batch_correct)
            step += 1
        print('Total testing samples: %d' % num_sample)
        print('Total correct predictions: %d' % total_correct)
        print('Average accuracy: %.2f%%' %(100*total_correct/num_sample))
        
        coord.request_stop()
        coord.join(threads)
evaluate()

Reading checkpoints...
INFO:tensorflow:Restoring parameters from ./logs/train/modle.ckpt-80
Loading success

Evaluating...
Total testing samples: 4992
Total correct predictions: 1092
Average accuracy: 21.88%
