In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
def get_files(file_dir):
    """
    Args:
        file_dir: file directory
    Returns:
        list of images and labels
    """
    cats = []
    label_cats = []
    dogs = []
    label_dogs = []
    for file in os.listdir(file_dir):
        name = file.split(sep='.')
        if name[0] == 'cat':
            cats.append(file_dir + file)
            label_cats.append(0)
        else:
            dogs.append(file_dir + file)
            label_dogs.append(1)
    print('There are %d cats\nThere are %d dogs' % (len(cats), len(dogs)))

    image_list = np.hstack((cats, dogs))
    label_list = np.hstack((label_cats, label_dogs))

    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)

    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    label_list = [int(i) for i in label_list]

    return image_list, label_list


# %%

def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """
    Args:
        image: list type
        label: list type
        image_W: image width
        image_H: image height
        batch_size: batch size
        capacity: the maximum elements in queue
    Returns:
        image_batch: 4D tensor [batch_size, width, height, 3], dtype=tf.float32
        label_batch: 1D tensor [batch_size], dtype=tf.int32
    """

    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # make an input queue
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)

    ######################################
    # data argumentation should go to here
    ######################################

    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)

    # if you want to test the generated batches of images, you might want to comment the following line.
    # 如果想看到正常的图片，请注释掉111行（标准化）和 126行（image_batch = tf.cast(image_batch, tf.float32)）
    # 训练时不要注释掉！
    image = tf.image.per_image_standardization(image)

    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=capacity)

    # you can also use shuffle_batch
    #    image_batch, label_batch = tf.train.shuffle_batch([image,label],
    #                                                      batch_size=BATCH_SIZE,
    #                                                      num_threads=64,
    #                                                      capacity=CAPACITY,
    #                                                      min_after_dequeue=CAPACITY-1)

    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)

    return image_batch, label_batch

In [3]:
def VGG16N(x, n_classes, is_pretrain=True):
    with tf.name_scope('VGG16'):
        x = conv('conv1_1', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = conv('conv1_2', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool1'):
            x = pool('pool1', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = conv('conv2_1', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = conv('conv2_2', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool2'):
            x = pool('pool2', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = conv('conv3_1', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = conv('conv3_2', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = conv('conv3_3', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool3'):
            x = pool('pool3', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = conv('conv4_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = conv('conv4_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = conv('conv4_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool4'):
            x = pool('pool4', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = conv('conv5_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = conv('conv5_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = conv('conv5_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool5'):
            x = pool('pool5', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = FC_layer('fc6', x, out_nodes=4096)
        with tf.name_scope('batch_norm1'):
            x = batch_norm(x)
        x = FC_layer('fc7', x, out_nodes=4096)
        with tf.name_scope('batch_norm2'):
            x = batch_norm(x)
        x = FC_layer('fc8', x, out_nodes=n_classes)

        return x

In [4]:
def conv(layer_name, x, out_channels, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=True):
    in_channels = x.get_shape()[-1]
    with tf.variable_scope(layer_name):
        w = tf.get_variable(name='weights',
                            trainable=is_pretrain,
                            shape=[kernel_size[0], kernel_size[1], in_channels, out_channels],
                            initializer=tf.contrib.layers.xavier_initializer())  # default is uniform distribution initialization
        b = tf.get_variable(name='biases',
                            trainable=is_pretrain,
                            shape=[out_channels],
                            initializer=tf.constant_initializer(0.0))
        x = tf.nn.conv2d(x, w, stride, padding='SAME', name='conv')
        x = tf.nn.bias_add(x, b, name='bias_add')
        x = tf.nn.relu(x, name='relu')
        return x
    
def pool(layer_name, x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True, padding='SAME'):
    if is_max_pool:
        x = tf.nn.max_pool(x, kernel, strides=stride, padding=padding, name=layer_name)
    else:
        x = tf.nn.avg_pool(x, kernel, strides=stride, padding=padding, name=layer_name)
    return x

def loss(logits, labels):
    """Compute loss
    Args:
        logits: logits tensor, [batch_size, n_classes]
        labels: one-hot labels
    """
    with tf.name_scope('loss') as scope:
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='cross-entropy')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope + '/loss', loss)
        return loss
    
def accuracy(logits, labels):
    """Evaluate the quality of the logits at predicting the label.
    Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, 
    """
    with tf.name_scope('accuracy') as scope:
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct) * 100.0
        tf.summary.scalar(scope + '/accuracy', accuracy)
    return accuracy

def FC_layer(layer_name, x, out_nodes):
    """Wrapper for fully connected layers with RELU activation as default
    Args:
        layer_name: e.g. 'FC1', 'FC2'
        x: input feature map
        out_nodes: number of neurons for current FC layer
    """
    shape = x.get_shape()
    if len(shape) == 4:
        size = shape[1].value * shape[2].value * shape[3].value
    else:
        size = shape[-1].value

    with tf.variable_scope(layer_name):
        w = tf.get_variable('weights',
                            shape=[size, out_nodes],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases',
                            shape=[out_nodes],
                            initializer=tf.constant_initializer(0.0))
        flat_x = tf.reshape(x, [-1, size])  # flatten into 1D

        x = tf.nn.bias_add(tf.matmul(flat_x, w), b)
        x = tf.nn.relu(x)
        return x
    
def batch_norm(x):
    """Batch normlization(I didn't include the offset and scale)
    """
    epsilon = 1e-3
    batch_mean, batch_var = tf.nn.moments(x, [0])
    x = tf.nn.batch_normalization(x,
                                  mean=batch_mean,
                                  variance=batch_var,
                                  offset=None,
                                  scale=None,
                                  variance_epsilon=epsilon)
    return x

def optimize(loss, learning_rate, global_step):
    """optimization, use Gradient Descent as default
    """
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op

In [5]:
# 定义参数
BATCH_SIZE = 64
CAPACITY = 256
IMG_W = 208
IMG_H = 208
N_CLASSES = 2
learning_rate = 0.01
MAX_STEP = 150
IS_PRETRAIN = True
train_dir = "./data/train"
test_dir = "./data/test"
train_log_dir = './logs/train/'
val_log_dir = './logs/val/'
pre_trained_weights = './VGG16/vgg16.npy'
graphCNN = tf.Graph()

In [6]:
def load_with_skip(data_path, session, skip_layer):
    """
    控制哪几层的参数不加载
    :param data_path:
    :param session:
    :param skip_layer:
    :return:
    """
    data_dict = np.load(data_path, encoding='latin1').item()
    for key in data_dict:
        if key not in skip_layer:
            with tf.variable_scope(key, reuse=True):
                for subkey, data in zip(('weights', 'biases'), data_dict[key]):
                    session.run(tf.get_variable(subkey).assign(data))

In [None]:
with graphCNN.as_default():
    # 读取文件
    image_list, label_list = get_files(train_dir)
    # 转换数据文件
    image_batch, label_batch = get_batch(image_list, label_list, IMG_W, IMG_H, len(image_list), CAPACITY)
    label_batch_reshape = tf.reshape(label_batch,[-1,1])
    
    # 占位符定义输入数据形状和标签形状
    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])

    logits = VGG16N(x, N_CLASSES, IS_PRETRAIN)
    loss = loss(logits, y)
    accuracy = accuracy(logits, y)

    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimize(loss, learning_rate, my_global_step)
    # tf.global_variables() 获取程序中的变量
    saver = tf.train.Saver(tf.global_variables())

There are 12500 cats
There are 12500 dogs


In [None]:
with tf.Session(graph=graphCNN) as sess:
    sess.run(tf.global_variables_initializer())
    summary_op = tf.summary.merge_all()
    image_list_len = len(image_list)
    # 读取训练好的VGG16
#     load_with_skip(pre_trained_weights, sess, ['fc8'])

    # 多线程
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # 记录图形和文件位置
#     tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
#     val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)
    print(image_batch.shape)
    print(label_batch.shape)
    

    for step in np.arange(MAX_STEP):
        # 数据分批
        offset = (step * BATCH_SIZE) % (image_list_len - BATCH_SIZE)
        batch_data_train = image_batch[offset:(offset + BATCH_SIZE), :, :, :]
        batch_labels_train = label_batch_reshape[offset:(offset + BATCH_SIZE), :]
        sess.run([batch_data_train, batch_labels_train])
#         summary_t, _, tra_loss, tra_acc = sess.run([summary_op, train_op, loss, accuracy],
#                                                    feed_dict={x: batch_data_train, y: batch_labels_train})
#         # 输出loss值
#         if step % 50 == 0:
#             # 每一百步保存

#             # summary_v, _, val_loss, val_acc = sess.run([summary_op, train_op, loss, accuracy],
#             #                                            feed_dict={x: valid_dataset, y: valid_labels})
#             print('Step: %d, loss: %.4f, tra_accuracy: %.4f%%' % (i, tra_loss, tra_acc))
#             # print('Step: %d, loss: %.4f, val_accuracy: %.4f%%' % (i, val_loss, val_acc))
# #             tra_summary_writer.add_summary(summary_t, i)
#             # val_summary_writer.add_summary(summary_v, i)

#     print("Complete")


(25000, 208, 208, 3)
(25000,)
