In [None]:
# Mini-batch size shared by the graph placeholders, the batch generators,
# the augmentation flow and the evaluation loop further down.
batch_size = 256

自定义 log 函数

In [None]:
# Global switch for log_info: any falsy value silences it.
LOGINFO = 1

def log_info(*args):
    """Print all positional arguments on a single line when LOGINFO is truthy.

    Every argument is rendered with str() and followed by one space, so a
    non-empty line ends with a trailing space before the newline. Called
    without arguments it prints just the newline.
    """
    if not LOGINFO:
        return
    print(''.join(str(item) + ' ' for item in args))

导入数据

In [None]:
import os
import numpy as np
import cv2
import time

def _read_second_column(path):
    """Return the second whitespace-separated field of every non-blank line of `path`."""
    with open(path, 'r') as file:
        return [line.split()[1] for line in file if line.strip()]


def _load_resized_images(paths, size=(64, 64)):
    """Read every image in `paths` with OpenCV and resize it to `size`.

    Raises:
        IOError: if an image cannot be read.
    """
    images = []
    for path in paths:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising;
            # fail here with the offending path instead of inside cv2.resize.
            raise IOError('cannot read image: {}'.format(path))
        images.append(cv2.resize(img, size))
    return np.array(images)


def input_data(npz=True):
    """Load the CUB_200_2011 images and labels as NumPy arrays.

    Args:
        npz: when True (default) the cache file 'bird_data.npz' in the working
            directory is loaded if it exists, and it is (re)written after the
            arrays have been built from the raw dataset. When False the cache
            is neither read nor written and the arrays are always rebuilt.

    Returns:
        Tuple (train_img, test_img, train_label, test_label). Images are
        resized to 64x64; labels are the integer class ids exactly as listed
        in image_class_labels.txt.

    Raises:
        ValueError: if the three annotation files do not list the same number
            of entries.
        IOError: if an image file cannot be read.
    """
    cache_file = 'bird_data.npz'
    keys = ('train_img', 'test_img', 'train_label', 'test_label')

    if npz and os.path.exists(cache_file):
        # The context manager closes the archive; indexing copies each array
        # into memory first, so the returned arrays stay valid.
        with np.load(cache_file) as bird_data:
            return tuple(bird_data[key] for key in keys)

    data_path = os.path.join('..', 'data', 'CUB_200_2011')
    log_info(os.listdir(data_path))

    # The flags are the strings "0"/"1": parse them as integers before the
    # bool conversion so the mask does not depend on how NumPy casts strings
    # to bool.
    train_test_split = np.array(
        _read_second_column(os.path.join(data_path, 'train_test_split.txt'))
    ).astype('int').astype('bool')
    log_info(train_test_split, train_test_split.size)

    img_paths = _read_second_column(os.path.join(data_path, 'images.txt'))
    log_info(img_paths[:1], len(img_paths))

    img_labels = np.array(
        _read_second_column(os.path.join(data_path, 'image_class_labels.txt'))
    ).astype('int')
    log_info(img_labels, len(img_labels))

    # The three files are matched purely by line position, so they must agree
    # in length.
    if not (len(img_paths) == len(img_labels) == train_test_split.size):
        raise ValueError(
            'annotation files disagree: {} paths, {} labels, {} split flags'.format(
                len(img_paths), len(img_labels), train_test_split.size))

    img_dir = os.path.join(data_path, 'images')
    # images.txt uses '/' separators; rebuild the paths with the native one.
    full_paths = [os.path.join(img_dir, *rel_path.split('/')) for rel_path in img_paths]

    img_paths_train = [p for p, is_train in zip(full_paths, train_test_split) if is_train]
    log_info(img_paths_train[:1], len(img_paths_train))
    img_paths_test = [p for p, is_train in zip(full_paths, train_test_split) if not is_train]
    log_info(img_paths_test[:1], len(img_paths_test))

    train_img = _load_resized_images(img_paths_train)
    test_img = _load_resized_images(img_paths_test)
    train_label = img_labels[train_test_split]
    test_label = img_labels[~train_test_split]
    log_info(train_label, train_label.size)
    log_info(test_label, test_label.size)

    if npz:
        np.savez(cache_file, train_img=train_img, test_img=test_img,
                 train_label=train_label, test_label=test_label)
    return train_img, test_img, train_label, test_label

In [None]:
# Load the four dataset arrays via input_data() and log the Python type,
# shape and element count of the training images and training labels.
x_train,x_test,y_train,y_test = input_data()
log_info('type:',type(x_train),type(y_train))
log_info('shape:',x_train.shape,y_train.shape)
log_info('size:',x_train.size,y_train.size)

数据预处理：把像素值缩放到 0-1、把标签改为从 0 开始，并打乱 *训练集*（注意：*训练集* 与 *验证集*（5000：994）的拆分当前未启用）

In [None]:
# Number of output classes of the classifier.
num_classes = 200

# NOTE: this cell rebinds x_train / x_test / y_train / y_test in place, so
# running it twice without re-running the loading cell rescales the images
# and shifts the labels a second time.

# Scale the 8-bit pixel values from [0, 255] down to float32 values in [0, 1].
x_train, x_test = (images.astype(np.float32) / 255 for images in (x_train, x_test))

# Shift every class id down by one so the ids start at 0
# (assumes the ids in the annotation file start at 1 -- TODO confirm).
y_train, y_test = (np.array(labels) - 1 for labels in (y_train, y_test))

# Shuffle the training images and labels with one shared random order.
index = list(range(len(y_train)))
np.random.shuffle(index)
x_train, y_train = x_train[index], y_train[index]

# Validation split (currently disabled):
# (x_valid, x_train) = x_train[5000:], x_train[:5000] # 994+5000
# (y_valid, y_train) = y_train[5000:], y_train[:5000]

log_info('type:', type(x_train), type(y_train))
log_info('shape:', x_train.shape, y_train.shape)
log_info('size:', x_train.size, y_train.size)


向量外积

In [None]:

def outer_product(a, b):
    """Row-wise outer product of two 2-D tensors, flattened per row.

    With dim = a.shape[1], column dim * i + j of output row n holds
    a[n, i] * b[n, j]. The product is obtained by expanding both inputs with
    constant 0/1 selection matrices of shape (dim, dim * dim) and multiplying
    the expansions element-wise, so `b` must have `dim` columns as well.

    Args:
        a: 2-D tensor of shape (rows, dim).
        b: 2-D tensor of shape (rows, dim).

    Returns:
        Tensor of shape (rows, dim * dim).
    """
    dim = int(a.shape[1])
    identity = np.eye(dim)

    # Repeats every entry of `a` dim times in a row: row i has ones in
    # columns dim*i .. dim*i + dim - 1. Example for dim = 2:
    #   [[1. 1. 0. 0.]
    #    [0. 0. 1. 1.]]
    repeat_each = np.repeat(identity, dim, axis=1)

    # Tiles the whole of `b` dim times: row i has a one in every column c
    # with c % dim == i. Example for dim = 2:
    #   [[1. 0. 1. 0.]
    #    [0. 1. 0. 1.]]
    tile_all = np.tile(identity, (1, dim))

    repeat_tensor = tf.convert_to_tensor(repeat_each, dtype=float)
    tile_tensor = tf.convert_to_tensor(tile_all, dtype=float)
    expanded_a = tf.matmul(a, repeat_tensor)
    expanded_b = tf.matmul(b, tile_tensor)
    return expanded_a * expanded_b

batch generator

In [None]:
def shuffle_aligned_list(data):
    """Apply one shared random permutation to every array in `data`.

    The permutation length is taken from the first array's leading
    dimension; the input arrays themselves are not modified.

    Returns:
        A new list holding the re-ordered arrays, in the same order as `data`.
    """
    order = np.random.permutation(data[0].shape[0])
    shuffled = []
    for array in data:
        shuffled.append(array[order])
    return shuffled

def batch_generator(data, batch_size, shuffle=True):
    """Endlessly yield aligned batches from a list of array-like objects.

    Every yielded item is a list with the same [start:end] slice of each
    object in `data`. Once fewer than `batch_size` samples remain, those
    samples are skipped and the generator starts over from the beginning
    (re-shuffling first when `shuffle` is True). With `shuffle=False` the
    skipped tail is therefore never served. If `batch_size` exceeds the
    number of samples, every batch is the whole (short) dataset.

    Args:
        data: list of array-like objects with the same leading length.
        batch_size: number of samples per batch; must be positive.
        shuffle: shuffle all objects identically before each pass.

    Raises:
        ValueError: on the first `next()` call, if `batch_size` is not
            positive or the first object in `data` is empty. Either case
            would otherwise produce an endless stream of empty or
            meaningless batches.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got {}'.format(batch_size))
    num_samples = len(data[0])
    if num_samples == 0:
        raise ValueError('cannot generate batches from empty data')

    if shuffle:
        data = shuffle_aligned_list(data)

    start = 0
    while True:
        if start + batch_size > num_samples:
            # Not enough samples left for a full batch: begin a new pass.
            start = 0
            if shuffle:
                data = shuffle_aligned_list(data)

        end = start + batch_size
        yield [d[start:end] for d in data]
        start = end

# Endless, reshuffling stream of test-set batches; the training cell draws
# one batch from it each time it reports the test-batch precision.
data_gen = batch_generator([x_test,y_test], batch_size, shuffle=True)

In [None]:
import tensorflow as tf

def _variable_with_weight_loss(name, shape, stddev, wl):
    var = _variable_on_gpu(name, shape,
                         tf.truncated_normal_initializer(stddev=stddev))
    if wl is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var

def _variable_on_gpu(name, shape, initializer):
    with tf.device('/gpu:0'):
        var = tf.get_variable(name, shape, initializer=initializer)
    return var


# Width of the feature vector each conv_net branch outputs (number of units
# of its last fully connected layer).
conv_feat_dim = 64


# Graph inputs with a fixed batch dimension: a batch of 64x64 3-channel
# images and one integer class id per image.
image_holder = tf.placeholder(tf.float32, [batch_size, 64, 64, 3])
label_holder = tf.placeholder(tf.int32, [batch_size])

def conv_net(name):
    """Build one convolutional feature extractor on top of `image_holder`.

    The branch is two 5x5 convolution layers (each with max pooling and local
    response normalisation) followed by two fully connected ReLU layers.
    All variables are created inside the variable scope `name`, so calling
    the function with different names yields independently parameterised
    branches over the same input placeholder.

    Args:
        name: name of the enclosing variable scope.

    Returns:
        Tensor of shape [batch_size, conv_feat_dim] with the branch features.
    """
    with tf.variable_scope(name):

        # First convolution block: 5x5 conv (3 -> 64 channels) + ReLU,
        # then 3x3 max pooling with stride 2, then LRN.
        with tf.variable_scope('conv1') as scope:
            weight1 = _variable_with_weight_loss('weights',shape=[5, 5, 3, 64], stddev=5e-2, wl=0.0)
            kernel1 = tf.nn.conv2d(image_holder, weight1, [1, 1, 1, 1], padding='SAME')
            bias1 = tf.Variable(tf.constant(0.0, shape=[64]),name='bias')
            conv1 = tf.nn.relu(tf.nn.bias_add(kernel1, bias1), name=scope.name)
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

        # Second convolution block: 5x5 conv (64 -> 64 channels) + ReLU.
        # Here LRN is applied before the pooling, the reverse of block one.
        with tf.variable_scope('conv2') as scope:
            weight2 = _variable_with_weight_loss('weights',shape=[5, 5, 64, 64], stddev=5e-2, wl=0.0)
            kernel2 = tf.nn.conv2d(norm1, weight2, [1, 1, 1, 1], padding='SAME')
            bias2 = tf.Variable(tf.constant(0.1, shape=[64]),name='bias')
            conv2 = tf.nn.relu(tf.nn.bias_add(kernel2, bias2), name=scope.name)
        norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

        # Fully connected layer: flatten each example and map it to 384 units
        # (weights carry an L2 penalty with coefficient 0.004).
        with tf.variable_scope('local3') as scope:
            reshape = tf.reshape(pool2, [batch_size, -1])
            dim = reshape.get_shape()[1].value
            weight3 = _variable_with_weight_loss('weights',shape=[dim, 384], stddev=0.04, wl=0.004)
            bias3 = tf.Variable(tf.constant(0.1, shape=[384]),name='bias')
            local3 = tf.nn.relu(tf.matmul(reshape, weight3) + bias3, name=scope.name)

        # Fully connected layer: 384 -> conv_feat_dim units, same L2 penalty.
        with tf.variable_scope('local4') as scope:
            weight4 = _variable_with_weight_loss('weights',shape=[384, conv_feat_dim], stddev=0.04, wl=0.004)
            bias4 = tf.Variable(tf.constant(0.1, shape=[conv_feat_dim]),name='bias')
            local4 = tf.nn.relu(tf.matmul(local3, weight4) + bias4, name=scope.name)

    return local4

# Two independently parameterised branches over the same input batch; their
# feature vectors are combined with a row-wise outer product.
part1 = conv_net('conv_net1')
part2 = conv_net('conv_net2')
product = outer_product(part1, part2)


# Linear layer on the flattened outer product, producing one logit per class.
# The initializer stddev 1 / conv_feat_dim relies on true (float) division.
with tf.variable_scope('softmax_linear') as scope:
    weight5 = _variable_with_weight_loss('weights',shape=[product.shape[1], num_classes], stddev=1 / conv_feat_dim, wl=0.0)
    bias5 = tf.Variable(tf.constant(0.0, shape=[num_classes]),name='bias')
    logits = tf.add(tf.matmul(product, weight5), bias5, name=scope.name)


def loss(logits, labels):
    """Build the total training loss.

    The mean sparse softmax cross-entropy over the batch is added to the
    'losses' collection, and the sum of everything in that collection
    (cross-entropy plus any weight-decay terms registered earlier) is
    returned.

    Args:
        logits: unnormalised class scores, one row per example.
        labels: integer class ids; cast to int64 before use.

    Returns:
        Scalar tensor named 'total_loss'.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.cast(labels, tf.int64),
        name='cross_entropy_per_example')
    batch_mean = tf.reduce_mean(per_example, name='cross_entropy')
    tf.add_to_collection('losses', batch_mean)

    total = tf.add_n(tf.get_collection('losses'), name='total_loss')
    return total


# NOTE: from here on the name `loss` refers to the total-loss tensor and no
# longer to the function defined above.
loss = loss(logits, label_holder)

# Step counter that is saved with the checkpoints. It is not passed to
# minimize() below, so it only changes when it is assigned explicitly.
global_step = tf.Variable(0, name='global_step', trainable=False)

# Learning rate starts at 0.001 and is multiplied by 0.96 every 5000 global
# steps (staircase decay).
starter_learning_rate = 0.001
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           5000, 0.96, staircase=True)

train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
# Alternative optimizer (disabled):
# train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Per-example boolean: is the true label the top-1 prediction?
top_k_op = tf.nn.in_top_k(logits, label_holder, 1)


# Saves all global variables; only the most recent checkpoint is kept.
saver = tf.train.Saver(tf.global_variables(),max_to_keep=1)



In [None]:
from keras.preprocessing.image import ImageDataGenerator
 
# Training-time augmentation: random shifts of up to 10% of the width and
# height, random rotations of up to 30 degrees and random horizontal flips.
datagen_train = ImageDataGenerator(
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    rotation_range=30,    # degrees; accepted range is 0-180
    horizontal_flip = True)


训练神经网络

In [None]:
max_steps = 1000000
model_dir = 'model'
restored_global_step = 0

# Saver.save fails when the parent directory of the checkpoint is missing,
# so make sure it exists before the first save at step 0.
os.makedirs(model_dir, exist_ok=True)
checkpoint_path = os.path.join(model_dir, 'model.ckpt')

# Build the op that writes the step counter exactly once. Calling tf.assign
# inside the loop would add a new op to the graph at every checkpoint and
# make the graph grow for the whole run.
global_step_input = tf.placeholder(global_step.dtype.base_dtype, shape=[])
set_global_step = tf.assign(global_step, global_step_input)

# The variables are pinned to '/gpu:0'; soft placement lets TensorFlow fall
# back to an available device when that GPU is missing.
session_config = tf.ConfigProto(allow_soft_placement=True)

with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())

    # model_dir is the directory that holds the `checkpoint` index file.
    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        # Resume from the checkpoint recorded in the index (normally the latest).
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored ...")
        restored_global_step = global_step.eval()
        print('restored_global_step: ',restored_global_step)
    else:
        print('Start from scratch ...')

    for step in range(max_steps):
        g_step = restored_global_step + step

        # A fresh flow() iterator is created on purpose: its first batch is
        # always a full, newly shuffled and augmented batch, which the
        # fixed-size placeholders require.
        image_batch, label_batch = next(datagen_train.flow(x_train, y_train, batch_size=batch_size))
        train_feed = {image_holder: image_batch, label_holder: label_batch}

        if step % 100 == 0 or (step + 1) == max_steps:
            # Evaluation/checkpoint step: no parameter update is made here.
            predictions, loss_value = sess.run([top_k_op, loss], feed_dict=train_feed)
            print('train batch precision: {} loss: {}'.format(np.sum(predictions)/batch_size, loss_value))

            test_image_batch, test_label_batch = next(data_gen)
            test_feed = {image_holder: test_image_batch, label_holder: test_label_batch}
            predictions, loss_value = sess.run([top_k_op, loss], feed_dict=test_feed)
            print('test  batch precision: {} loss: {}'.format(np.sum(predictions)/batch_size, loss_value))

            # Store the step counter in the graph so it is checkpointed and
            # drives the learning-rate decay.
            sess.run(set_global_step, feed_dict={global_step_input: g_step})
            saver.save(sess, checkpoint_path, global_step=g_step)

            continue

        start_time = time.time()

        _, loss_value = sess.run([train_op, loss], feed_dict=train_feed)

        duration = time.time() - start_time
        if step % 10 == 0:

            examples_per_sec = batch_size / duration
            sec_per_batch = float(duration)
            format_str = ('global_step: %d, loss: %.2f (%.1f examples/sec; %.3f sec/batch) lr：%g')
            print(format_str % (g_step, loss_value, examples_per_sec, sec_per_batch, sess.run(learning_rate)))
            

        
 

测试集上分类准确率

In [None]:
# Use a dedicated, unshuffled generator so the shuffled `data_gen` used by
# the training cell is not overwritten by running this cell.
eval_gen = batch_generator([x_test, y_test], batch_size, shuffle=False)

# The placeholders have a fixed batch size, so only full batches can be fed:
# the last num_examples % batch_size test images are not evaluated.
num_examples = x_test.shape[0]
num_iter = num_examples // batch_size
total_sample_count = num_iter * batch_size
if num_iter == 0:
    raise ValueError('test set has {} examples, fewer than one batch of {}'.format(
        num_examples, batch_size))

true_count = 0
# Soft placement lets the variables pinned to '/gpu:0' fall back to an
# available device when that GPU is missing.
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    # model_dir is the directory that holds the `checkpoint` index file.
    ckpt = tf.train.get_checkpoint_state(model_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        # Without a restore the variables are uninitialized and the first
        # sess.run would fail with a far less helpful error.
        raise RuntimeError('no checkpoint found in {!r}; train the model first'.format(model_dir))
    saver.restore(sess, ckpt.model_checkpoint_path)

    for step in range(num_iter):
        image_batch, label_batch = next(eval_gen)
        predictions = sess.run(top_k_op, feed_dict={image_holder: image_batch,
                                                    label_holder: label_batch})
        true_count += np.sum(predictions)

precision = true_count / total_sample_count
print('precision @ 1 = %.3f' % precision)