自定义 log 函数

In [1]:
LOGINFO = 1  # any falsy value silences log_info


def log_info(*args):
    """Print the arguments on one line, each followed by a single space.

    Nothing is printed when the module-level LOGINFO switch is falsy.
    """
    if not LOGINFO:
        return
    # Every item carries its own trailing space, so an empty call still
    # produces a bare newline.
    print(''.join(str(item) + ' ' for item in args))

导入数据

In [2]:
import os
import numpy as np
import cv2

def input_data(npz=True):
    """Return the bird images and labels as NumPy arrays.

    Args:
        npz: When truthy, load the arrays cached in ``bird_data.npz`` in the
            current directory. Otherwise rebuild them from the raw dataset
            under ``../data/CUB_200_2011`` (every image resized to 64x64),
            log progress through ``log_info`` and rewrite the cache file.

    Returns:
        A tuple ``(train_img, test_img, train_label, test_label)``. Labels
        are the integer class ids exactly as stored in
        ``image_class_labels.txt``.

    Raises:
        ValueError: If the three metadata files list different numbers of
            images.
        IOError: If an image listed in ``images.txt`` cannot be read.
    """
    if npz:
        # The loaded archive keeps its file open; the context manager closes
        # it once the four arrays have been read.
        with np.load('bird_data.npz') as bird_data:
            return (bird_data['train_img'], bird_data['test_img'],
                    bird_data['train_label'], bird_data['test_label'])

    data_path = os.path.join('..', 'data', 'CUB_200_2011')
    log_info(os.listdir(data_path))

    def read_second_column(file_name):
        # Each metadata line is "<image id> <value>"; keep only the value.
        with open(os.path.join(data_path, file_name), 'r') as file:
            return [line.split()[1] for line in file]

    def load_image(path):
        image = cv2.imread(path)
        if image is None:  # cv2.imread reports failure by returning None
            raise IOError('cannot read image: {}'.format(path))
        return cv2.resize(image, (64, 64))

    # Go through int before bool: casting the strings directly treats every
    # non-empty string (including "0") as True on NumPy >= 2.0, which would
    # put all images into the training split.
    train_test_split = np.array(
        read_second_column('train_test_split.txt')).astype('int').astype('bool')
    log_info(train_test_split, train_test_split.size)

    img_paths = read_second_column('images.txt')
    log_info(img_paths[:1], len(img_paths))

    img_labels = np.array(
        read_second_column('image_class_labels.txt')).astype('int')
    log_info(img_labels, len(img_labels))

    if not len(img_paths) == len(img_labels) == train_test_split.size:
        raise ValueError('metadata files disagree on the number of images')

    img_dir = os.path.join(data_path, 'images')
    # The listing uses '/' as separator; convert it to the local one.
    full_paths = [os.path.join(img_dir, os.path.sep.join(path.split('/')))
                  for path in img_paths]

    img_paths_train = [p for p, is_train in zip(full_paths, train_test_split)
                       if is_train]
    log_info(img_paths_train[:1], len(img_paths_train))
    img_paths_test = [p for p, is_train in zip(full_paths, train_test_split)
                      if not is_train]
    log_info(img_paths_test[:1], len(img_paths_test))

    train_img = np.array([load_image(p) for p in img_paths_train])
    test_img = np.array([load_image(p) for p in img_paths_test])
    train_label = img_labels[train_test_split]
    test_label = img_labels[~train_test_split]
    log_info(train_label, train_label.size)
    log_info(test_label, test_label.size)

    # Cache the arrays so later calls can take the fast npz path.
    np.savez('bird_data', train_img=train_img, test_img=test_img,
             train_label=train_label, test_label=test_label)
    return train_img, test_img, train_label, test_label

In [3]:
# Load the cached arrays and log type, shape and element count of the
# training images and labels.
x_train, x_test, y_train, y_test = input_data(1)
for caption, describe in (('type:', type), ('shape:', np.shape), ('size:', np.size)):
    log_info(caption, describe(x_train), describe(y_train))

type: <class 'numpy.ndarray'> <class 'numpy.ndarray'> 
shape: (5994, 64, 64, 3) (5994,) 
size: 73654272 5994 


数据预处理，打乱并拆分 *训练集* 和 *验证集*（5000：994）

In [4]:
import keras
from keras.datasets import cifar10
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint

num_classes = 200

# Scale the 0-255 pixel values to floats in [0, 1].
x_train, x_test = x_train.astype('float32') / 255, x_test.astype('float32') / 255

# One-hot encode the labels after shifting them down by one
# (presumably the stored class ids start at 1 -- verify against the dataset).
y_train, y_test = (keras.utils.to_categorical(labels - 1, num_classes)
                   for labels in (y_train, y_test))

# Shuffle images and labels with one shared random ordering.
x_train, y_train = np.array(x_train), np.array(y_train)
index = list(range(len(y_train)))
np.random.shuffle(index)
x_train, y_train = x_train[index], y_train[index]

# Hold out everything past the first 5000 samples for validation
# (5000 training + 994 validation).
x_train, x_valid = x_train[:5000], x_train[5000:]
y_train, y_valid = y_train[:5000], y_train[5000:]

log_info('type:', type(x_train), type(y_train))
log_info('shape:', x_train.shape, y_train.shape)
log_info('size:', x_train.size, y_train.size)


Using TensorFlow backend.


type: <class 'numpy.ndarray'> <class 'numpy.ndarray'> 
shape: (5000, 64, 64, 3) (5000, 200) 
size: 61440000 1000000 


数据扩充

In [5]:
from keras.preprocessing.image import ImageDataGenerator
 
# Random augmentation for the training images: shifts of up to 10% in each
# direction, rotations of up to 30 degrees and horizontal flips.
augmentation = {
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'rotation_range': 30,  # original note: 0-180
    'horizontal_flip': True,
}
datagen_train = ImageDataGenerator(**augmentation)

datagen_train.fit(x_train)

**CNN**: 3个卷积层+2个全连接层

In [6]:
model = Sequential()

# Three convolution stages, each conv -> ReLU -> 2x2 max-pool -> dropout.
# Only the first stage is strided and declares the input shape.
conv_stages = (
    (32, {'strides': 2, 'input_shape': x_train.shape[1:]}),
    (64, {}),
    (128, {}),
)
for filters, extra_kwargs in conv_stages:
    model.add(Conv2D(filters, (3, 3), padding='same', **extra_kwargs))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

model.add(Flatten())

# The 512-unit layer is named so its output can be looked up by name
# and reused as a feature vector.
model.add(Dense(512, activation='relu', name='feat_vec'))

# Classification head: one softmax output per class.
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.summary()

# Optimizer: SGD with Nesterov momentum.
sgd = keras.optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)

# Compile the model with the SGD optimizer defined above.
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 16, 16, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 8, 8, 64)          0         
__________

训练神经网络

In [7]:
# File that holds the model weights; the evaluation cell loads it.
weights_path = 'bird.best.hdf5'
# The training run below is disabled (kept commented out). When enabled it
# resumes from weights_path, trains on augmented batches from datagen_train
# and lets ModelCheckpoint (save_best_only=True) write back to weights_path.
# model.load_weights(weights_path)
# batch_size=64
# checkpoint = ModelCheckpoint(filepath=weights_path, verbose=2, save_best_only=True)
# history = model.fit_generator(datagen_train.flow(x_train, y_train, batch_size=batch_size),
#                 steps_per_epoch=x_train.shape[0] // batch_size,
#                 epochs = 500,
#                 shuffle=True,
#                 verbose=1,
#                 callbacks=[checkpoint],
#                 validation_data=(x_valid, y_valid),
#                 validation_steps=x_valid.shape[0] // batch_size)


测试集上分类准确率

In [8]:
# Restore the saved weights, then score the model on the test set.
model.load_weights(weights_path)
loss, accuracy = model.evaluate(x_test, y_test)
report = 'evaluate: loss:{} acc:{}'.format(loss, accuracy)
print(report)

evaluate: loss:4.785721931862428 acc:0.15516051087331723


计算数据集特征

In [9]:
# Reload the arrays in their stored order (the earlier cell shuffled and
# split x_train / y_train in place).
x_train, x_test, y_train, y_test = input_data(1)
# Scale the 0-255 pixel values to floats in [0, 1].
x_train, x_test = (images.astype('float32') / 255 for images in (x_train, x_test))
# Shift the labels down by one, matching the shift used for the one-hot targets.
y_train = y_train - 1
y_test = y_test - 1

# Sub-model that stops at the 'feat_vec' layer: its output is the image feature.
feat_layer = model.get_layer('feat_vec')
feature_model = Model(inputs=model.input, outputs=feat_layer.output)
feature_vecs_train = feature_model.predict(x_train)
feature_vecs_test = feature_model.predict(x_test)
np.savez('feature_vecs', train=feature_vecs_train, test=feature_vecs_test)
# feature_vecs_test_01 = (feature_vecs_test>0).astype(int)
# feature_vecs_train_01 = (feature_vecs_train>0).astype(int)

In [10]:
# Number of training samples of each class (original note: 29-30 per class).
train_labels = np.array(y_train)
num_per_class = [np.sum(train_labels == class_id) for class_id in range(num_classes)]

def normalize(a):
    """Scale every row of a 2-D array to unit Euclidean length.

    Args:
        a: Array-like of shape (n_vectors, dim).

    Returns:
        Array of the same shape whose non-zero rows have L2 norm 1.
        All-zero rows are returned as zeros instead of NaN.
    """
    a = np.asarray(a)
    # Row norms kept as an (n, 1) column so the division broadcasts per row;
    # this avoids materialising an n x n diagonal matrix.
    norms = np.sqrt(np.sum(np.square(a), axis=1, keepdims=True))
    norms[norms == 0] = 1  # leave zero vectors untouched rather than 0/0
    return a / norms
# Bring both feature sets to unit length.
feature_vecs_train = normalize(feature_vecs_train)
feature_vecs_test = normalize(feature_vecs_test)

# The training set serves as the retrieval database.
x_database = x_train
y_database = y_train
feature_vecs_database = feature_vecs_train


# Find the k elements of v_set that are most similar to v.
def topK(v, v_set, k):
    """Return the indices of the k entries of v_set closest to v.

    Args:
        v: Query vector.
        v_set: Iterable of candidate vectors, compared one by one with
            ``distance``.
        k: Number of neighbours wanted. Values outside ``[0, len(v_set)]``
            are clamped to that range.

    Returns:
        Integer index array ordered from the smallest distance upwards.
    """
    dist = np.array([distance(v, candidate) for candidate in v_set])
    k = max(0, min(k, len(dist)))
    if k == len(dist):
        # np.argpartition needs kth < len(dist); when every element is
        # requested a plain sort gives the answer.
        return np.argsort(dist)
    # Select the k smallest distances first, then order just those k.
    nearest = np.argpartition(dist, k)[:k]
    return nearest[np.argsort(dist[nearest])]

def distance(a, b, metric='cosine'):
    """Return a dissimilarity score between vectors a and b (smaller = closer).

    Args:
        a: First vector (NumPy array).
        b: Second vector (NumPy array).
        metric: ``'cosine'`` (default) returns the negated dot product, which
            orders candidates like cosine distance when the vectors have unit
            length. ``'euclidean'`` returns the squared Euclidean distance.

    Raises:
        ValueError: If ``metric`` is not one of the two supported names.
    """
    if metric == 'cosine':
        return -a.dot(b)
    if metric == 'euclidean':
        return np.sum(np.square(a - b))
    raise ValueError('unknown metric: {!r}'.format(metric))

def onehot2class(onehot):
    """Return the position of the largest entry of a one-hot vector.

    With the 200 classes used in this file that is a class index in 0-199.
    """
    encoded = np.asarray(onehot)
    return encoded.argmax()

def retrieve(x, k):
    """Return the database indices of the k items most similar to image x.

    Args:
        x: A single image, without a batch dimension.
        k: Number of database entries to return.

    Returns:
        Index array into the retrieval database, nearest first.
    """
    # predict works on batches: wrap x into a batch of one and unwrap the
    # single feature vector again. Passing the (1, dim) batch itself to topK
    # makes every distance a length-1 array, and the resulting (n, 1)
    # distance matrix cannot be partitioned for any k >= 1.
    featone = feature_model.predict(np.expand_dims(x, axis=0))[0]
    return topK(featone, feature_vecs_database, k)

def AP(idxK, y):
    """Average precision of one ranked result list for a query of class y.

    Args:
        idxK: Database indices of the retrieved items, best match first.
        y: Class index of the query.

    Returns:
        Sum of the precision values at every rank holding a correct item,
        divided by the number of database samples of class y.
    """
    hits = np.array(y_database[idxK]) == y
    ranks = np.arange(1, len(hits) + 1)
    # Running count of correct items, zeroed at the ranks that are misses.
    correct_so_far = np.add.accumulate(hits) * hits.astype(int)
    total_relevant = num_per_class[y]
    return np.sum(correct_so_far / ranks) / total_relevant

def mAP(x_batch, y_batch, k):
    """Mean average precision at k over a batch of query images.

    Args:
        x_batch: Query images, fed to the feature model as one batch.
        y_batch: Class index of each query.
        k: Number of database entries retrieved per query.

    Returns:
        The average of the per-query AP values.
    """
    query_features = feature_model.predict(x_batch)
    scores = []
    for feature, label in zip(query_features, y_batch):
        ranked = topK(feature, feature_vecs_database, k)
        scores.append(AP(ranked, label))
    return np.average(scores)

import time

# Report mAP on the test set for several cut-offs, timing each evaluation.
for k in (1, 5, 10, 50):
    started = time.time()
    score = mAP(x_test, y_test, k)
    print("mAP @ {}: {}".format(k, score))
    elapsed = time.time() - started
    print('time used: {}s'.format(elapsed))

# print(feature_model.predict(np.expand_dims(x_test[32], axis=0))) # sparse features

mAP @ 1: 0.00356333742000246
time used: 37.88679480552673s
mAP @ 5: 0.009109373946095643
time used: 35.294941902160645s
mAP @ 10: 0.012631095861518534
time used: 35.273383140563965s
mAP @ 50: 0.022817139255647614
time used: 34.55383896827698s


In [None]:
# Step through the training images one key press at a time.
# Esc stops early; the windows are closed even if the loop is interrupted.
try:
    for i in x_train:
        cv2.imshow('img', i)
        if cv2.waitKey(0) & 0xFF == 27:  # 27 is the Esc key code
            break
finally:
    cv2.destroyAllWindows()