# Localizer
用于实现 object detection，即把大图片中的小物体截取出来。这里采用 regressor 的思想：把定位当作一个拟合（回归）问题来解决，输入是训练图片（N×3×100×100），输出是每张图片中物体的坐标（N×4），即 x, y, w, h（左上角坐标 x, y 以及物体的宽和高）。

In [1]:
import os
import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
K.set_image_dim_ordering('th')
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from keras.optimizers import SGD

Using TensorFlow backend.


In [23]:
from keras.layers import AveragePooling2D
from keras.layers.normalization import BatchNormalization

In [32]:
from keras.callbacks import ModelCheckpoint

In [9]:
# Seed NumPy's global RNG so the np.random-based shuffle of the data set is reproducible.
np.random.seed(2016)

In [7]:
# HDF5 file holding the pre-trained VGG16 weights loaded into the conv base.
weights_path = '../deep-learning-models/vgg16_weights.h5'
# File the regression head's weights are saved to in Step 2 and reloaded from in Step 3.
top_model_weights_path = 'localization/localizer_bottleneck_fc_model_bn_v2.h5'
# Dimensions of our images.
img_width, img_height = 100, 100
# Maximum number of epochs passed to model.fit (early stopping may end training sooner).
nb_epoch = 50

In [3]:
def load_from_file(filename):
    """Load the NumPy array stored at ``<filename>.npy``.

    ``filename`` is the path without its extension; ``'.npy'`` is appended
    before the file is read with ``np.load``.
    """
    npy_path = filename + '.npy'
    return np.load(npy_path)

In [8]:
def get_train_val():
    """Load the localisation data set and scale it for training.

    Reads the box coordinates and the resized images from the
    ``localization/`` directory, reshapes them to ``(-1, 4)`` and
    ``(-1, 3, 100, 100)`` respectively, and returns ``(X, Y)`` with the
    images divided by 255 and the coordinates divided by 100.
    """
    boxes = load_from_file('localization/coordinates_float').reshape((-1, 4))
    images = load_from_file('localization/resize_img_float').reshape((-1, 3, 100, 100))
    # Per the original author's note: pixel values lie in 0-255 and coordinates
    # in 0-100, both stored as floats, so both are rescaled here.
    return images / 255.0, boxes / 100.0

In [10]:
def shuffle(data, labels):
    """Shuffle ``data`` and ``labels`` in place with the same permutation.

    The global NumPy RNG state is captured once and restored before each
    shuffle, so both arrays are reordered identically (they are expected to
    have the same length along the first axis). Returns the two arrays that
    were passed in.
    """
    saved_state = np.random.get_state()
    for array in (data, labels):
        # Rewind the generator so every array sees the same random sequence.
        np.random.set_state(saved_state)
        np.random.shuffle(array)
    return data, labels

In [11]:
def get_shuffle_data():
    """Return the scaled data set from ``get_train_val`` after a paired shuffle."""
    images, boxes = get_train_val()
    return shuffle(images, boxes)

In [12]:
# Module-level data set used by every later step: X holds the images, Y the box coordinates.
X, Y = get_shuffle_data()

In [14]:
# Inspect the first label row (four coordinates, already divided by 100 in get_train_val).
Y[0]

array([ 0.1546875 ,  0.444     ,  0.17734375,  0.172     ])

# Step1
Fine Tune第一步是使用已经训练好的vgg16模型提取features.即将vgg16最后一个convBlock的输出作为features.

In [15]:
def save_bottleneck_features():
    """Run the images through the VGG16 conv base and cache the outputs on disk.

    Builds the VGG16 convolutional stack (five conv blocks, no fully-connected
    top), copies the pre-trained weights from ``weights_path`` into it layer
    by layer, and stores the network's predictions for the module-level ``X``:

    * ``X[330:]`` -> ``localization/local_bottleneck_features_train_V2.npy``
    * ``X[:330]`` -> ``localization/local_bottleneck_features_val_V2.npy``

    Reads the globals ``X``, ``weights_path``, ``img_width`` and
    ``img_height``. Returns nothing.
    """
    # (number of filters, number of 3x3 convolutions) for each VGG16 block.
    block_specs = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    # build the VGG16 network
    model = Sequential()
    for block_no, (nb_filter, nb_conv) in enumerate(block_specs, start=1):
        for conv_no in range(1, nb_conv + 1):
            if block_no == 1 and conv_no == 1:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height)))
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(nb_filter, 3, 3, activation='relu',
                                    name='conv{}_{}'.format(block_no, conv_no),
                                    dim_ordering='th'))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # load the weights of the VGG16 networks
    # (trained on ImageNet, won the ILSVRC competition in 2014)
    # note: when there is a complete match between your model definition
    # and your weight savefile, you can simply call model.load_weights(filename)
    assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
    # Open read-only; the context manager closes the file even when a layer's
    # weights fail to load.
    with h5py.File(weights_path, 'r') as f:
        # The savefile also contains the fully-connected layers; only the
        # leading layers that exist in this model are copied.
        for k in range(min(f.attrs['nb_layers'], len(model.layers))):
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)
    print('Model loaded.')

    # Passing the path lets np.save open the file in binary mode and close it,
    # instead of writing through a leaked text-mode handle.
    # train:
    bottleneck_features_train = model.predict(X[330:], batch_size=32, verbose=0)
    np.save('localization/local_bottleneck_features_train_V2.npy', bottleneck_features_train)
    # validation:
    bottleneck_features_val = model.predict(X[:330], batch_size=32, verbose=0)
    np.save('localization/local_bottleneck_features_val_V2.npy', bottleneck_features_val)
    


In [16]:
# Step 1: compute the VGG16 conv-base outputs for both splits and write them to disk.
save_bottleneck_features()

Model loaded.


这里是用自定义的l2 损失函数，keras支持自定义损失函数和评价函数，遵照着keras这部分的源码就可以轻松写出。

In [18]:
from keras import backend as K
def l2_loss(y_true, y_pred):
    """Euclidean (L2) distance between prediction and target along the last axis.

    Written with Keras backend ops so it can be passed as ``loss=`` to
    ``model.compile``.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.sum(squared_error, axis=-1))

# Step 2
Fine Tune第二步是使用第一步提取的特征训练一个自定义的适用于自己要解决的问题的head.
这里是训练regression问题，所以是regression head。 将训练好的weights保存作为正式进行训练时的初始值。

In [19]:
def train_top_model():
    """Train the regression head on the cached VGG16 bottleneck features.

    Loads the feature files written by ``save_bottleneck_features`` (the
    ``_V2`` files), pairs them with the matching slices of the module-level
    labels ``Y`` (``Y[330:]`` for training, ``Y[:330]`` for validation), and
    fits a small head: average pooling, batch norm, two dense ReLU layers
    (256 and 512 units) each followed by batch norm, dropout, and a 4-unit
    linear output. Training uses ``l2_loss`` with SGD and stops early once
    ``val_loss`` has not improved for 3 epochs. The resulting weights are
    saved to ``top_model_weights_path``. Returns nothing.
    """
    from keras.optimizers import SGD

    # These must be the files Step 1 writes; the previous names (without
    # "_V2") pointed at features that this notebook never produces.
    # Passing the path lets np.load open the file in binary mode and close it.
    train_data = np.load('localization/local_bottleneck_features_train_V2.npy')
    train_labels = Y[330:]

    validation_data = np.load('localization/local_bottleneck_features_val_V2.npy')
    validation_labels = Y[:330]

    model = Sequential()

    model.add(AveragePooling2D(input_shape=train_data.shape[1:]))
    # axis=1 is the channel axis of the (channels, rows, cols) feature maps.
    model.add(BatchNormalization(axis=1))
    model.add(Flatten())
    model.add(Dense(256, activation='relu', init='he_uniform'))
    model.add(BatchNormalization())
    model.add(Dense(512, activation='relu', init='he_uniform'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    # Four linear outputs: one per box coordinate.
    model.add(Dense(4, init='he_uniform'))

    callbacks = [
            EarlyStopping(monitor='val_loss', patience=3, verbose=0)
        ]

    sgd = SGD(lr=1e-2, decay=1e-4, momentum=0.9, nesterov=False)

    model.compile(loss=l2_loss, optimizer=sgd)

    model.fit(train_data, train_labels,
              nb_epoch=nb_epoch, batch_size=32, shuffle=True, verbose=2,
              validation_data=(validation_data, validation_labels), callbacks = callbacks)
    model.save_weights(top_model_weights_path)

In [24]:
# Step 2: fit the regression head on the cached features and save its weights.
train_top_model()

Train on 2969 samples, validate on 330 samples
Epoch 1/50
1s - loss: 1.9664 - val_loss: 0.5171
Epoch 2/50
1s - loss: 1.4636 - val_loss: 0.6276
Epoch 3/50
1s - loss: 1.2863 - val_loss: 0.6321
Epoch 4/50
1s - loss: 1.2551 - val_loss: 0.5344
Epoch 5/50
1s - loss: 1.1985 - val_loss: 0.5903


# Step 3
Fine Tune第三步是利用第二步训练好的头部weights和vgg16模型本身的weights，在自己问题的数据集上继续训练。在数据规模中等的情况下，这里freeze掉前四个conv block，只对最后一个conv block和头部进行权值更新。

In [25]:
# Build the VGG16 convolutional base: five blocks of zero-padded 3x3
# convolutions (named conv<block>_<index>), each block closed by 2x2 max pooling.
model = Sequential()
vgg_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
for block_no, (nb_filter, nb_conv) in enumerate(vgg_blocks, start=1):
    for conv_no in range(1, nb_conv + 1):
        if block_no == 1 and conv_no == 1:
            # Only the very first layer declares the input shape.
            model.add(ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height)))
        else:
            model.add(ZeroPadding2D((1, 1)))
        model.add(Convolution2D(nb_filter, 3, 3, activation='relu',
                                name='conv{}_{}'.format(block_no, conv_no)))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

In [26]:
# Copy the pre-trained VGG16 weights into the conv base, layer by layer.
assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
# Open read-only; the context manager closes the file even when a layer's
# weights fail to load.
with h5py.File(weights_path, 'r') as f:
    for k in range(f.attrs['nb_layers']):
        if k >= len(model.layers):
            # we don't look at the last (fully-connected) layers in the savefile
            break
        g = f['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        model.layers[k].set_weights(weights)
print('Model loaded.')


Model loaded.


In [28]:
# Regression head stacked on the conv base's output: average pooling and
# channel-wise batch norm, two dense ReLU layers with batch norm, dropout,
# and four linear outputs (one per box coordinate).
head_layers = [
    AveragePooling2D(input_shape=model.output_shape[1:]),
    BatchNormalization(axis=1),
    Flatten(),
    Dense(256, activation='relu', init='he_uniform'),
    BatchNormalization(),
    Dense(512, activation='relu', init='he_uniform'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(4, init='he_uniform'),
]
top_model = Sequential()
for head_layer in head_layers:
    top_model.add(head_layer)

# Fine-tuning has to start from an already-trained head, so the weights
# saved at top_model_weights_path are loaded before the head is attached.
top_model.load_weights(top_model_weights_path)

In [29]:
# Attach the regression head to the conv base so the stack can be trained end to end.
model.add(top_model)

In [30]:
# The first 25 layers cover conv blocks 1-4 (5 + 5 + 7 + 7 layers) plus the
# zero-padding layer that opens block 5; marking them non-trainable leaves
# only the block-5 convolutions and the head to be updated.
frozen_layer_count = 25
for frozen_layer in model.layers[:frozen_layer_count]:
    frozen_layer.trainable = False

# SGD with momentum and a deliberately small learning rate for fine-tuning.
sgd = SGD(lr=1e-4, decay=1e-6, momentum=0.9, nesterov=False)
model.compile(loss=l2_loss, optimizer=sgd)

In [34]:
# The first 330 samples are held out for validation; the rest are used for training.
validation_data, validation_labels = X[:330], Y[:330]
train_data, train_labels = X[330:], Y[330:]

In [35]:
# Checkpoint only the best weights (by val_loss) and stop once val_loss has
# gone 5 epochs without improving.
ckpt = ModelCheckpoint(
    'model_weights/bb_regressor/100x100/loss-{val_loss:.2f}_vgg16_bn.h5',
    monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True)
early_stop = EarlyStopping(monitor='val_loss', patience=5)
callbacks = [early_stop, ckpt]
model.fit(train_data, train_labels,
          validation_data=(validation_data, validation_labels),
          nb_epoch=nb_epoch, batch_size=32, shuffle=True,
          callbacks=callbacks)


Train on 2969 samples, validate on 330 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x11d511d50>

In [36]:
import cv2
# Sanity check: run the fine-tuned model on a single test image.
img = cv2.imread('../input/test_stg1/img_00857.jpg')
o_height, o_width = img.shape[:2]
# interpolation has to be passed by keyword: the third positional parameter
# of cv2.resize is the output array (dst), not the interpolation flag.
resized = cv2.resize(img, (100, 100), interpolation=cv2.INTER_LINEAR)
reshaped = resized.reshape((1, 3, 100, 100))
# NOTE: the pixels are still in the 0-255 range here, whereas get_train_val
# feeds the network inputs divided by 255.
print(model.predict(reshaped))

[[ 15.80322075 -42.89010239  -2.88385892  -0.86162078]]


In [38]:
# Apply the same 1/255 scaling that get_train_val applies to the training images.
reshaped=reshaped/255.0

In [39]:
# Predict again on the rescaled input; written as a call so it matches the
# print(...) form used elsewhere in this file and is valid on Python 2 and 3.
print(model.predict(reshaped))

[[ 0.3734383   0.32192737  0.17624888  0.27438438]]


# localize
使用训练好的localizer对测试数据进行截取

In [44]:
def predict_test(model, output_dir=None):
    """Crop every test image to the bounding box predicted by ``model``.

    Each ``../input/test_stg1/*.jpg`` file is resized to 100x100, divided by
    255 and passed to ``model.predict``. The four outputs are read as
    ``(x, y, w, h)`` expressed as fractions of the 100-pixel input, mapped
    back to the original resolution, clipped to the image bounds, and the
    resulting crop is written to ``output_dir`` under the original file name.

    Images that cannot be read, or whose predicted box is empty after
    clipping, are reported and skipped instead of aborting the whole run.

    Args:
        model: object whose ``predict`` accepts a ``(1, 3, 100, 100)`` array
            and returns one row of four values per input.
        output_dir: directory the crops are written to. Defaults to the
            location this notebook originally hard-coded. It is created if
            it does not exist.
    """
    import os
    import glob
    import cv2

    target = 100
    if output_dir is None:
        output_dir = '/Users/liumengyao/Documents/DataScience/fisher/input/test_stg1/local_v2_2'
    # cv2.imwrite does not create missing directories, so make sure it exists.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    path = os.path.join('..', 'input', 'test_stg1', '*.jpg')
    for fl in glob.glob(path):
        flbase = os.path.basename(fl)
        # Load the original image.
        print(flbase)
        img = cv2.imread(fl)
        if img is None:
            # cv2.imread reports an unreadable file by returning None.
            print('skipped (cannot read image): ' + flbase)
            continue
        o_height, o_width = img.shape[:2]

        # interpolation has to be passed by keyword: the third positional
        # parameter of cv2.resize is the output array (dst).
        resized = cv2.resize(img, (target, target), interpolation=cv2.INTER_LINEAR)
        # Same reshape and 1/255 scaling that get_train_val applies to the
        # training images.
        reshaped = resized.reshape((1, 3, target, target)) / 255.0
        (x0, y0, w0, h0) = model.predict(reshaped)[0]

        # Predictions are fractions of the resized image; convert them to
        # pixels of the resized image and then to pixels of the original.
        ratio_x = float(o_width) / float(target)
        ratio_y = float(o_height) / float(target)
        left = int(x0 * target * ratio_x)
        top = int(y0 * target * ratio_y)
        box_w = int(w0 * target * ratio_x)
        box_h = int(h0 * target * ratio_y)

        # Derive the far edges from the unclipped origin, then clip all four
        # edges, so clipping the left/top edge does not shift the box.
        right = min(left + box_w, o_width)
        bottom = min(top + box_h, o_height)
        left = max(left, 0)
        top = max(top, 0)
        if right <= left or bottom <= top:
            # Negative sizes or a box entirely outside the image leave
            # nothing to crop.
            print('skipped (empty predicted box): ' + flbase)
            continue

        sub = img[top:bottom, left:right].copy()
        cv2.imwrite(os.path.join(output_dir, flbase), sub)


In [41]:
# Crop all test images with the fine-tuned localizer.
predict_test(model)

img_00005.jpg
img_00007.jpg
img_00009.jpg
img_00018.jpg
img_00027.jpg
img_00030.jpg
img_00040.jpg
img_00046.jpg
img_00053.jpg
img_00071.jpg
img_00075.jpg
img_00102.jpg
img_00103.jpg
img_00109.jpg
img_00119.jpg
img_00120.jpg
img_00125.jpg
img_00128.jpg
img_00129.jpg
img_00133.jpg
img_00138.jpg
img_00141.jpg
img_00152.jpg
img_00161.jpg
img_00164.jpg
img_00170.jpg
img_00171.jpg
img_00172.jpg
img_00175.jpg
img_00180.jpg
img_00196.jpg
img_00212.jpg
img_00223.jpg
img_00230.jpg
img_00232.jpg
img_00244.jpg
img_00272.jpg
img_00280.jpg
img_00282.jpg
img_00285.jpg
img_00289.jpg
img_00292.jpg
img_00302.jpg
img_00311.jpg
img_00320.jpg
img_00331.jpg
img_00358.jpg
img_00374.jpg
img_00375.jpg
img_00376.jpg
img_00380.jpg
img_00383.jpg
img_00398.jpg
img_00407.jpg
img_00417.jpg
img_00432.jpg
img_00442.jpg
img_00446.jpg
img_00469.jpg
img_00473.jpg
img_00483.jpg
img_00484.jpg
img_00486.jpg
img_00487.jpg
img_00493.jpg
img_00518.jpg
img_00533.jpg
img_00538.jpg
img_00543.jpg
img_00550.jpg
img_00551.jpg
img_00

In [42]:
# Persist the shuffled, rescaled data set. Passing the path lets np.save open
# the file in binary mode and close it, instead of writing through a
# text-mode handle that is never closed.
np.save('localization/localv2_X.npy', X)
np.save('localization/localv2_Y.npy', Y)

In [43]:
# Run model.fit once more on the same model object, with a fresh checkpoint
# callback (best val_loss only) and early stopping after 5 stagnant epochs.
ckpt = ModelCheckpoint(
    filepath='model_weights/bb_regressor/100x100/loss-{val_loss:.2f}_vgg16_bn.h5',
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=True,
)
callbacks = [EarlyStopping(monitor='val_loss', patience=5), ckpt]
model.fit(
    train_data,
    train_labels,
    nb_epoch=nb_epoch,
    batch_size=32,
    shuffle=True,
    validation_data=(validation_data, validation_labels),
    callbacks=callbacks,
)


Train on 2969 samples, validate on 330 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


<keras.callbacks.History at 0x113a24610>

In [45]:
# Re-crop all test images using the model after the additional training round.
predict_test(model)

img_00005.jpg
img_00007.jpg
img_00009.jpg
img_00018.jpg
img_00027.jpg
img_00030.jpg
img_00040.jpg
img_00046.jpg
img_00053.jpg
img_00071.jpg
img_00075.jpg
img_00102.jpg
img_00103.jpg
img_00109.jpg
img_00119.jpg
img_00120.jpg
img_00125.jpg
img_00128.jpg
img_00129.jpg
img_00133.jpg
img_00138.jpg
img_00141.jpg
img_00152.jpg
img_00161.jpg
img_00164.jpg
img_00170.jpg
img_00171.jpg
img_00172.jpg
img_00175.jpg
img_00180.jpg
img_00196.jpg
img_00212.jpg
img_00223.jpg
img_00230.jpg
img_00232.jpg
img_00244.jpg
img_00272.jpg
img_00280.jpg
img_00282.jpg
img_00285.jpg
img_00289.jpg
img_00292.jpg
img_00302.jpg
img_00311.jpg
img_00320.jpg
img_00331.jpg
img_00358.jpg
img_00374.jpg
img_00375.jpg
img_00376.jpg
img_00380.jpg
img_00383.jpg
img_00398.jpg
img_00407.jpg
img_00417.jpg
img_00432.jpg
img_00442.jpg
img_00446.jpg
img_00469.jpg
img_00473.jpg
img_00483.jpg
img_00484.jpg
img_00486.jpg
img_00487.jpg
img_00493.jpg
img_00518.jpg
img_00533.jpg
img_00538.jpg
img_00543.jpg
img_00550.jpg
img_00551.jpg
img_00