## dogs feature data generator

In [10]:
%load_ext autoreload
%autoreload 2
import os
import shutil
import numpy as np

from keras import applications
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Input, Dropout, Flatten, Dense, Conv2D, BatchNormalization, Activation, AveragePooling2D, concatenate, GlobalAveragePooling2D, MaxPooling2D
from keras.models import model_from_json
from keras.callbacks import ModelCheckpoint
from keras.metrics import top_k_categorical_accuracy
from keras.utils import Progbar, GeneratorEnqueuer
from keras.applications import imagenet_utils
import keras.backend as K
import tensorflow as tf
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import inspect
import matplotlib.patches as patches
import xml.etree.ElementTree as ET
from utils import mkdirp, load_base, load_feature, anno_file_to_rect, anno_file_to_yolo_y, get_image_size, img2data, npy_file_x, npy_file_y
from yolo_data_generator import YoloDataGenerator

%matplotlib inline 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Root folder of the dogs dataset. Set the DOGS_DATASET_ROOT environment
# variable to run on another machine; the default is the original location.
dataset_root = os.environ.get("DOGS_DATASET_ROOT", "/Users/xuan/work/dataset/dogs")

images_root = dataset_root + "/images"
annotations_root = dataset_root + "/annotations"

# Each root holds a "train" and a "val" sub-folder.
train_images_root = images_root + "/train"
val_images_root = images_root + "/val"
train_annotations_root = annotations_root + "/train"
val_annotations_root = annotations_root + "/val"

In [3]:
# Build the feature data: run-level configuration.
# NOTE(review): `round` shadows the builtin of the same name. The name is kept
# because generate_features() reads this global for its summary message.
round = .1  # scale factor applied to the split sizes below

# Split sizes (16494 / 4086) are copied from the split script, then scaled and floored.
NB_TRAIN_SAMPLES = int(16494 * round // 1)
nb_train_samples = NB_TRAIN_SAMPLES
NB_VAL_SAMPLES = int(4086 * round // 1)
nb_val_samples = NB_VAL_SAMPLES

epochs = 50
batch_size = 16
n_classes = 120
img_width = img_height = 224

# Grid passed to YoloDataGenerator, keyed by base model name.
base_model_name = "mn"
GRIDS = {
    "inception": (5, 5),
    "mn": (7, 7),
}
grid = GRIDS[base_model_name]

# The base model name is also used as the output directory for the feature files.
mkdirp(base_model_name)

In [4]:
# Data generators over the train / val image and annotation folders, both
# configured with the grid selected for the current base model.
train_generator = YoloDataGenerator(
    train_images_root,
    train_annotations_root,
    grid=grid,
)
val_generator = YoloDataGenerator(
    val_images_root,
    val_annotations_root,
    grid=grid,
)

def generate_features(generator, processor, model, total_samples, output_dir, prefix, batch_size = 16):
    """Extract features for a stream of batches and save them, with labels, as .npy files.

    Pulls ``total_samples // batch_size`` batches from ``generator`` through a
    ``GeneratorEnqueuer``, runs ``model.predict(processor(batch_x))`` on each,
    concatenates all features and all labels along the first axis, and writes
    them to the paths returned by ``npy_file_x(output_dir, prefix)`` and
    ``npy_file_y(output_dir, prefix)``.

    Args:
        generator: iterator yielding ``(batch_x, batch_labels)`` pairs.
        processor: callable applied to ``batch_x`` before prediction.
        model: object exposing ``predict(x, batch_size=...)``.
        total_samples: number of samples to cover; a trailing partial batch
            is dropped by the integer division.
        output_dir: directory handed to ``npy_file_x`` / ``npy_file_y``.
        prefix: file-name prefix handed to ``npy_file_x`` / ``npy_file_y``.
        batch_size: divisor used to compute the batch count, also forwarded
            to ``model.predict``.

    Raises:
        ValueError: if ``total_samples // batch_size`` is not positive, so
            there would be nothing to concatenate or save.

    Note:
        The summary message reads the module-level name ``round``.
    """
    batch_nums = total_samples // batch_size
    if batch_nums <= 0:
        raise ValueError(
            "total_samples ({}) must cover at least one batch of size {}".format(total_samples, batch_size))

    feature_file = npy_file_x(output_dir, prefix)
    label_file = npy_file_y(output_dir, prefix)
    all_features = []
    all_labels = []
    progbar = Progbar(target=batch_nums)

    enqueuer = GeneratorEnqueuer(generator)
    enqueuer.start()
    try:
        output_generator = enqueuer.get()
        for b in range(batch_nums):
            # Pull one batch from the raw data.
            batch_x, batch_labels = next(output_generator)
            # Extract the features for this batch.
            feature = model.predict(processor(batch_x), batch_size=batch_size)
            all_labels.append(batch_labels)
            all_features.append(feature)
            # Report completed batches (1..batch_nums) so the bar ends exactly on target.
            progbar.update(b + 1)
    finally:
        # Always shut the enqueuer down, even if a batch fails.
        enqueuer.stop()

    all_features = np.concatenate(all_features)
    all_labels = np.concatenate(all_labels)

    # .npy is a binary format: write through binary handles that are closed deterministically.
    with open(feature_file, 'wb') as feature_handle:
        np.save(feature_handle, all_features)
    with open(label_file, 'wb') as label_handle:
        np.save(label_handle, all_labels)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Round {} finished, saved to features {} (shapes {}) labels {} (shape {})".format(round, feature_file, all_features.shape, label_file, all_labels.shape))


In [12]:
# Generate the features for both splits with the selected base model.
base_model, base_decoder, processor = load_base(base_model_name)

# Mark every layer except the last one as non-trainable.
for layer in base_model.layers[:-1]:
    layer.trainable = False

# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("load the base model done {}".format(base_model_name))

# Validation split first, then the training split; files land in the
# directory named after the base model.
generate_features(val_generator.generate(), processor, base_model, nb_val_samples, base_model_name, "val-dogs-120")
generate_features(train_generator.generate(), processor, base_model, nb_train_samples, base_model_name, "train-dogs-120")

load the base model done mn
Round 0.1 finished, saved to features mn/val-dogs-120.x.npy (shapes (400, 7, 7, 1024)) labels mn/val-dogs-120.y.npy (shape (400, 245))
Round 0.1 finished, saved to features mn/train-dogs-120.x.npy (shapes (1648, 7, 7, 1024)) labels mn/train-dogs-120.y.npy (shape (1648, 245))
