## dogs feature data generator

In [5]:
import os
import shutil
import numpy as np

from keras import applications
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Input, Dropout, Flatten, Dense, Conv2D, BatchNormalization, Activation, AveragePooling2D, concatenate, GlobalAveragePooling2D, MaxPooling2D
from keras.models import model_from_json
from keras.callbacks import ModelCheckpoint
from keras.metrics import top_k_categorical_accuracy
from keras.utils import Progbar, GeneratorEnqueuer
from keras.applications import imagenet_utils
import keras.backend as K
import tensorflow as tf
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import inspect
import matplotlib.patches as patches
import xml.etree.ElementTree as ET
%matplotlib inline 

In [9]:
# Dataset locations. The root defaults to the original author's local path but
# can be overridden with the DOGS_DATASET_ROOT environment variable so the
# notebook runs on other machines without editing this cell.
dataset_root = os.environ.get("DOGS_DATASET_ROOT", "/Users/xuan/work/dataset/dogs")
images_root = dataset_root + "/images"
annotations_root = dataset_root + "/annotations"

# Train/validation splits live in "train" and "val" sub-directories.
train_images_root = images_root + "/train"
val_images_root = images_root + "/val"
train_annotations_root = annotations_root + "/train"
val_annotations_root = annotations_root + "/val"

In [6]:
# libs
def mkdirp(dir):
  """Create directory `dir`, including missing parents, if it does not exist.

  An already existing directory is not an error. Any other failure (for
  example a regular file occupying the path, or missing permissions) is
  re-raised as OSError instead of being silently ignored.
  """
  try:
    os.makedirs(dir)
  except OSError:
    # Only "the directory is already there" is an acceptable failure.
    if not os.path.isdir(dir):
      raise

def load_base(model):
  """Build a pretrained base network without its top layers.

  `model` is a short name: "vgg"/"vgg16", "mobilenet"/"mn",
  "resnet"/"resnet50" or "inceptionv3"/"inception".

  Returns a tuple (network, decode_predictions, preprocess_input) for the
  chosen architecture, or None when the name is not recognised.
  """
  if model in ("vgg", "vgg16"):
    net = applications.VGG16(weights='imagenet', include_top=False)
    return net, applications.vgg16.decode_predictions, applications.vgg16.preprocess_input

  if model in ("mobilenet", "mn"):
    net = applications.MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    return net, applications.mobilenet.decode_predictions, applications.mobilenet.preprocess_input

  if model in ("resnet", "resnet50"):
    net = applications.ResNet50(weights='imagenet', include_top=False)
    return net, applications.resnet50.decode_predictions, applications.resnet50.preprocess_input

  if model in ("inceptionv3", "inception"):
    net = applications.InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    return net, applications.inception_v3.decode_predictions, applications.inception_v3.preprocess_input

  # Unknown name: callers that unpack the result must handle None.
  return None
    
def load_model(model):
  """Build a complete pretrained network (top layers included).

  Accepts the same short names as load_base: "vgg"/"vgg16", "mobilenet"/"mn",
  "resnet"/"resnet50" or "inceptionv3"/"inception".

  Returns a tuple (network, decode_predictions, preprocess_input) for the
  chosen architecture, or None when the name is not recognised.
  """
  if model in ("vgg", "vgg16"):
    net = applications.VGG16(weights='imagenet')
    return net, applications.vgg16.decode_predictions, applications.vgg16.preprocess_input

  if model in ("mobilenet", "mn"):
    net = applications.MobileNet(weights='imagenet', input_shape=(224, 224, 3))
    return net, applications.mobilenet.decode_predictions, applications.mobilenet.preprocess_input

  if model in ("resnet", "resnet50"):
    net = applications.ResNet50(weights='imagenet')
    return net, applications.resnet50.decode_predictions, applications.resnet50.preprocess_input

  if model in ("inceptionv3", "inception"):
    net = applications.InceptionV3(weights='imagenet', input_shape=(224, 224, 3))
    return net, applications.inception_v3.decode_predictions, applications.inception_v3.preprocess_input

  # Unknown name: callers that unpack the result must handle None.
  return None

def npy_file(basedir, prefix):
  """Return the path "<basedir>/<prefix>.npy"."""
  return "%s/%s.npy" % (basedir, prefix)

def npy_file_x(basedir, prefix):
  """Return the path of the feature array file, "<basedir>/<prefix>.x.npy"."""
  feature_prefix = prefix + ".x"
  return npy_file(basedir, feature_prefix)

def npy_file_y(basedir, prefix):
  """Return the path of the label array file, "<basedir>/<prefix>.y.npy"."""
  label_prefix = prefix + ".y"
  return npy_file(basedir, label_prefix)


def load_feature(dir, prefix):
  """Load the feature and label arrays previously saved for `prefix`.

  Reads "<dir>/<prefix>.x.npy" and "<dir>/<prefix>.y.npy" and returns the
  tuple (features, labels).
  """
  # Hand the paths to np.load so the files are opened in binary mode and
  # closed again, instead of leaking text-mode handles.
  features = np.load(npy_file_x(dir, prefix))
  labels = np.load(npy_file_y(dir, prefix))
  return features, labels

def conv2d_bn(x,
              filters,
              num_row,
              num_col,
              padding='same',
              strides=(1, 1),
              name=None):
    """Apply Conv2D (no bias) -> BatchNormalization (no scale) -> ReLU to `x`.

    Arguments:
        x: input tensor.
        filters: number of convolution filters (coerced to int).
        num_row, num_col: kernel height and width.
        padding: padding mode passed to Conv2D.
        strides: strides passed to Conv2D.
        name: base name; the layers are named "<name>_conv", "<name>_bn" and
            "<name>". When None, no explicit names are passed to the layers
            (previously the default raised a TypeError on `None + "_conv"`).

    Returns:
        The output tensor of the activation layer.
    """
    filters = int(filters)
    if name is None:
        conv_name = bn_name = act_name = None
    else:
        conv_name = name + "_conv"
        bn_name = name + "_bn"
        act_name = name
    x = Conv2D(
        filters, (num_row, num_col),
        strides=strides,
        padding=padding,
        use_bias=False,
        name=conv_name)(x)
    x = BatchNormalization(scale=False, name=bn_name)(x)
    x = Activation('relu', name=act_name)(x)
    return x

def incept(x, name, scale=1):
    """Build an Inception-style block of four parallel branches on `x`.

    Branches: a 1x1 convolution; a 1x1 followed by a 5x5 convolution; a 1x1
    followed by two 3x3 convolutions; and 3x3 average pooling followed by a
    1x1 convolution. Every filter count is integer-divided by `scale`.
    Layer names are derived from `name`; the concatenated output layer is
    named "<name>-all".
    """
    tower_1x1 = conv2d_bn(x, 64 // scale, 1, 1, name=name + "-1x1")

    tower_5x5 = conv2d_bn(x, 48 // scale, 1, 1, name=name + "-5x5-1x1")
    tower_5x5 = conv2d_bn(tower_5x5, 64 // scale, 5, 5, name=name + "-5x5-5x5")

    # Double 3x3 branch: (base filter count, kernel size, layer name suffix).
    tower_3x3 = x
    for base_filters, kernel, suffix in ((64, 1, "-3x3-1x1"),
                                         (96, 3, "-3x3-3x3-1"),
                                         (96, 3, "-3x3-3x3-2")):
        tower_3x3 = conv2d_bn(tower_3x3, base_filters // scale, kernel, kernel, name=name + suffix)

    pooled = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
    tower_pool = conv2d_bn(pooled, 32 // scale, 1, 1, name=name + "-pool")

    towers = [tower_1x1, tower_5x5, tower_3x3, tower_pool]
    return concatenate(towers, name=name + '-all')

In [15]:
# yolo functions
def anno_file_to_rect(anno_file):
    """Return the first annotated bounding box as ints (xmin, ymin, xmax, ymax).

    `anno_file` is anything ET.parse accepts. Every <object>/<bndbox> in the
    annotation is parsed, but only the first one is returned; an annotation
    without any <object> raises IndexError.
    """
    root = ET.parse(anno_file).getroot()
    bndboxes = [obj.find("bndbox") for obj in root.findall("object")]
    rects = []
    for bndbox in bndboxes:
        corners = tuple(int(bndbox.find(tag).text)
                        for tag in ("xmin", "ymin", "xmax", "ymax"))
        rects.append(corners)
    return rects[0]

def get_image_size(af):
    """Return the image size recorded in annotation `af` as floats (width, height).

    `af` is anything ET.parse accepts; the values are read from the
    <size>/<width> and <size>/<height> elements.
    """
    size_el = ET.parse(af).getroot().find("size")
    return (float(size_el.find("width").text), float(size_el.find("height").text))

## returns a flat vector of grid[0] * grid[1] * 5 values: (C, cx, cy, w, h) per cell
def anno_file_to_yolo_y(af, grid = (7 , 7)):
    """Encode the first bounding box of annotation `af` as a YOLO-style label.

    Arguments:
        af: annotation XML, anything ET.parse accepts.
        grid: (columns, rows) of the cell grid laid over the image.

    Returns:
        A float16 vector of length 5 * grid[0] * grid[1]. The cell containing
        the box centre, at flat index `col + row * grid[0]`, holds
        [1, x offset of the centre inside the cell (in cell widths),
         y offset of the centre inside the cell (in cell heights),
         box width / image width, box height / image height];
        all other entries are zero.

    Raises:
        IndexError: if the annotation contains no <object>.
    """
    root = ET.parse(af).getroot()
    boxes = [obj.find("bndbox") for obj in root.findall("object")]
    image_size_el = root.find("size")
    image_width = float(image_size_el.find("width").text)
    image_height = float(image_size_el.find("height").text)
    cell_width = image_width / grid[0]
    cell_height = image_height / grid[1]

    # Only the first annotated object is encoded.
    box = boxes[0]
    rect = (float(box.find("xmin").text),
            float(box.find("ymin").text),
            float(box.find("xmax").text),
            float(box.find("ymax").text))

    width = rect[2] - rect[0]
    height = rect[3] - rect[1]
    center_x = (rect[2] + rect[0]) / 2.0
    center_y = (rect[3] + rect[1]) / 2.0

    # Find the cell that contains the box centre. Clamp to the grid so a
    # centre lying on or beyond the image edge (boxes that touch or exceed the
    # image bounds) cannot spill into the next row or past the end of `y`.
    cell_idx_x = min(max(int(center_x * grid[0] / image_width), 0), grid[0] - 1)
    cell_idx_y = min(max(int(center_y * grid[1] / image_height), 0), grid[1] - 1)

    y1 = [1,
          center_x / cell_width - cell_idx_x,
          center_y / cell_height - cell_idx_y,
          width / image_width,
          height / image_height
         ]

    y = np.zeros((5 * grid[0] * grid[1]), dtype=np.float16)
    base = cell_idx_x + cell_idx_y * grid[0]
    y[base * 5: (base + 1) * 5] = y1

    return y
  
def img2data(image_file):
  """Load `image_file` resized to 224x224 as a one-image float16 batch array."""
  resized = load_img(image_file, target_size = (224, 224))
  pixels = img_to_array(resized)
  # Wrap in a list so the result carries a leading batch axis of length 1.
  return np.array([pixels], dtype=np.float16)

# Extension appended to a sample id to form its image file name.
JPEG_EXT = "JPEG"

def flatten(l):
    """Concatenate the sub-sequences of `l` into a single flat list."""
    flat = []
    for sublist in l:
        for item in sublist:
            flat.append(item)
    return flat

class YoloDataGenerator(object):
    """Generates batches of images and YOLO-style labels from a dataset on disk.

    Layout read by this class: `annotation_dir/<class>/<id>` is the annotation
    XML of a sample and `image_dir/<class>/<id>.<JPEG_EXT>` is its image.
    """
    def __init__(self, image_dir, annotation_dir, grid = (7,7), batch_size = 16, target_size = (224, 224)):
        """Index every annotation found under `annotation_dir`.

        Arguments:
            image_dir: root directory of the images.
            annotation_dir: root directory of the annotations, one
                sub-directory per class.
            grid: cell grid passed to anno_file_to_yolo_y.
            batch_size: number of samples per generated batch.
            target_size: size images are resized to when loaded.
        """
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        ids = []
        # Sorted listings make the sample order reproducible between runs.
        for clz in sorted(os.listdir(annotation_dir)):
            class_dir = annotation_dir + "/" + clz
            # Skip stray files (e.g. .DS_Store) sitting next to the class folders.
            if not os.path.isdir(class_dir):
                continue
            ids.extend(clz + "/" + f
                       for f in sorted(os.listdir(class_dir))
                       if not f.startswith("."))
        self.ids = ids  # sample ids of the form "<class>/<annotation file name>"
        self.steps = 0  # number of batches produced so far
        self.batch_size = batch_size
        self.target_size = target_size
        self.grid = grid

    def generate(self):
        """Yield (images, labels) float16 arrays, one full batch at a time.

        Only complete batches are produced; leftover samples are dropped.
        Progress is tracked in `self.steps`, so the samples are served once
        per instance: a second call continues where the first one stopped.
        """
        while self.steps < len(self.ids) // self.batch_size:
            start = self.steps * self.batch_size
            batch_ids = self.ids[start: start + self.batch_size]
            image_files = [self.image_dir + "/" + sample_id + "." + JPEG_EXT for sample_id in batch_ids]
            anno_files = [self.annotation_dir + "/" + sample_id for sample_id in batch_ids]
            ys = [anno_file_to_yolo_y(af, self.grid) for af in anno_files]
            xs = [image.img_to_array(image.load_img(image_file, target_size = self.target_size)) for image_file in image_files]
            self.steps += 1
            yield np.array(xs, dtype=np.float16), np.array(ys, dtype=np.float16)

In [11]:
# Configuration for building the feature data.
# `round` is the fraction of the dataset to process. NOTE: the name shadows the
# builtin round(); generate_features reads this variable for its log message.
round = .1
NB_TRAIN_SAMPLES = int(16494 * round // 1) ## copy from split script
nb_train_samples = NB_TRAIN_SAMPLES
NB_VAL_SAMPLES = int(4086 * round //1) ## copy from split script
nb_val_samples = NB_VAL_SAMPLES
epochs = 50
batch_size = 16
n_classes = 120
img_width = img_height = 224

# Base network and the label grid used with it; the recorded run shows 5x5
# feature maps for "inception", matching this grid.
base_model_name = "inception"
GRIDS = dict(inception=(5, 5))
grid = GRIDS[base_model_name]

# Extracted features are written into a directory named after the base model.
mkdirp(base_model_name)

In [13]:
# Create the generators that stream images and YOLO labels for the train and
# validation splits.
train_generator = YoloDataGenerator(image_dir=train_images_root,
                                    annotation_dir=train_annotations_root,
                                    grid=grid)
val_generator = YoloDataGenerator(image_dir=val_images_root,
                                  annotation_dir=val_annotations_root,
                                  grid=grid)

def generate_features(generator, processor, model, total_samples, output_dir, prefix, batch_size = 16):
    """Run `model` over batches from `generator` and save features and labels.

    Arguments:
        generator: iterator yielding (images, labels) batches.
        processor: preprocessing function applied to each image batch.
        model: network whose predict() output is stored as the features.
        total_samples: number of samples to process; only
            total_samples // batch_size full batches are pulled.
        output_dir: directory the .npy files are written to.
        prefix: file name prefix; writes "<prefix>.x.npy" (features) and
            "<prefix>.y.npy" (labels).
        batch_size: batch size used for the batch count and for predict();
            presumably the generator yields batches of this size - confirm
            against the caller.
    """
    enqueuer = GeneratorEnqueuer(generator)
    feature_file = npy_file_x(output_dir, prefix)
    label_file = npy_file_y(output_dir, prefix)
    all_features = []
    all_labels = []
    batch_nums = total_samples // batch_size
    progbar = Progbar(target=batch_nums)

    enqueuer.start()
    try:
        output_generator = enqueuer.get()
        for b in range(batch_nums):
            # Pull one batch of raw data.
            batch_x, batch_labels = next(output_generator)
            # Extract the features for this batch.
            feature = model.predict(processor(batch_x), batch_size = batch_size)
            all_labels.append(batch_labels)
            all_features.append(feature)
            # Report the number of completed batches (1-based, never above target).
            progbar.update(b + 1)
    finally:
        # Always shut the background worker down, even when a batch fails.
        enqueuer.stop()

    all_features = np.concatenate(all_features)
    all_labels = np.concatenate(all_labels)

    # Write in binary mode and close the files before reporting success.
    with open(feature_file, 'wb') as feature_out:
        np.save(feature_out, all_features)
    with open(label_file, 'wb') as label_out:
        np.save(label_out, all_labels)

    # `round` resolves to the notebook-level sampling fraction, not the builtin.
    print("Round {} finished, saved to features {} (shapes {}) labels {} (shape {})".format(round, feature_file, all_features.shape, label_file, all_labels.shape))


In [16]:
# Generate the features: load the base network, then extract and save the
# validation and training features.
base_model, base_decoder, processor = load_base(base_model_name)
# Mark every layer except the last one as non-trainable.
for layer in base_model.layers[:-1]:
    layer.trainable = False
# Single-argument print() gives the same output under Python 2 and Python 3.
print("load the base model done {}".format(base_model_name))
generate_features(val_generator.generate(), processor, base_model, nb_val_samples, base_model_name, "val-dogs-120")
generate_features(train_generator.generate(), processor, base_model, nb_train_samples, base_model_name, "train-dogs-120")

load the base model done inception
Round 0.1 finished, saved to features inception/val-dogs-120.x.npy (shapes (400, 5, 5, 2048)) labels inception/val-dogs-120.y.npy (shape (400, 125))
Round 0.1 finished, saved to features inception/train-dogs-120.x.npy (shapes (1648, 5, 5, 2048)) labels inception/train-dogs-120.y.npy (shape (1648, 125))
