# Naive Object Detection

This attempt at object detection takes an image of size 448x448 (scaling it to this size if necessary)
and slides a 224x224 window over it in steps of 32 pixels (7x7=49 positions in total).
Each window is classified with a pretrained VGG16 image classifier
(https://neurohive.io/en/popular-networks/vgg16/).

For a few manually selected classes, it then picks the highest-scoring window and draws it on the image.

The code is almost literally taken from https://github.com/DOsinga/deep_learning_cookbook.

In [None]:
from keras.applications import vgg16
from keras import backend as K
from keras.preprocessing.image import load_img, img_to_array
from keras.models import Model, load_model
from keras.layers import Flatten, Dense, Input, TimeDistributed
import numpy as np
from collections import Counter, defaultdict
from keras.preprocessing import image
from PIL import ImageDraw

from scipy.misc import imread, imresize, imsave, fromimage, toimage

try:
    from io import BytesIO
except ImportError:
    from StringIO import StringIO as BytesIO
import PIL
from IPython.display import clear_output, Image, display, HTML

## Helper Routines

Some helper routines to pre-process an image, and to show a pre-processed image again.

In [None]:
def showarray(a, fmt='jpeg'):
    """Display the image array `a` inline in the notebook.

    The array is converted to a PIL image, encoded in the format named by
    `fmt` into an in-memory buffer, and the encoded bytes are handed to
    IPython's `display`.
    """
    buffer = BytesIO()
    picture = PIL.Image.fromarray(a)
    picture.save(buffer, format=fmt)
    encoded = buffer.getvalue()
    display(Image(data=encoded))

def preprocess_image(image_path, target_size=None):
    """Load an image file and return it as a VGG16-preprocessed batch of one.

    image_path  -- path of the image file, passed to `load_img`
    target_size -- forwarded unchanged to `load_img` (None keeps the
                   original size)

    Returns the array produced by `vgg16.preprocess_input`, with a leading
    batch axis of length 1.
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    # The network works on batches, so add a leading batch axis.
    batch = pixels[np.newaxis, ...]
    return vgg16.preprocess_input(batch)

def deprocess_image(x, w, h):
    """Undo the VGG16 preprocessing so the array can be shown as an RGB image.

    Works on a copy of `x`: reshapes it to (w, h, 3) (moving the channel axis
    last when the Keras backend uses 'channels_first'), adds the per-channel
    mean pixel back, reverses the channel order from BGR to RGB and clips the
    values to 0..255.

    Returns a uint8 array of shape (w, h, 3).

    NOTE(review): `w` becomes the first array axis and `h` the second; the
    only visible caller passes 448 for both -- confirm the intended order
    before using this with non-square images.
    """
    pixels = x.copy()
    if K.image_data_format() == 'channels_first':
        pixels = pixels.reshape((3, w, h)).transpose((1, 2, 0))
    else:
        pixels = pixels.reshape((w, h, 3))
    # Add the mean pixel back, one channel at a time (still in BGR order here).
    for channel, mean in enumerate((103.939, 116.779, 123.68)):
        pixels[:, :, channel] += mean
    # 'BGR' -> 'RGB'
    rgb = pixels[:, :, ::-1]
    return np.clip(rgb, 0, 255).astype('uint8')

## Loading pretrained classifier

In [None]:
# Build VGG16 with the ImageNet weights, keeping the top layers
# (include_top=True) so the model can be used as a classifier as-is.
base_model = vgg16.VGG16(include_top=True, weights='imagenet')
# Print the model summary.
base_model.summary()

## Object Detection

### Loading image

Load an image, preprocess it, and display the de-processed result for verification.

In [None]:
# Load the test picture at 448x448 as a preprocessed batch of one image.
cat_dog2 = preprocess_image('data/cat_dog.jpg', target_size=(448, 448))
# Reverse the preprocessing and display the result as a sanity check.
showarray(deprocess_image(cat_dog2, w=448, h=448))

### Creating regions

Using a sliding 224x224 window, create 49 regions which will then be classified.

In [None]:
# Slide a 224x224 window over the image in steps of 32 pixels, 7 steps per
# axis. `crops` collects the image patches; `rects` collects the matching
# boxes as (left, top, right, bottom), in the same order as `crops`.
crops = []
rects = []
for row in range(7):
    top = row * 32
    for col in range(7):
        left = col * 32
        crops.append(cat_dog2[0, top:top + 224, left:left + 224, :])
        rects.append((left, top, left + 224, top + 224))
# Stack the patches into a single array so they can be predicted as one batch.
crops = np.asarray(crops)

### Run classifier and show top results

In [None]:
# The crops are slices of `cat_dog2`, which preprocess_image() has already run
# through vgg16.preprocess_input(). Applying it a second time would reverse the
# channel order again and subtract the channel means twice, so the crops are
# fed to the model as they are.
preds = base_model.predict(crops)

# Group the crops by their top-1 predicted class:
# label -> list of (crop index, score of that class for the crop).
crop_scores = defaultdict(list)
for idx, pred in enumerate(vgg16.decode_predictions(preds, top=1)):
    # With top=1 each entry holds a single (class id, label, score) tuple.
    _, label, weight = pred[0]
    crop_scores[label].append((idx, weight))
# Show which classes were predicted for at least one crop.
crop_scores.keys()

### Show top results

For each manually selected class, draw the highest-scoring region on the image.

In [None]:
def draw_best_region_for_label(l, draw, label, color=(0,0,0)):
    """Outline the highest-scoring region for `label` on `draw`.

    l     -- mapping from class label to a list of (region index, score) pairs
    draw  -- object providing rectangle(box, outline=...), e.g. a PIL
             ImageDraw instance
    label -- class label to look up in `l`
    color -- outline colour passed to `draw.rectangle`

    The winning region index is resolved through the module-level `rects`
    list.

    Raises ValueError when `l` holds no entries for `label`.
    """
    # .get() instead of l[label]: indexing a defaultdict with an unknown label
    # would silently insert an empty list into the caller's mapping.
    scored = l.get(label)
    if not scored:
        raise ValueError('no region was classified as {!r}'.format(label))
    best_idx = max(scored, key=lambda entry: entry[1])[0]
    draw.rectangle(rects[best_idx], outline=color)
    
# Reload the picture at 448x448 (no VGG16 preprocessing this time) so the
# boxes can be drawn straight onto it.
cat_dog_img = image.load_img('data/cat_dog.jpg', target_size=(448, 448))
draw = ImageDraw.Draw(cat_dog_img)
# Manually selected classes, each with its own outline colour.
for label, color in (('Egyptian_cat', (255, 0, 0)),
                     ('Labrador_retriever', (0, 255, 0))):
    draw_best_region_for_label(crop_scores, draw, label, color)
# Last expression of the cell: the notebook renders the annotated image.
cat_dog_img