In [1]:
%matplotlib inline

import os
import glob
import json
import time
import math
import copy
import datetime

import bcolz
import numpy as np
import pandas as pd

import skimage
from skimage import transform
from skimage import img_as_ubyte

from keras.utils import np_utils
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt
from IPython.display import display

import utils
import config

Using TensorFlow backend.


In [2]:
# Class labels. A label's position in this list is used as its integer class
# index when the training images are read.
classes = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
# Seed NumPy's global RNG from the project configuration.
np.random.seed(config.random_state)

In [3]:
def read_relabels(path):
    """Read the community relabel list into a dict keyed by image path.

    Each non-blank line is split on whitespace and read as
    `<image name> <original class> <new label>`. The key is built as
    `<config.training_images_path>/<original class>/<image name>.jpg` and the
    value is the third column, stored as-is.

    Source of the list:
    https://www.kaggle.com/c/the-nature-conservancy-fisheries-monitoring/forums/t/28150/unified-effort-to-relabel-the-training-set

    # Arguments
        path: Path of the relabel file.
    # Returns
        Dict mapping image path -> new label string.
    """
    relabels = {}
    with open(path) as f:
        for line in f:
            cols = line.split()
            # A blank line (e.g. a trailing newline at the end of the file)
            # yields no columns; skip it instead of failing on cols[1].
            if not cols:
                continue
            src = "{}/{}/{}.jpg".format(config.training_images_path, cols[1], cols[0])
            relabels[src] = cols[2]
    return relabels


def read_bbox_annotations(path):
    """Load the bounding-box annotation file of every class found in `path`.

    For each entry of `classes`, the file `<path>/<class>.json` is read with
    `utils.read_bbox_json` when it exists, and the result is merged into one
    dict. Classes without an annotation file are skipped.

    # Arguments
        path: Directory containing the per-class `<class>.json` files.
    # Returns
        Dict with the merged annotations.
        NOTE(review): presumably keyed by class name and then image name,
        since read_training_images indexes boxes[c][img_name] - confirm
        against utils.read_bbox_json.
    """
    boxes = {}
    for c in classes:
        # Build the file name from the unchanged base directory. Reassigning
        # `path` here would make every class after the first resolve to a
        # nested path such as ".../ALB.json/BET.json" and be silently skipped.
        class_path = os.path.join(path, c + '.json')
        if os.path.isfile(class_path):
            class_boxes = utils.read_bbox_json(class_path)
            boxes.update(class_boxes)
    return boxes


def preprocess_img_data(img_arr):
    """Run `preprocess_input` on `img_arr` and return its result.

    The result is returned rather than discarded, so the caller does not have
    to rely on `preprocess_input` modifying `img_arr` in place.

    NOTE(review): `preprocess_input` is imported in a later cell of this
    notebook, so that cell must be run before this function is called.

    # Arguments
        img_arr: Image array to preprocess.
    # Returns
        Whatever `preprocess_input` returns for `img_arr`.
    """
    return preprocess_input(img_arr)


def read_training_images(path, boxes, relabels):
    """Read, resize and label every training image below `path`.

    Images are expected in one sub-directory per class (`<path>/<class>/*.jpg`).
    Each image is resized to (config.img_w, config.img_h). Its largest
    annotated box (by w * h) is scaled to the resized image, or [0, 0, 0, 0]
    is used when it has no annotation. Images whose relabel entry is 'revise'
    are dropped; any other relabel entry replaces the directory class.

    # Arguments
        path: Root directory of the training images.
        boxes: Annotations indexed as boxes[class][image file name] -> list of
            boxes whose elements 2 and 3 are used as width and height.
            This dict is not modified.
        relabels: Dict mapping image path -> new label or 'revise'.
    # Returns
        X_train: uint8 array of the resized images.
        y_train: One-hot labels produced by np_utils.to_categorical.
        y_train_box: float32 array with one scaled box per image.
    # Raises
        ValueError: If a relabel entry names a label that is not in `classes`.
    """
    X_train = []
    y_train = []
    y_train_box = []

    t0 = time.time()
    print('Reading training images...')

    for class_index, c in enumerate(classes):
        images = glob.glob(os.path.join(path, c, '*.jpg'))
        print('Loading class: {}'.format(c))

        for img_path in images:
            img_name = os.path.basename(img_path)
            # The context manager releases the file handle once the resized
            # copy has been created.
            with Image.open(img_path) as src:
                x_scale = float(config.img_w) / float(src.width)
                y_scale = float(config.img_h) / float(src.height)
                img = src.resize((config.img_w, config.img_h))

            # Pick the largest annotated box, if any.
            largest_box = [0, 0, 0, 0]
            if c in boxes and img_name in boxes[c]:
                max_area = 0
                for box in boxes[c][img_name]:
                    box_area = box[2] * box[3]
                    if box_area > max_area:
                        max_area = box_area
                        largest_box = box

            # Build a NEW list for the scaled box. Scaling the annotation in
            # place would alter the caller's `boxes`, so a second call would
            # apply the scale factors twice.
            max_box = [largest_box[0] * x_scale,
                       largest_box[1] * y_scale,
                       largest_box[2] * x_scale,
                       largest_box[3] * y_scale]

            add_img = True
            img_class = class_index
            if img_path in relabels:
                if relabels[img_path] == 'revise':
                    add_img = False
                    print('Image omitted: ', img_path)
                else:
                    print('Label revised: ', img_path, relabels[img_path])
                    img_class = classes.index(relabels[img_path])

            if add_img:
                X_train.append(np.asarray(img, dtype=np.uint8))
                y_train.append(img_class)
                y_train_box.append(max_box)

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train, dtype=np.uint8)
    # Derive the number of categories from the class list instead of
    # hard-coding it.
    y_train = np_utils.to_categorical(y_train, len(classes))
    y_train_box = np.asarray(y_train_box, dtype=np.float32)

    t1 = time.time()
    print('Reading finished: {} seconds'.format(round(t1 - t0, 2)))
    print('Training data shape:', X_train.shape)
    return X_train, y_train, y_train_box


def read_testing_images(path):
    """Read and resize every `*.jpg` test image directly inside `path`.

    Each image is resized to (config.img_w, config.img_h) with the Lanczos
    filter and stored as float32.

    # Arguments
        path: Directory containing the test images.
    # Returns
        X_test: float32 array of the resized images.
        Id_test: Array of the image file names, in the same order as X_test.
    """
    X_test = []
    Id_test = []

    print('Reading testing data...')
    t0 = time.time()
    images = glob.glob(os.path.join(path, '*.jpg'))

    for img_path in images:
        # The context manager releases the file handle once the resized copy
        # has been created.
        with Image.open(img_path) as src:
            # Image.LANCZOS is the filter that Image.ANTIALIAS used to alias;
            # the ANTIALIAS name no longer exists in recent Pillow releases.
            img = src.resize((config.img_w, config.img_h), Image.LANCZOS)
        X_test.append(np.asarray(img, dtype=np.float32))
        Id_test.append(os.path.basename(img_path))

    X_test = np.array(X_test)
    Id_test = np.array(Id_test)

    t1 = time.time()
    print('Reading finished: {} seconds'.format(round(t1 - t0, 2)))
    print('Test data shape:', X_test.shape)
    return X_test, Id_test

In [4]:
def plot(img):
    """Display a uint8-cast copy of `img` with matplotlib's imshow."""
    pixels = np.array(copy.copy(img), dtype=np.uint8)
    plt.imshow(pixels)



def plot_box(a, b, c, d):
    """Draw the quadrilateral a-b-c-d on the current axes.

    Each corner is labelled P0..P3 in yellow and marked with a red dot;
    consecutive corners (and d back to a) are joined by green lines.
    """
    corners = (a, b, c, d)
    labels = ('P0', 'P1', 'P2', 'P3')
    ax = plt.gca()

    # Labels first, then markers, then edges.
    for label, pt in zip(labels, corners):
        ax.text(pt[0], pt[1], label, color='yellow')

    for pt in corners:
        ax.plot(pt[0], pt[1], 'o', color='red')

    following = corners[1:] + corners[:1]
    for start, end in zip(corners, following):
        ax.plot([start[0], end[0]], [start[1], end[1]], color='green')


def create_rect_xywh(box, color='red'):
    """Return an unfilled matplotlib Rectangle for a box given as [x, y, w, h]."""
    origin = (box[0], box[1])
    width, height = box[2], box[3]
    return plt.Rectangle(origin, width, height,
                         color=color, fill=False, linewidth=2)

def plot_bb(img, bb):
    """Show `img` in a new 9x12 figure and overlay `bb` ([x, y, w, h]) in yellow.

    The box is printed in every case, but only drawn when both its width and
    its height are positive.
    """
    plt.figure(figsize=(9, 12))
    plot(img)
    axes = plt.gca()
    print('Box (x,y,w,h): ', bb)
    if not (bb[2] > 0 and bb[3] > 0):
        return
    axes.add_patch(create_rect_xywh(bb, 'yellow'))
        

def rotation(angle):
    """Return the 3x3 homogeneous 2-D rotation matrix for `angle` (radians)."""
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    top = [cos_a, -sin_a, 0]
    middle = [sin_a, cos_a, 0]
    bottom = [0, 0, 1]
    return np.array([top, middle, bottom])


def translation(x, y):
    """Return the 3x3 homogeneous matrix that translates by (x, y)."""
    rows = [
        [1, 0, x],
        [0, 1, y],
        [0, 0, 1],
    ]
    return np.array(rows)


def scale(sx, sy):
    """Return the 3x3 homogeneous matrix that scales x by `sx` and y by `sy`."""
    rows = [
        [sx, 0, 0],
        [0, sy, 0],
        [0, 0, 1],
    ]
    return np.array(rows)


def box_zoom_rotate_translate(img, bb, x_scale_range,
                              y_scale_range, rotation_range,
                              translation_range, mode='edge'):
    """Apply one random zoom, rotation and translation to an image and its box.

    Zoom and rotation are centred on the bounding box, or on the image centre
    when there is no box. With a box, the random translation is clamped to
    the range computed to keep the axis-aligned box inside the image.

    # Arguments
        img: Input image array indexed (row, col, channel), i.e. (h, w, c).
        bb: Bounding box [x, y, w, h]. A box with w == 0 and h == 0 is
            treated as "no box".
        x_scale_range: The x zoom factor is drawn uniformly from
            [1, x_scale_range].
        y_scale_range: The y zoom factor is drawn uniformly from
            [1, y_scale_range].
        rotation_range: The angle (radians) is drawn uniformly from
            [-rotation_range, rotation_range].
        translation_range: Each translation component is drawn uniformly from
            [-translation_range, translation_range].
        mode: Border mode forwarded to skimage.transform.warp.
    # Returns
        The warped image, converted with img_as_ubyte.
        New axis aligned bounding box [x, y, w, h] ([0, 0, 0, 0] without a box).
        The four transformed corners of the original box as homogeneous
        row vectors ([0, 0, 0, 0] without a box).
    # Raises
        None
    """
    # The array is indexed (row, col) = (y, x), while the matrices below and
    # skimage's warp work in (x, y) coordinates: x is bounded by the width
    # (shape[1]) and y by the height (shape[0]).
    img_h, img_w = img.shape[:2]

    # Keep this draw order: it fixes which values a seeded RNG hands out.
    sx = np.random.uniform(1, x_scale_range)
    sy = np.random.uniform(1, y_scale_range)
    theta = np.random.uniform(-rotation_range, rotation_range)
    rtx = np.random.uniform(-translation_range, translation_range)
    rty = np.random.uniform(-translation_range, translation_range)

    if bb[2] == 0 and bb[3] == 0:
        # No box: transform around the image centre.
        zcx = (img_w / 2.0)
        zcy = (img_h / 2.0)
        rtx = zcx - rtx
        rty = zcy - rty
        box_tl = [0, 0]
        box_br = [0, 0]
        box = [0, 0, 0, 0]
    else:
        # zoom center
        zcx = (bb[0] + bb[2] / 2.0)
        zcy = (bb[1] + bb[3] / 2.0)

        # box top left
        box_tl = [bb[0], bb[1], 1]
        # box bottom right
        box_br = [bb[0] + bb[2], bb[1] + bb[3], 1]

        # transformation matrices
        tm = translation(-zcx, -zcy)
        sm = scale(sx, sy)
        rm = rotation(-theta)

        # rotate and zoom around the center of bb
        t = np.dot(rm, np.dot(sm, tm))

        # calculate zoomed and rotated bounding box; v is the box's top edge,
        # used to derive the two remaining corners
        v = np.array([box_br[0] - box_tl[0], 0, 1])

        box_tl = np.dot(t, box_tl)
        box_br = np.dot(t, box_br)
        v = np.dot(rm, np.dot(sm, v))

        box = np.array([box_tl, box_tl + v, box_br, box_br - v])

        # calculate min and max translation so that the final axis aligned
        # box remains inside the image
        tl_x = np.min([p[0] for p in box])
        tl_y = np.min([p[1] for p in box])
        br_x = np.max([p[0] for p in box])
        br_y = np.max([p[1] for p in box])

        min_translation = -1 * np.array([tl_x, tl_y])
        # (x, y) limits are (width, height), not the (rows, cols) order of
        # img.shape.
        max_translation = np.array([img_w, img_h]) - np.array([br_x, br_y])

        # get random translation between min and max
        rtx += zcx
        rty += zcy
        rtx = np.max([min_translation[0], rtx])
        rtx = np.min([max_translation[0], rtx])

        rty = np.max([min_translation[1], rty])
        rty = np.min([max_translation[1], rty])

        t2 = translation(rtx, rty)

        # calculate final axis aligned bounding box
        box_tl = np.dot(t2, box_tl)
        box_br = np.dot(t2, box_br)

        # `box` keeps the rotated corners; box_tl / box_br are then
        # overwritten with the axis-aligned extremes.
        box = np.array([box_tl, box_tl + v, box_br, box_br - v])
        box_tl[0] = np.min([p[0] for p in box])
        box_tl[1] = np.min([p[1] for p in box])
        box_br[0] = np.max([p[0] for p in box])
        box_br[1] = np.max([p[1] for p in box])

    # transform the image; warp receives the inverse of the transform that
    # was applied to the box (inverse translation, rotation and scale)
    tc = transform.SimilarityTransform(matrix=translation(zcx, zcy))
    tz = transform.SimilarityTransform(matrix=scale(1.0 / sx, 1.0 / sy))
    tr = transform.SimilarityTransform(matrix=rotation(theta))
    tu = transform.SimilarityTransform(matrix=translation(-rtx, -rty))

    img = img_as_ubyte(transform.warp(img, tu + tr + tz + tc, mode=mode))
    return img, [box_tl[0], box_tl[1], box_br[0] - box_tl[0], box_br[1] - box_tl[1]], box

In [6]:
# Load the relabel list and the bounding-box annotations, then read every
# training image together with its label and box.
relabels = read_relabels('relabels.csv')
boxes = read_bbox_annotations(config.bbox_annotations_path)

X_train, y_train, y_train_box = read_training_images(
    config.training_images_path, boxes, relabels)

Reading training images...
Loading class: ALB
Label revised:  train/ALB/img_02086.jpg YFT
Label revised:  train/ALB/img_00248.jpg OTHER
Label revised:  train/ALB/img_01363.jpg OTHER
Label revised:  train/ALB/img_00568.jpg NoF
Loading class: BET
Loading class: DOL
Image omitted:  train/DOL/img_07212.jpg
Loading class: LAG
Loading class: NoF
Image omitted:  train/NoF/img_01989.jpg
Image omitted:  train/NoF/img_03386.jpg
Label revised:  train/NoF/img_06031.jpg ALB
Label revised:  train/NoF/img_06675.jpg ALB
Label revised:  train/NoF/img_07724.jpg YFT
Label revised:  train/NoF/img_04847.jpg ALB
Label revised:  train/NoF/img_02325.jpg BET
Label revised:  train/NoF/img_04590.jpg ALB
Label revised:  train/NoF/img_00076.jpg ALB
Label revised:  train/NoF/img_04615.jpg ALB
Label revised:  train/NoF/img_00904.jpg ALB
Label revised:  train/NoF/img_00028.jpg ALB
Label revised:  train/NoF/img_02621.jpg ALB
Label revised:  train/NoF/img_06266.jpg YFT
Label revised:  train/NoF/img_02302.jpg ALB
Label 

In [7]:
# Persist the un-augmented training arrays via the project's save helper.
# NOTE(review): presumably utils.save_array writes a bcolz array at the given
# path - confirm in utils.
print('Saving training data...')
utils.save_array('X_train.bcolz', X_train)
utils.save_array('y_train.bcolz', y_train)
utils.save_array('y_train_box.bcolz', y_train_box)

Saving training data...


In [8]:
from joblib import Parallel, delayed
import multiprocessing
import warnings
# Silences every warning for the rest of the session, not only those raised
# during augmentation.
warnings.filterwarnings("ignore")

print('Augmenting training data...')
# extend training set
# Number of augmented copies generated per source image.
factor = 5
# Rotation angles are drawn from [-rotation_range, rotation_range] (radians).
rotation_range = 2 * np.pi
# Zoom factors are drawn from [1, x_scale_range] and [1, y_scale_range].
x_scale_range = 1.2
y_scale_range = 1.2
# Translation components are drawn from [-translation_range, translation_range].
translation_range = 50

# NOTE(review): n, total and last_percent appear unused in the visible cells
# (cnt is reassigned in a later cell) - likely leftovers of a progress display.
n = len(X_train)
total = n * factor
cnt = 0
last_percent = 0
# Number of source images handed to each parallel task.
batch_size = 100

# One joblib worker per CPU core reported by the OS.
num_cores = multiprocessing.cpu_count()

def phelper(X, y, y_box):
    """Create `factor` augmented copies of every image in one batch.

    Uses the notebook-level augmentation settings (factor, x_scale_range,
    y_scale_range, rotation_range, translation_range).

    NOTE(review): the augmentation draws from NumPy's global RNG; when this
    runs in separate joblib workers, confirm the results are reproducible.

    # Arguments
        X: Batch of images.
        y: Labels, index-aligned with X.
        y_box: Bounding boxes [x, y, w, h], index-aligned with X.
    # Returns
        Tuple of three lists: augmented images, their labels (the source
        image's label repeated) and their axis-aligned boxes.
    """
    aug_images, aug_labels, aug_boxes = [], [], []

    started = time.time()
    for idx, sample in enumerate(X):
        label = y[idx]
        for _ in range(factor):
            warped, aligned_box, _corners = box_zoom_rotate_translate(
                sample,
                y_box[idx],
                x_scale_range,
                y_scale_range,
                rotation_range,
                translation_range)
            aug_images.append(warped)
            aug_labels.append(label)
            aug_boxes.append(aligned_box)
    finished = time.time()

    print('{} items in {:.5f} seconds'.format(len(X), finished - started))
    return (aug_images, aug_labels, aug_boxes)


# Hand the training set to the joblib workers in consecutive slices of
# `batch_size` images; `results` holds one phelper() tuple per slice, in order.
results = Parallel(n_jobs=num_cores)(
    delayed(phelper)(
        X_train[start:start + batch_size],
        y_train[start:start + batch_size],
        y_train_box[start:start + batch_size],
    )
    for start in range(0, len(X_train), batch_size)
)

Augmenting training data...
100 items in 27.17373 seconds
100 items in 27.26481 seconds
100 items in 27.65015 seconds
100 items in 27.71538 seconds
100 items in 28.05418 seconds
100 items in 27.96163 seconds
100 items in 27.79634 seconds
100 items in 27.82390 seconds
100 items in 27.88686 seconds
100 items in 27.83264 seconds
100 items in 27.53073 seconds
100 items in 27.79231 seconds
100 items in 23.40769 seconds
100 items in 23.79055 seconds
100 items in 24.21567 seconds
100 items in 24.96522 seconds
100 items in 25.84347 seconds
100 items in 27.31962 seconds
100 items in 28.18462 seconds
100 items in 27.55031 seconds
100 items in 28.01662 seconds
100 items in 28.57741 seconds
100 items in 29.08917 seconds
100 items in 28.80423 seconds
100 items in 28.74994 seconds
100 items in 28.68003 seconds
100 items in 29.11735 seconds
100 items in 28.92473 seconds
100 items in 28.75981 seconds
100 items in 28.26348 seconds
100 items in 27.39495 seconds
100 items in 27.84894 seconds
100 items in

In [9]:
# Merge the augmented batches into the training arrays.
#
# Every piece is collected first and each array is concatenated exactly once:
# calling np.append inside the loop re-copies the whole, ever-growing array
# for every batch.
#
# NOTE: this cell is not idempotent - running it again appends the augmented
# batches to the training arrays a second time.
X_parts = [X_train]
y_parts = [y_train]
box_parts = [y_train_box]

cnt = 0
for batch in results:
    X_parts.append(np.asarray(batch[0]))
    y_parts.append(np.asarray(batch[1]))
    box_parts.append(np.asarray(batch[2]))
    print('batch processed', cnt)
    cnt += 1

X_train = np.concatenate(X_parts, axis=0)
y_train = np.concatenate(y_parts, axis=0)
y_train_box = np.concatenate(box_parts, axis=0)

print('X_train:', X_train.shape)
print('y_train:', y_train.shape)
print('y_train_box:', y_train_box.shape)

batch processed 0
batch processed 1
batch processed 2
batch processed 3
batch processed 4
batch processed 5
batch processed 6
batch processed 7
batch processed 8
batch processed 9
batch processed 10
batch processed 11
batch processed 12
batch processed 13
batch processed 14
batch processed 15
batch processed 16
batch processed 17
batch processed 18
batch processed 19
batch processed 20
batch processed 21
batch processed 22
batch processed 23
batch processed 24
batch processed 25
batch processed 26
batch processed 27
batch processed 28
batch processed 29
batch processed 30
batch processed 31
batch processed 32
batch processed 33
batch processed 34
batch processed 35
batch processed 36
batch processed 37
X_train: (22554, 448, 448, 3)
y_train: (22554, 8)
y_train_box: (22554, 4)


In [10]:
# Persist the training arrays after the augmented batches were merged in,
# under separate file names so the un-augmented copies are kept.
utils.save_array('X_train_augmented.bcolz', X_train)
utils.save_array('y_train_augmented.bcolz', y_train)
utils.save_array('y_train_box_augmented.bcolz', y_train_box)

In [11]:
# Read and resize the test images; Id_test holds the matching file names.
X_test, Id_test = read_testing_images(config.testing_images_path)

Reading testing data...
Reading finished: 31.37 seconds
Test data shape: (1000, 448, 448, 3)


In [25]:
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.layers import Input
from keras.applications.imagenet_utils import preprocess_input

def create_vgg(w, h):
    """Build a frozen VGG16 feature extractor for images of width `w`, height `h`.

    The network is created with ImageNet weights and without its top layers;
    every layer is marked non-trainable.

    # Arguments
        w: Input image width.
        h: Input image height.
    # Returns
        The VGG16 model.
    """
    # The input tensor is laid out (height, width, channels).
    inputs = Input(shape=(h, w, 3))
    model = VGG16(weights='imagenet', include_top=False, input_tensor=inputs)
    for frozen_layer in model.layers:
        frozen_layer.trainable = False
    return model


def predict(X, model, batch_size):
    """Run `model` over `X` in batches, preprocessing each batch on the fly.

    `X` must hold raw (not yet preprocessed) images: every batch is converted
    to float32 and passed through `preprocess_input` here.

    # Arguments
        X: Array of images; it is not modified.
        model: Model exposing a Keras-style `predict` method.
        batch_size: Number of images per batch.
    # Returns
        Array with the model outputs for every image, in input order.
    """
    y_pred = []
    # The loop variable must be the one used for slicing, otherwise the slice
    # depends on an unrelated (or undefined) name.
    for start in range(0, len(X), batch_size):
        # np.array (unlike np.asarray) always copies, so preprocess_input can
        # never modify the caller's array when X is already float32.
        X_p = preprocess_input(np.array(X[start:start + batch_size], dtype=np.float32))
        y_pred.extend(model.predict(X_p))
    return np.array(y_pred)

# Feature extractor sized to the configured image width and height.
vgg = create_vgg(config.img_w, config.img_h)

In [None]:
# Compute and store the VGG features of the training images.
# X_train is passed in raw: predict() preprocesses each batch itself.
print('Preprocessing X_train...')
X_train_feat = predict(X_train, vgg, 16)
print('Saving..')
utils.save_array('X_train_feat.bcolz', X_train_feat)
print('Done')

In [None]:
# Compute and store the VGG features of the test images.
# X_test is passed in raw, exactly like X_train: predict() already applies
# preprocess_input to every batch, so preprocessing here as well would
# process the test images twice and make them inconsistent with the
# training features.
print('Preprocessing X_test...')
X_test_feat = predict(X_test, vgg, 16)
print('Saving..')
utils.save_array('X_test_feat.bcolz', X_test_feat)
print('Done')