In [2]:
%matplotlib inline

import os
import glob
import json
import time
import math
import copy
import datetime

import bcolz
import numpy as np
import pandas as pd

from skimage import transform
from skimage import img_as_ubyte

from keras.utils import np_utils
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt
from IPython.display import display

import utils
import config

Using TensorFlow backend.


In [3]:
# Class labels; a label's position in this list is the integer class index
# used when the training targets are encoded.
classes = [
    'ALB', 'BET', 'DOL', 'LAG',
    'NoF', 'OTHER', 'SHARK', 'YFT',
]

In [4]:
def read_relabels(path):
    """Read the relabel list and map training image paths to their revised label.

    Every usable line is split on whitespace into (at least) three columns:
    image name without extension, class directory, revised label.

    # Arguments
        path: Path of the relabel file.
    # Returns
        Dict mapping "<config.training_images_path>/<class>/<name>.jpg" to the
        content of the third column.
    """
    # https://www.kaggle.com/c/the-nature-conservancy-fisheries-monitoring/forums/t/28150/unified-effort-to-relabel-the-training-set
    relabels = {}
    with open(path) as f:
        for line in f:
            cols = line.split()
            # Skip blank or truncated lines (e.g. a trailing empty line)
            # instead of failing with an IndexError on cols[1] / cols[2].
            if len(cols) < 3:
                continue
            src = "{}/{}/{}.jpg".format(config.training_images_path, cols[1], cols[0])
            relabels[src] = cols[2]
    return relabels


def read_bbox_annotations(path):
    """Load the bounding box annotation files of every class from a directory.

    # Arguments
        path: Directory holding one "<class>.json" file per class. Classes
            without a file are skipped.
    # Returns
        Dict merged (via dict.update) from the results of
        utils.read_bbox_json for each annotation file that exists.
    """
    # NOTE(review): read_training_images indexes the result as
    # boxes[class][image name]; this presumes utils.read_bbox_json returns
    # entries keyed by class - confirm against utils.
    boxes = {}
    for c in classes:
        # Keep the directory in `path` untouched: reassigning it made every
        # class after the first look for "<dir>/ALB.json/<class>.json", so
        # only the first annotation file was ever read.
        json_path = os.path.join(path, c + '.json')
        if os.path.isfile(json_path):
            class_boxes = utils.read_bbox_json(json_path)
            boxes.update(class_boxes)
    return boxes


def preprocess_img_data(img_arr):
    """Run `preprocess_input` on image data and hand back its result.

    # Arguments
        img_arr: Image data, passed unchanged to `preprocess_input`.
    # Returns
        Whatever `preprocess_input` returns. Previously the result was
        discarded and None was returned, so callers could only rely on a
        possible in-place modification of `img_arr`.
    """
    # NOTE(review): `preprocess_input` is neither imported nor defined in the
    # visible cells - presumably the Keras helper matching the VGG model used
    # later; confirm it is in scope before calling this.
    return preprocess_input(img_arr)


def read_training_images(path, boxes, relabels):
    """Load, resize and label all training images together with their largest bbox.

    For every class directory below `path` each "*.jpg" file is resized to
    (config.img_w, config.img_h). The largest annotated box of the image
    (by w * h) is scaled to the resized image; images without annotation get
    the dummy box [0, 0, 0, 0]. Images listed in `relabels` either get the
    revised class or, when the revised label is 'revise', are left out.

    # Arguments
        path: Directory containing one sub-directory per class.
        boxes: Annotations indexed as boxes[class][image file name], each a
            list of [x, y, w, h] boxes. The annotations are not modified.
        relabels: Dict mapping image path to revised label (see read_relabels).
    # Returns
        X_train: uint8 array of the resized images.
        y_train: One-hot class matrix with len(classes) columns.
        y_train_box: float32 array with one [x, y, w, h] row per image.
    """
    X_train = []
    y_train = []
    y_train_box = []

    t0 = time.time()
    print('Reading training images...')

    for class_index, c in enumerate(classes):
        # glob order depends on the file system; sort for a reproducible
        # sample order.
        images = sorted(glob.glob(os.path.join(path, c, '*.jpg')))
        print('Loading class: {}'.format(c))

        for img_path in images:
            # print('Reading: ', img_path)
            img_name = os.path.basename(img_path)
            img = Image.open(img_path)
            x_scale = float(config.img_w) / float(img.width)
            y_scale = float(config.img_h) / float(img.height)
            img = img.resize((config.img_w, config.img_h))

            max_box = [0, 0, 0, 0]
            # get the largest bbox
            if c in boxes and img_name in boxes[c]:
                max_area = 0
                for box in boxes[c][img_name]:
                    box_area = box[2] * box[3]
                    if box_area > max_area:
                        max_area = box_area
                        max_box = box

            # Build a new list instead of scaling in place: `max_box` may be
            # the very list stored in `boxes`, and mutating it would scale the
            # annotation again each time this function is re-run.
            max_box = [max_box[0] * x_scale,
                       max_box[1] * y_scale,
                       max_box[2] * x_scale,
                       max_box[3] * y_scale]

            add_img = True
            img_class = class_index
            if img_path in relabels:
                if relabels[img_path] == 'revise':
                    add_img = False
                    print('Image omitted: ', img_path)
                else:
                    print('Label revised: ', img_path, relabels[img_path])
                    img_class = classes.index(relabels[img_path])

            if add_img:
                X_train.append(np.asarray(img, dtype=np.uint8))
                y_train.append(img_class)
                y_train_box.append(max_box)

    X_train = np.array(X_train)
    y_train = np.array(y_train, dtype=np.uint8)
    # derive the number of columns from the class list instead of a literal 8
    y_train = np_utils.to_categorical(y_train, len(classes))
    y_train_box = np.array(y_train_box, dtype=np.float32)

    t1 = time.time()
    print('Reading finished: {} seconds'.format(round(t1 - t0, 2)))
    print('Training data shape:', X_train.shape)
    return X_train, y_train, y_train_box


def read_testing_images(path):
    """Load and resize all "*.jpg" test images found directly in `path`.

    # Arguments
        path: Directory containing the test images.
    # Returns
        X_test: float32 array of the images resized to
            (config.img_w, config.img_h).
        Id_test: Array of the image file names, in the same order as X_test.
    """
    # NOTE(review): read_training_images resizes with PIL's default filter
    # and stores uint8, while this function uses the Lanczos filter and
    # float32 - confirm that the train/test mismatch is intended.
    X_test = []
    Id_test = []

    print('Reading testing data...')
    t0 = time.time()
    # glob order depends on the file system; sort for a reproducible order.
    images = sorted(glob.glob(os.path.join(path, '*.jpg')))

    for img_path in images:
        img = Image.open(img_path)
        # Image.ANTIALIAS was an alias of Image.LANCZOS and has been removed
        # in Pillow 10; LANCZOS selects the same filter.
        img = img.resize((config.img_w, config.img_h), Image.LANCZOS)
        X_test.append(np.asarray(img, dtype=np.float32))
        Id_test.append(os.path.basename(img_path))

    X_test = np.array(X_test)
    Id_test = np.array(Id_test)

    t1 = time.time()
    print('Reading finished: {} seconds'.format(round(t1 - t0, 2)))
    print('Test data shape:', X_test.shape)
    return X_test, Id_test

In [85]:
def plot(img):
    """Display an image with matplotlib.

    The input is shallow-copied and converted to an unsigned 8-bit array
    before it is handed to plt.imshow; nothing is returned.
    """
    pixels = np.array(copy.copy(img), dtype=np.uint8)
    plt.imshow(pixels)



def plot_box(a, b, c, d):
    """Draw a labelled quadrilateral through four points on the current axes.

    # Arguments
        a, b, c, d: Corner points; only index 0 (x) and index 1 (y) are read.
            They are labelled P0..P3 in yellow, marked with red dots and
            joined a-b-c-d-a with green lines.
    """
    ax = plt.gca()
    corners = (a, b, c, d)
    labels = ('P0', 'P1', 'P2', 'P3')

    # corner labels
    for label, pt in zip(labels, corners):
        ax.text(pt[0], pt[1], label, color='yellow')

    # corner markers
    for pt in corners:
        ax.plot(pt[0], pt[1], 'o', color='red')

    # edges, closing the loop from the last corner back to the first
    for start, end in zip(corners, corners[1:] + corners[:1]):
        ax.plot([start[0], end[0]], [start[1], end[1]], color='green')


def create_rect_xywh(box, color='red'):
    """Create an unfilled matplotlib rectangle patch from an [x, y, w, h] box.

    # Arguments
        box: Sequence whose first four items are x, y, width and height.
        color: Outline color of the rectangle.
    # Returns
        The plt.Rectangle instance (line width 2, not filled).
    """
    origin = (box[0], box[1])
    width, height = box[2], box[3]
    return plt.Rectangle(origin, width, height,
                         fill=False, linewidth=2, color=color)

def plot_bb(img, bb):
    """Show an image in a new 9x12 figure and overlay its bounding box.

    # Arguments
        img: Image passed to plot().
        bb: Bounding box [x, y, w, h]. It is printed, and drawn in yellow
            only when both width and height are positive.
    """
    plt.figure(figsize=(9, 12))
    plot(img)
    axes = plt.gca()
    print('Box (x,y,w,h): ', bb)

    # nothing to draw for an empty / dummy box
    if not (bb[2] > 0 and bb[3] > 0):
        return
    axes.add_patch(create_rect_xywh(bb, 'yellow'))
        

def rotation(angle):
    """Build the 3x3 homogeneous matrix of a 2D rotation.

    # Arguments
        angle: Rotation angle in radians.
    # Returns
        3x3 numpy array [[cos, -sin, 0], [sin, cos, 0], [0, 0, 1]].
    """
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    rows = [
        [cos_a, -sin_a, 0],
        [sin_a, cos_a, 0],
        [0, 0, 1],
    ]
    return np.array(rows)


def translation(x, y):
    """Build the 3x3 homogeneous matrix of a 2D translation.

    # Arguments
        x: Offset along x.
        y: Offset along y.
    # Returns
        3x3 numpy array [[1, 0, x], [0, 1, y], [0, 0, 1]].
    """
    x_row = [1, 0, x]
    y_row = [0, 1, y]
    w_row = [0, 0, 1]
    return np.array([x_row, y_row, w_row])


def scale(sx, sy):
    """Build the 3x3 homogeneous matrix of a 2D scaling about the origin.

    # Arguments
        sx: Scale factor along x.
        sy: Scale factor along y.
    # Returns
        3x3 numpy array with (sx, sy, 1) on the diagonal and zeros elsewhere.
    """
    return np.diag([sx, sy, 1])

    
def box_zoom_rotate_translate(img, bb, x_scale_range,
                              y_scale_range, rotation_range,
                              translation_range, mode='edge'):
    """Randomly zoom, rotate and translate an image around its bounding box.

    The box centre is moved to the origin, scaled and rotated there, and then
    shifted by a random offset that is clamped so that the axis aligned box
    of the result stays inside the image. The same transformation is applied
    to the image with skimage.transform.warp.

    # Arguments
        img: Input image array indexed as (row, column, channel), i.e.
            (h, w, c).
        bb: Bounding box tuple/array [x, y, w, h].
        x_scale_range: x scale is drawn uniformly from [1, x_scale_range].
        y_scale_range: y scale is drawn uniformly from [1, y_scale_range].
        rotation_range: The angle (radians) is drawn uniformly from
            [-rotation_range, rotation_range].
        translation_range: Both offsets are drawn uniformly from
            [-translation_range, translation_range] before clamping.
        mode: Border handling mode passed on to skimage.transform.warp.
    # Returns
        Zoomed, rotated and translated image converted with img_as_ubyte.
        New axis aligned bounding box [x, y, w, h].
        Transformed original bounding box as a 4x3 array of homogeneous
        points; only the first two columns (x, y) of each row are meaningful.
    # Raises
        None
    """

    sx = np.random.uniform(1, x_scale_range)
    sy = np.random.uniform(1, y_scale_range)
    theta = np.random.uniform(-rotation_range, rotation_range)
    rtx = np.random.uniform(-translation_range, translation_range)
    rty = np.random.uniform(-translation_range, translation_range)

    # zoom center
    zcx = (bb[0] + bb[2] / 2.0 + 0.5)
    zcy = (bb[1] + bb[3] / 2.0 + 0.5)

    # box top left
    box_tl = [bb[0], bb[1], 1]
    # box bottom right
    box_br = [bb[0] + bb[2], bb[1] + bb[3], 1]

    # transformation matrices
    tm = translation(-zcx, -zcy)
    sm = scale(sx, sy)
    rm = rotation(-theta)

    # rotate and zoom around the center of bb
    t = np.dot(rm, np.dot(sm, tm))

    # calculate zoomed and rotated bounding box
    # v is the top edge of the box; it is only scaled and rotated, never
    # translated, so it can be added to the transformed corners.
    v = np.array([box_br[0] - box_tl[0], 0, 1])

    box_tl = np.dot(t, box_tl)
    box_br = np.dot(t, box_br)
    v = np.dot(rm, np.dot(sm, v))

    box = np.array([box_tl, box_tl + v, box_br, box_br - v])

    # calculate min and max translation so that the final axis aligned
    # box remains inside the image
    tl_x = np.min([p[0] for p in box])
    tl_y = np.min([p[1] for p in box])
    br_x = np.max([p[0] for p in box])
    br_y = np.max([p[1] for p in box])

    # img.shape[:2] is (rows, columns) = (height, width), but the limits
    # below are (x, y) pairs, so width has to come first. Using shape[:2]
    # directly swapped the limits for non-square images.
    img_h, img_w = img.shape[:2]
    min_translation = -1 * np.array([tl_x, tl_y])
    max_translation = np.array([img_w, img_h]) - np.array([br_x, br_y])

    # get random translation between min and max
    rtx = np.max([min_translation[0], rtx])
    rtx = np.min([max_translation[0], rtx])
    rty = np.max([min_translation[1], rty])
    rty = np.min([max_translation[1], rty])

    t2 = translation(rtx, rty)

    # calculate final axis aligned bounding box
    box_tl = np.dot(t2, box_tl)
    box_br = np.dot(t2, box_br)

    box = np.array([box_tl, box_tl + v, box_br, box_br - v])
    box_tl[0] = np.min([p[0] for p in box])
    box_tl[1] = np.min([p[1] for p in box])
    box_br[0] = np.max([p[0] for p in box])
    box_br[1] = np.max([p[1] for p in box])

    # transform the image
    # warp() expects the mapping from output to input coordinates, so the
    # inverse steps are chained in reverse order: undo the translation,
    # undo the rotation, undo the zoom, move back to the zoom center.
    tc = transform.SimilarityTransform(matrix=translation(zcx, zcy))
    tz = transform.SimilarityTransform(matrix=scale(1.0 / sx, 1.0 / sy))
    tr = transform.SimilarityTransform(matrix=rotation(theta))
    tu = transform.SimilarityTransform(matrix=translation(-rtx, -rty))

    img = img_as_ubyte(transform.warp(img, tu + tr + tz + tc, mode=mode))
    return img, [box_tl[0], box_tl[1], box_br[0] - box_tl[0], box_br[1] - box_tl[1]], box

In [5]:
# Read the bounding box annotations and the relabel list first; both are
# needed to build the training arrays.
boxes = read_bbox_annotations(config.bbox_annotations_path)
relabels = read_relabels('relabels.csv')

(X_train, y_train, y_train_box) = read_training_images(
    config.training_images_path, boxes, relabels)

In [379]:
# Persist the freshly read training arrays, one bcolz directory per array.
print('Saving training data...')
for _fname, _arr in (('X_train.bcolz', X_train),
                     ('y_train.bcolz', y_train),
                     ('y_train_box.bcolz', y_train_box)):
    utils.save_array(_fname, _arr)

Saving training data...


In [91]:
print('Augmenting training data...')
# extend training set: every original image yields `factor` randomly
# zoomed / rotated / translated copies that are appended after the originals
factor = 10
rotation_range = 2 * np.pi
x_scale_range = 1.2
y_scale_range = 1.2
translation_range = 50

n = len(X_train)
total = n * factor
cnt = 0
last_percent = 0

# Collect the augmented samples in lists and concatenate once at the end.
# Calling np.append inside the loop copied the complete (and growing)
# training set for every single new image.
aug_images = []
aug_labels = []
aug_boxes = []

for i in range(n):
    c = y_train[i]
    # zero width and height mark an image without annotated bbox
    has_box = not (y_train_box[i][2] == 0 and y_train_box[i][3] == 0)
    for k in range(factor):
        img, a_bb, t_bb = box_zoom_rotate_translate(X_train[i],
                                                    y_train_box[i],
                                                    x_scale_range,
                                                    y_scale_range,
                                                    rotation_range,
                                                    translation_range)
        aug_images.append(img)
        aug_labels.append(c)
        # append dummy bbox if there isn't any given for this image
        aug_boxes.append(a_bb if has_box else [0, 0, 0, 0])

        cnt += 1
        percent = int(cnt / (total / 100.0))
        if percent != last_percent:
            print('progress: {}%'.format(percent))
            last_percent = percent

if aug_images:
    # Keep the dtypes of the original arrays (np.append silently upcast the
    # float32 boxes to float64).
    X_train = np.concatenate(
        [X_train, np.array(aug_images, dtype=X_train.dtype)], axis=0)
    y_train = np.concatenate(
        [y_train, np.array(aug_labels, dtype=y_train.dtype)], axis=0)
    y_train_box = np.concatenate(
        [y_train_box, np.array(aug_boxes, dtype=y_train_box.dtype)], axis=0)

print('X_train:', X_train.shape)
print('y_train:', y_train.shape)
print('y_train_box:', y_train_box.shape)
utils.save_array('X_train_augmented.bcolz', X_train)
utils.save_array('y_train_augmented.bcolz', y_train)
utils.save_array('y_train_box_augmented.bcolz', y_train_box)

theta: 1.1808175151407987


  "%s to %s" % (dtypeobj_in, dtypeobj))


theta: -2.336895399346322
(3768, 448, 448, 3)
(3768, 8)
(3767, 4)


In [12]:
# Load the resized test images together with their file names.
X_test, Id_test = read_testing_images(config.testing_images_path)

Reading testing data...


KeyboardInterrupt: 

In [None]:
# Run the training and test images through the VGG model and store the
# resulting features.
# NOTE(review): `create_vgg` and `predict` are not defined in the visible
# cells, and the meaning of the third `predict` argument (8) cannot be told
# from here - confirm both before re-running from a fresh kernel.
print('Preprocessing X_train...')
vgg = create_vgg(config.img_w, config.img_h)
X_train_feat = predict(X_train, vgg, 8)

print('Preprocessing X_test...')
X_test_feat = predict(X_test, vgg, 8)

for _fname, _feat in (('X_train_feat.bcolz', X_train_feat),
                      ('X_test_feat.bcolz', X_test_feat)):
    utils.save_array(_fname, _feat)