In [1]:
import json
import matplotlib.pyplot as plt
import os

import numpy as np
from pandas import read_csv, merge, concat
from skimage.io import imread
from cv2 import resize

from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model

from efficientnet.keras import EfficientNetB0
from efficientnet.keras import preprocess_input

# Notebook-wide configuration constants.
BATCH_SIZE = 4  # default number of images per batch in make_image_generator
STEPS_PER_EPOCH = 2000  # number of batches consumed by each evaluate_generator call
RANDOM_SEED = 10000  # default seed of create_aug_generator; also the exclusive upper bound of its per-batch seeds
IMG_SIZE = 256  # images are resized to IMG_SIZE x IMG_SIZE before batching

PATH_CHECKPOINT = 'models'  # presumably the directory holding saved model files — confirm
MODEL = 'efficientnet_classifier'  # base name (without extension) of the .hdf5 model file that is loaded


Using TensorFlow backend.


In [2]:
def rle_decode(mask_rle, shape=(768, 768)):
    '''
    Decode a run-length-encoded mask string into a binary image.

    mask_rle: run-length as string formatted "start length start length ...";
              starts are 1-based offsets into the column-major flattened image.
              An empty string decodes to an all-background mask.
    shape: (height, width) of array to return
    Returns numpy uint8 array of exactly that shape, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, starts + lengths):
        img[lo:hi] = 1

    # The runs index the image column by column, so unflatten in Fortran
    # order. For square shapes this equals the former `reshape(shape).T`, but
    # it also returns (height, width) for non-square shapes instead of the
    # transposed (width, height).
    return img.reshape(shape, order='F')


def masks_as_image(in_mask_list):
    '''
    Merge individual run-length-encoded ship masks into one 768x768 array.

    in_mask_list: iterable of mask entries; only str entries are decoded,
                  anything else (e.g. NaN for "no ship") is skipped.
    Returns an int16 array in which each pixel holds the number of decoded
    masks covering it.
    '''
    combined = np.zeros((768, 768), dtype=np.int16)
    encoded_masks = (entry for entry in in_mask_list if isinstance(entry, str))
    for encoded in encoded_masks:
        combined += rle_decode(encoded)
    return combined


def make_image_generator(in_df, batch_size=BATCH_SIZE):
    '''
    Endlessly yield (images, labels) batches for the images listed in in_df.

    in_df: DataFrame with an 'ImageId' column (file name inside the
           module-level train_image_dir) and a 'has_ship' column.
    batch_size: number of images per yielded batch.
    Yields: (x, y) where x stacks batch_size float32 images, each resized to
            IMG_SIZE x IMG_SIZE, along a new first axis, and y is an array of
            the 'has_ship' value taken from the first row of each image group.
    Raises ValueError (on the first iteration) when in_df contains no images.

    The image order is reshuffled with NumPy's global RNG before every pass
    over the data, and a partially filled batch is carried into the next pass.
    '''
    all_batches = list(in_df.groupby('ImageId'))
    if not all_batches:
        # Without this guard the `while True` loop below would spin forever
        # without ever yielding a batch.
        raise ValueError('make_image_generator: in_df contains no images')

    out_rgb = []
    labels = []
    while True:
        np.random.shuffle(all_batches)
        for c_img_id, c_masks in all_batches:
            rgb_path = os.path.join(train_image_dir, c_img_id)
            _img = imread(rgb_path)
            _img = resize(_img, (IMG_SIZE, IMG_SIZE))
            _img = _img.astype(np.float32)

            out_rgb.append(_img)
            # Every row of one image group carries the same image-level label.
            labels.append(c_masks['has_ship'].iloc[0])
            if len(out_rgb) >= batch_size:
                yield np.stack(out_rgb, 0), np.array(labels)
                out_rgb, labels = [], []


def create_aug_generator(in_gen, _image_generator, seed=RANDOM_SEED):
    '''
    Pass every batch of in_gen through _image_generator.flow.

    in_gen: iterable yielding (x, y) batches.
    _image_generator: object exposing a Keras-style
                      flow(x, y, batch_size=, seed=, shuffle=) method.
    seed: seeds NumPy's global RNG when iteration starts; each batch then
          gets its own flow seed drawn from range(seed).
    Yields: the first item produced by flow() for each incoming batch, with
            flow's batch size set to the size of that incoming batch.
    '''
    np.random.seed(seed)
    for batch_x, batch_y in in_gen:
        flow_seed = np.random.choice(range(seed))
        flow_kwargs = dict(batch_size=batch_x.shape[0],
                           seed=flow_seed,
                           shuffle=True)
        flow_iter = _image_generator.flow(batch_x, batch_y, **flow_kwargs)
        yield next(flow_iter)


def preprocess_data(_train_image_dir, _exclude_list, n_train=0.8, balanced=False):
    '''
    Load the segmentation CSV and split it into train / validation frames.

    _train_image_dir: accepted for interface compatibility; not used here
                      (the CSV is read from the module-level data_dir).
    _exclude_list: ImageId values to drop before any further processing.
    n_train: fraction of the unique images (in ImageId-sorted order) that
             goes to the training split; the remainder is the validation split.
    balanced: if True, keep all training images with ships and down-sample
              the ship-free training images to at most the same count.
    Returns (_train_df, _valid_df): the mask rows of each split joined with
    the per-image 'ships' (mask count) and 'has_ship' (1.0 / 0.0) columns.
    '''
    masks = read_csv(os.path.join(data_dir, 'train_ship_segmentations_v2.csv.zip'))
    masks = masks[~masks['ImageId'].isin(_exclude_list)]

    # Count the masks per image on a derived frame, so no helper column is
    # written into (and later dropped from) the filtered slice of `masks`.
    ship_flags = masks['EncodedPixels'].map(lambda row: 1 if isinstance(row, str) else 0)
    unique_img_ids = (masks.assign(ships=ship_flags)
                      .groupby('ImageId')
                      .agg({'ships': 'sum'})
                      .reset_index())
    unique_img_ids['has_ship'] = unique_img_ids['ships'].map(lambda x: 1.0 if x > 0 else 0.0)

    n_train_split = int(n_train * len(unique_img_ids))
    train_ids = unique_img_ids.iloc[:n_train_split]
    valid_ids = unique_img_ids.iloc[n_train_split:]

    if balanced:
        ones = train_ids.loc[train_ids['has_ship'] == 1]
        zeros = train_ids.loc[train_ids['has_ship'] == 0]
        # Seeded so repeated runs evaluate the same images; capped so a split
        # with fewer negatives than positives uses all of them instead of
        # raising inside sample().
        zeros = zeros.sample(min(len(ones), len(zeros)), random_state=RANDOM_SEED)
        train_ids = concat([ones, zeros])

    # 'ImageId' is the only column the two sides share; name it explicitly.
    _train_df = merge(masks, train_ids, on='ImageId')
    _valid_df = merge(masks, valid_ids, on='ImageId')

    return _train_df, _valid_df


In [3]:
exclude_list = ['6384c3e78.jpg', '13703f040.jpg', '14715c06d.jpg', '33e0ff2d5.jpg',
                '4d4e09f2a.jpg', '877691df8.jpg', '8b909bb20.jpg', 'a8d99130e.jpg',
                'ad55c3143.jpg', 'c8260c541.jpg', 'd6c7f17c7.jpg', 'dc3e7c901.jpg',
                'e44dffe88.jpg', 'ef87bad36.jpg', 'f083256d8.jpg',
                '66049e4ea.jpg']  # corrupted images

# The data location comes from config.json so no machine-specific path is
# hard-coded in the notebook.
with open('config.json', 'r', encoding='utf-8') as file:
    configs = json.load(file)

data_dir = configs["DATA_DIRECTORY"]
train_image_dir = os.path.join(data_dir, 'train_v2')

train_df, test_df = preprocess_data(train_image_dir, exclude_list, n_train=0.85, balanced=True)

# Set the image data format before creating objects that may read the backend
# setting when they are constructed (ImageDataGenerator, the loaded model).
K.set_image_data_format('channels_last')

data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)

# Build the model path from the configuration constants instead of repeating
# the directory name as a literal.
model = load_model(os.path.join(PATH_CHECKPOINT, f'{MODEL}.hdf5'))


In [4]:
# Both calls only create generator objects; no image is read until the first
# batch is requested.
train_eval_generator = create_aug_generator(
    in_gen=make_image_generator(in_df=train_df),
    _image_generator=data_generator,
)

In [5]:
# Evaluate on STEPS_PER_EPOCH batches drawn from the training split.
model.evaluate_generator(generator=train_eval_generator,
                         steps=STEPS_PER_EPOCH)


[0.31314143538475037, 0.9286249876022339]

In [6]:
# Both calls only create generator objects; no image is read until the first
# batch is requested.
test_eval_generator = create_aug_generator(
    in_gen=make_image_generator(in_df=test_df),
    _image_generator=data_generator,
)

In [7]:
# Evaluate on STEPS_PER_EPOCH batches drawn from the held-out split.
model.evaluate_generator(generator=test_eval_generator,
                         steps=STEPS_PER_EPOCH)


[0.006274908781051636, 0.9441249966621399]