In [1]:
import os, gc
import numpy as np
import math
import pandas as pd
from keras import backend as K
from keras.layers import Input, MaxPooling2D, UpSampling2D, Conv2D
#from keras.layers import concatenate
#from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose
from keras.models import Model
from keras.optimizers import Adam
from skimage.transform import resize
from skimage.io import imsave, imread
from skimage.transform import resize 
#from seg_noaa import load_train_data, load_test_data
from segmodels import dice_coef, dice_coef_loss, double_conv_layer
from segmodels import create_model, preprocess_img, preprocess, test_generator
from keras.callbacks import ModelCheckpoint
from matplotlib import pyplot as plt
from keras.layers import Input, merge, Convolution2D, MaxPooling2D, UpSampling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dropout, Activation
from keras import backend as K
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline 

# Smoothing constant read as a global by dice_coef; it is added to both the
# numerator and the denominator of the Dice ratio.
smooth = 1.

# Run a garbage-collection pass; the integer echoed as this cell's output is
# the value returned by gc.collect().
gc.collect()

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


1169

In [2]:
# Network input resolution: every image and mask is resized to this size.
# (512 is the alternative value that was left commented out.)
img_rows = 320  # 512
img_cols = 320  # 512
batch_size = 16
nb_epoch = 25
print(img_rows, img_cols)

# Dataset locations (absolute paths on the training machine).
data_path = '/home/ubuntu/noaa/darknet/seals/'
train_data_path = os.path.join(data_path, 'JPEGImagesBlk')
mask_data_path = '/home/ubuntu/noaa/data/mask/classes'

# Smoothing constant used by dice_coef.
smooth = 1.

# create_model builds a channels-first network (Input((3, rows, cols)),
# concat_axis=1, BatchNormalization(axis=1)) and the fit/predict calls
# transpose the data to (N, C, H, W).  The original cell called
# K.image_dim_ordering() and threw the answer away; check it instead so a
# mis-configured backend fails here rather than deep inside model building.
if K.image_dim_ordering() != 'th':
    raise RuntimeError(
        "This notebook requires channels-first ('th') image_dim_ordering, got %r"
        % (K.image_dim_ordering(),))

# Number of mask classes; the model emits one output channel per class.
classes = 5
OUTPUT_MASK_CHANNELS = classes

(320, 320)


In [3]:

def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient between two tensors.

    Both tensors are flattened, so the score is computed over every element
    at once.  The module-level ``smooth`` constant is added to the numerator
    and the denominator.
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return numerator / denominator


def dice_coef_loss(y_true, y_pred):
    """Loss form of the Dice score: the negated value of ``dice_coef``."""
    score = dice_coef(y_true, y_pred)
    return -score


def double_conv_layer(x, size, dropout, batch_norm):
    """Stack two 3x3 'same' convolutions, each followed by ReLU.

    x          -- input tensor.
    size       -- number of filters for both convolutions.
    dropout    -- dropout rate appended after the second ReLU; skipped unless > 0.
    batch_norm -- when equal to True, BatchNormalization(mode=0, axis=1) is
                  inserted between each convolution and its ReLU.

    Returns the output tensor of the stack.
    """
    out = x
    # The two stages are identical, so build them in a loop.
    for _ in range(2):
        out = Convolution2D(size, 3, 3, border_mode='same')(out)
        if batch_norm == True:
            out = BatchNormalization(mode=0, axis=1)(out)
        out = Activation('relu')(out)
    if dropout > 0:
        return Dropout(dropout)(out)
    return out


def create_model(dropout_val=0.05, batch_norm=True):
    """Build the U-Net style segmentation network (not compiled).

    The input is (3, img_rows, img_cols).  Five down-sampling stages
    (32..512 filters) lead to a 1024-filter bottleneck, followed by five
    up-sampling stages that concatenate the matching encoder output on
    axis 1.  The head is a 1x1 convolution to OUTPUT_MASK_CHANNELS maps,
    batch normalisation and a sigmoid.

    dropout_val -- dropout rate for every double-conv stage except the last
                   decoder stage, which uses 0.
    batch_norm  -- forwarded to double_conv_layer.
    """
    inputs = Input((3, img_rows, img_cols))

    # Contracting path: remember each stage's output for the skip connections.
    skips = []
    tensor = inputs
    for width in (32, 64, 128, 256, 512):
        stage = double_conv_layer(tensor, width, dropout_val, batch_norm)
        skips.append(stage)
        tensor = MaxPooling2D(pool_size=(2, 2))(stage)

    # Bottleneck.
    tensor = double_conv_layer(tensor, 1024, dropout_val, batch_norm)

    # Expanding path: (filters, dropout) per stage; the final stage has no dropout.
    decoder_plan = (
        (512, dropout_val),
        (256, dropout_val),
        (128, dropout_val),
        (64, dropout_val),
        (32, 0),
    )
    for width, stage_dropout in decoder_plan:
        skip = skips.pop()  # deepest remaining encoder output
        upsampled = UpSampling2D(size=(2, 2))(tensor)
        joined = merge([upsampled, skip], mode='concat', concat_axis=1)
        tensor = double_conv_layer(joined, width, stage_dropout, batch_norm)

    # Output head: one sigmoid map per mask channel.
    head = Convolution2D(OUTPUT_MASK_CHANNELS, 1, 1)(tensor)
    head = BatchNormalization(mode=0, axis=1)(head)
    head = Activation('sigmoid')(head)

    return Model(input=inputs, output=head)

def preprocess_img(imgs):
    """Resize a stack of images to the network resolution.

    imgs -- array whose first axis indexes images; each ``imgs[i]`` is resized
            to (img_rows, img_cols, 3) with ``preserve_range=True``.

    Returns a float64 array of shape (N, img_rows, img_cols, 3, 1).  The
    trailing singleton axis is kept because callers strip it with
    ``[:, :, :, :, 0]``.
    """
    count = imgs.shape[0]
    imgs_p = np.empty((count, img_rows, img_cols, 3), dtype=np.float64)
    for i in range(count):
        # Rows first, then columns, to match the buffer allocated above.
        # The original passed (img_cols, img_rows, 3), which only worked
        # while the two sizes were equal.
        imgs_p[i] = resize(imgs[i], (img_rows, img_cols, 3), preserve_range=True)

    return imgs_p[..., np.newaxis]

def preprocess_mask(imgs, channels=5):
    """Resize a stack of multi-channel masks to the network resolution.

    imgs     -- array whose first axis indexes masks; each ``imgs[i]`` is
                passed to ``multi_resize``.
    channels -- number of mask channels per image.

    Returns a uint8 array of shape (N, img_rows, img_cols, channels, 1).  The
    trailing singleton axis is kept because callers strip it with
    ``[:, :, :, :, 0]``.
    """
    count = imgs.shape[0]
    imgs_p = np.empty((count, img_rows, img_cols, channels), dtype=np.uint8)
    for i in range(count):
        # Forward `channels` so the resized mask matches the buffer; the
        # original relied on multi_resize's default channel count.
        imgs_p[i] = multi_resize(imgs[i], classes=channels)

    return imgs_p[..., np.newaxis]

def show_mask(img):
    """Display the first five channels of a mask as a 2x3 tile grid.

    img -- array of shape (height, width, channels) with at least 5 channels.

    Channel ``i`` is drawn at grid row ``i // 3`` and column ``i % 3``.  A
    2-pixel line of value 1 is drawn along the top and left edge of every
    tile, spanning the full canvas.  The sixth tile stays empty.
    """
    tile_h, tile_w = img.shape[0], img.shape[1]
    imout = np.zeros((tile_h * 2, tile_w * 3), dtype=np.uint8)
    for i in range(5):
        grid_row, grid_col = divmod(i, 3)
        y_pos = grid_row * tile_h
        # Horizontal offsets use the tile *width*.  The original used
        # img.shape[0] here, which is only correct for square masks.
        x_pos = grid_col * tile_w
        imout[y_pos:y_pos + tile_h, x_pos:x_pos + tile_w] = img[:, :, i]
        imout[y_pos:y_pos + 2, :] = 1
        imout[:, x_pos:x_pos + 2] = 1
    plt.imshow(imout)
    plt.show()
    
def multi_resize(img_mask, image_rows=img_rows, image_cols=img_cols, classes=classes):
    """Resize every channel of a (H, W, classes) mask independently.

    img_mask   -- array indexed as ``img_mask[:, :, i]`` for each channel.
    image_rows -- output height (defaults to the notebook's img_rows).
    image_cols -- output width (defaults to the notebook's img_cols).
    classes    -- number of channels to process.

    Returns a uint8 array of shape (image_rows, image_cols, classes).
    """
    imout = np.empty((image_rows, image_cols, classes), dtype=np.uint8)
    for i in range(classes):
        channel = img_mask[:, :, i].astype(np.float32)
        # Resize to the requested size.  The original used the global
        # img_rows/img_cols here and silently ignored the parameters.
        # NOTE(review): the float result is stored into a uint8 buffer, so
        # fractional values are truncated -- confirm that is intended.
        imout[:, :, i] = resize(channel, (image_rows, image_cols), mode='reflect')
    return imout

def create_train_data(images, classes=5):
    """Load and resize the image/mask pairs named by ``images``.

    images  -- sequence of mask file names found under
               ``mask_data_path/train``.  The matching picture is looked up
               in ``train_data_path`` as the text before the first '.' plus
               '.jpg'.
    classes -- number of mask channels per image.

    Returns ``(imgs, imgs_mask)``: a float32 array of shape
    (N, img_rows, img_cols, 3) and a uint8 array of shape
    (N, img_rows, img_cols, classes).
    """
    total = len(images)
    imgs = np.empty((total, img_rows, img_cols, 3), dtype=np.float32)
    imgs_mask = np.empty((total, img_rows, img_cols, classes), dtype=np.uint8)

    print('-'*30)
    print('Creating training images...')
    print('-'*30)
    for i, image_mask_name in enumerate(images):
        image_name = image_mask_name.split('.')[0] + '.jpg'
        img = imread(os.path.join(train_data_path, image_name), as_grey=False)
        img_mask = np.load(os.path.join(mask_data_path, 'train', image_mask_name))

        imgs[i] = resize(img, (img_rows, img_cols), mode='reflect')
        # Forward `classes` so the resized mask matches the buffer above; the
        # original relied on multi_resize's default channel count.
        imgs_mask[i] = multi_resize(img_mask, classes=classes)

        if i % 500 == 0:
            print('Done: {0}/{1} images'.format(i, total))
    print('Loading done.')
    return imgs, imgs_mask

def test_generator(df, input_folder, batch_size = 16):
    """Endlessly yield normalised, channels-first image batches.

    df           -- DataFrame; the first value of each row (``row[0]``) is the
                    image file name.
    input_folder -- sub-folder of ``data_path`` that holds the images.
    batch_size   -- maximum number of images per batch.

    Yields float32 arrays of shape (batch, 3, img_rows, img_cols).  After the
    last rows have been served the generator starts again from the first row.

    Relies on the module-level ``mean`` and ``std`` assigned by the training
    loop, so that loop must have run first.
    """
    n = df.shape[0]
    batch_index = 0
    while 1:
        current_index = batch_index * batch_size
        current_batch_size = min(batch_size, n - current_index)
        # Wrap around as soon as this batch reaches the end of the frame.
        # The original only wrapped on a *short* batch, so it yielded an
        # empty batch whenever n was an exact multiple of batch_size.
        if current_index + current_batch_size < n:
            batch_index += 1
        else:
            batch_index = 0
        batch_df = df[current_index:current_index+current_batch_size]
        batch_x = np.zeros((batch_df.shape[0], img_rows, img_cols, 3)).astype('float32')
        for i, (index, row) in enumerate(batch_df.iterrows()):
            img = imread(os.path.join(data_path, input_folder, row[0]), as_grey=False)
            img = resize(img, (img_rows, img_cols), mode='reflect')
            # Same centring/scaling that was applied to the training images.
            batch_x[i] = (img - mean) / std
        if batch_index%300 == 0: print(batch_index)
        yield(batch_x.transpose(0, 3, 1, 2))

def testchk_generator(df, input_folder, batch_size = 16):
    """Return the first batch that ``test_generator`` would produce.

    Despite its name this is not a generator.  The original body was a copy
    of ``test_generator`` whose ``while 1`` loop ended in ``return``, so it
    always stopped after the first pass.  Delegating keeps the two functions
    in step and removes the misleading loop.

    df           -- DataFrame; the first value of each row is the image file name.
    input_folder -- sub-folder of ``data_path`` that holds the images.
    batch_size   -- maximum number of images in the returned batch.

    Returns a float32 array of shape (batch, 3, img_rows, img_cols).
    """
    return next(test_generator(df, input_folder, batch_size=batch_size))

In [None]:
# Collect the saved mask arrays.  os.listdir returns names in arbitrary order,
# so sort them: the fold contents, and therefore the tail that Keras takes for
# validation_split, are then the same on every run.
images = sorted(
    name for name in os.listdir(os.path.join(mask_data_path, 'train'))
    if name.endswith('.npy')  # extension test, not a substring match
)

# Two folds keyed on the parity of the integer before the first '_' in the
# file name: fold 0 holds the even values, fold 1 the odd ones.
img_folds = [
    [name for name in images if int(name.split('_')[0]) % 2 == parity]
    for parity in range(2)
]
len(images)

7973

In [None]:
# Two-fold cross prediction: train on one fold, predict masks for the other
# fold and save them under <mask_data_path>/traincv.
for fold in range(2):
    held_out_names = img_folds[1 - fold]  # the fold that is NOT trained on

    print('-'*30)
    print('Upload data...')
    print('-'*30)
    imgs_train, imgs_mask_train = create_train_data(img_folds[fold], classes=5)
    imgs_train = preprocess_img(imgs_train)

    print('-'*30)
    print('Process data...')
    print('-'*30)
    imgs_mask_train = preprocess_mask(imgs_mask_train)
    imgs_train = imgs_train.astype('float32')
    # `mean` and `std` are deliberately module-level names: test_generator and
    # testchk_generator read them as globals.
    mean = np.mean(imgs_train)  # mean for data centering
    std = np.std(imgs_train)  # std for data normalization

    imgs_train -= mean
    imgs_train /= std

    # Drop the trailing singleton axis added by the preprocess_* helpers.
    imgs_train = imgs_train[:,:,:,:,0]
    imgs_mask_train = imgs_mask_train[:,:,:,:,0]

    print('-'*30)
    print('Creating and compiling model...')
    print('-'*30)
    model = create_model()
    model_checkpoint = ModelCheckpoint('weights_class_fold'+str(fold)+'.h5', monitor='val_loss', save_best_only=True)

    print('-'*30)
    print('Fitting model...')
    print('-'*30)
    optim = Adam(lr=.001)
    model.compile(optimizer=optim, loss=dice_coef_loss, metrics=[dice_coef])
    # The arrays are stored channels-last; the network expects (N, C, H, W).
    model.fit(imgs_train.transpose(0, 3, 1, 2), imgs_mask_train.transpose(0, 3, 1, 2), batch_size=batch_size,
              verbose=1, shuffle=True, nb_epoch=nb_epoch,
              validation_split=0.2,
              callbacks=[model_checkpoint])
    # NOTE(review): prediction below uses the weights from the final epoch,
    # not the best checkpoint written by ModelCheckpoint -- confirm intent.

    print('-'*30)
    print('Predicting masks...')
    print('-'*30)
    imgs_test, tmp  = create_train_data(held_out_names, classes=5)
    imgs_test = preprocess_img(imgs_test)
    del tmp  # the held-out masks are not needed for prediction
    gc.collect()

    # Normalise with the statistics of the training fold.
    imgs_test -= mean
    imgs_test /= std
    imgs_test = imgs_test[:,:,:,:,0]

    print('-'*30)
    print('Predicting masks on test data...')
    print('-'*30)
    imgs_mask_test = model.predict(imgs_test.transpose(0, 3, 1, 2), batch_size=16, verbose=1)
    # The network ends in a sigmoid, so predictions lie in [0, 1].  Casting
    # them straight to uint8 (as the original did) truncates almost every
    # value to 0; binarise at 0.5 first so the saved uint8 masks carry the
    # prediction.
    # NOTE(review): if downstream code wants probabilities, save float32 instead.
    imgs_mask_test = (imgs_mask_test.transpose(0, 2, 3, 1) > 0.5).astype(np.uint8)

    print('-' * 30)
    print('Saving predicted masks to files...')
    print('-' * 30)
    pred_dir = os.path.join(mask_data_path, 'traincv')
    if not os.path.exists(pred_dir):
        os.mkdir(pred_dir)
    for image, image_id in zip(imgs_mask_test, held_out_names):
        np.save(os.path.join(pred_dir, image_id), image)

------------------------------
Upload data...
------------------------------
------------------------------
Creating training images...
------------------------------
Done: 0/4027 images
Done: 500/4027 images
