In [1]:
import os, random, glob, pickle, collections, math
import numpy as np
import pandas as pd
import ujson as json
from PIL import Image

from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline 

from keras.models import Sequential, Model, load_model, model_from_json
from keras.layers import GlobalAveragePooling2D, Flatten, Dropout, Dense, LeakyReLU
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.preprocessing import image
from keras import backend as K
K.set_image_dim_ordering('tf')

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
# --- Paths -------------------------------------------------------------------
# Training images are read from TRAIN_DIR/<class folder>/<image file>.
TRAIN_DIR = '../data/fish/train-all/'
TEST_DIR =  '../data/fish/test/' #'../RFCN/JPEGImages/'
# RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'
# Model checkpoints written during training are stored (and later globbed) here.
CHECKPOINT_DIR = './checkpoints/checkpoint01/'
LOG_DIR = './logs/log01/'
# Class order defines the column order of the one-hot label vectors.
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# NOTE(review): CONF_THRESH is not referenced in the visible cells - confirm it is still needed.
CONF_THRESH = 0.8
# --- Network input and optimisation -------------------------------------------
# Crops are resized to ROWS x COLS before being fed to the network.
ROWS = 224
COLS = 224
BATCHSIZE = 128
LEARNINGRATE = 1e-4
# --- Background selection -------------------------------------------------------
# A detection counts as background when its best IoU with a ground-truth box
# lies strictly between BG_THRESH_LO and BG_THRESH_HI (see bg_discrimination).
BG_THRESH_HI = 0.3
BG_THRESH_LO = 0.1
# Margin, in pixels of the resized crop: boxes are enlarged by p/(COLS-2*p) of
# their width (resp. p/(ROWS-2*p) of their height) on every side, so the original
# box fills the central (COLS-2*p) x (ROWS-2*p) region after resizing.
p=16

def load_img(path, bbox, target_size=None):
    """Open an image, crop it to ``bbox`` and optionally resize the crop.

    Args:
        path: Path of the image file to open.
        bbox: Sequence ``(xmin, ymin, xmax, ymax)`` in pixel coordinates.
        target_size: Optional ``(rows, cols)``; when given, the crop is
            resized to that size.

    Returns:
        The cropped (and possibly resized) RGB ``PIL.Image``.
    """
    # Image.open keeps the underlying file open until the object is collected.
    # convert() copies the pixel data into a new image, so the file can be
    # closed right away instead of leaking one handle per training sample.
    with Image.open(path) as source:
        rgb = source.convert('RGB')
    cropped = rgb.crop((bbox[0], bbox[1], bbox[2], bbox[3]))
    if target_size:
        # PIL's resize takes (width, height), i.e. (cols, rows).
        cropped = cropped.resize((target_size[1], target_size[0]))
    return cropped

def preprocess_input(x):
    """Apply the ResNet50 image preprocessing to one ``(rows, cols, 3)`` RGB array.

    The channel order is flipped from RGB to BGR and a fixed per-channel mean
    is subtracted.

    Args:
        x: Array-like image whose last axis holds the R, G, B channels.

    Returns:
        A new floating-point array in BGR order with the means subtracted.
        Floating-point inputs of at least float32 keep their dtype; integer
        inputs are promoted to a floating dtype. The input array is never
        modified.
    """
    x = np.asarray(x)
    # x[:, :, ::-1] is only a view on the caller's buffer, so subtracting in
    # place would silently alter the caller's array (and fail on integer
    # dtypes). Work on a floating-point copy instead.
    out = np.array(x[:, :, ::-1], dtype=np.result_type(x.dtype, np.float32))
    # 'RGB'->'BGR' done above; subtract the per-channel means (B, G, R order).
    out[:, :, 0] -= 103.939
    out[:, :, 1] -= 116.779
    out[:, :, 2] -= 123.68
    return out

In [3]:
# Build (or load from cache) the table of ground-truth fish boxes.
# One row per annotated box: the class folder and file of the image, the index
# of the box within the image, the annotated class, and the box corners after
# enlarging the box by the margin `p` and clipping it to the image.
file_name = 'GTbbox_df.pickle'
if os.path.exists('../data/'+file_name):
    print ('Loading from file '+file_name)
    GTbbox_df = pd.read_pickle('../data/'+file_name)
else:
    print ('Generating file '+file_name)
    bbox_columns = ['image_folder', 'image_file','crop_index','crop_class','xmin','ymin','xmax','ymax']
    # Rows are collected in a plain list and turned into a DataFrame once;
    # appending to a DataFrame row by row copies the frame on every insert.
    # NOTE: building the frame in one go lets pandas infer crop_index as an
    # integer column rather than the floats the row-wise append produced.
    bbox_records = []

    # Every class except the background class 'NoF' has an annotation file.
    crop_classes = [fish for fish in FISH_CLASSES if fish != 'NoF']

    for c in crop_classes:
        print(c)
        with open('../data/fish/annos1/{}.json'.format(c), 'r') as anno_file:
            annotated_images = json.load(anno_file)
        for entry in annotated_images:
            # Only the bare file name is kept; the folder comes from the class.
            image_file = os.path.basename(entry["filename"])
            # Only the size is needed, so close the image file immediately.
            with Image.open(TRAIN_DIR+c+'/'+image_file) as annotated_image:
                width_image, height_image = annotated_image.size
            for i, a in enumerate(entry["annotations"]):
                xmin = a["x"]
                ymin = a["y"]
                width = a["width"]
                height = a["height"]
                # Enlarge the box so that, once resized to COLS x ROWS, the
                # original box is surrounded by a p-pixel margin.
                delta_width = p/(COLS-2*p)*width
                delta_height = p/(ROWS-2*p)*height
                xmin_expand = xmin-delta_width
                ymin_expand = ymin-delta_height
                xmax_expand = xmin+width+delta_width
                ymax_expand = ymin+height+delta_height
                # The clipped box must keep a positive extent in both axes.
                assert max(xmin_expand,0)<min(xmax_expand,width_image)
                assert max(ymin_expand,0)<min(ymax_expand,height_image)
                record = [c, image_file, i, a["class"],
                          max(xmin_expand,0), max(ymin_expand,0),
                          min(xmax_expand,width_image), min(ymax_expand,height_image)]
                bbox_records.append(record)
                # Surface annotations whose class differs from their folder.
                if a["class"] != c: print(record)

    GTbbox_df = pd.DataFrame(bbox_records, columns=bbox_columns)
    GTbbox_df.to_pickle('../data/'+file_name)

Loading from file GTbbox_df.pickle


In [4]:
GTbbox_df[:2]

Unnamed: 0,image_folder,image_file,crop_index,crop_class,xmin,ymin,xmax,ymax
0,ALB,img_00003.jpg,0.0,ALB,377.0,66.0,730.0,173.0
1,ALB,img_00003.jpg,1.0,ALB,670.0,95.0,1008.0,219.0


In [5]:
def IoU(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        boxA: Sequence ``(xmin, ymin, xmax, ymax)``.
        boxB: Sequence ``(xmin, ymin, xmax, ymax)``.

    Returns:
        A float in ``[0, 1]``: 0.0 when the boxes do not overlap (or when both
        are degenerate with zero area), 1.0 when they coincide.
    """
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # Clamp each extent at 0: for disjoint boxes the raw differences are
    # negative, and multiplying two negatives would otherwise report a bogus
    # positive overlap (one negative would give a negative IoU).
    interArea = max(0, xB - xA) * max(0, yB - yA)
    # compute the area of both rectangles
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    unionArea = float(boxAArea + boxBArea - interArea)
    if unionArea <= 0:
        # Both boxes are degenerate; avoid dividing by zero.
        return 0.0
    return interArea / unionArea

def bg_discrimination(box, gt_bboxes):
    """Tell whether ``box`` qualifies as a background sample.

    A box is background when its best IoU with any ground-truth box lies
    strictly between ``BG_THRESH_LO`` and ``BG_THRESH_HI``.

    Args:
        box: Sequence ``(xmin, ymin, xmax, ymax)``.
        gt_bboxes: Iterable of ground-truth boxes in the same format; may be
            empty.

    Returns:
        ``True`` if the box is background, ``False`` otherwise. With no
        ground-truth boxes the result is ``False`` (previously an implicit
        ``None``, which callers could only use for its truthiness).
    """
    overlaps = [IoU(box, gt_box) for gt_box in gt_bboxes]
    if not overlaps:
        # Nothing to overlap with, so the IoU window cannot be satisfied.
        return False
    best_overlap = max(overlaps)
    return bool(BG_THRESH_LO < best_overlap < BG_THRESH_HI)

In [6]:
# Disabled cell (guarded by `if False`): builds or loads a table of background
# ('NoF') crops taken from RFCN detections that only partially overlap a
# ground-truth box.
# NOTE(review): RFCN_MODEL only appears as a commented-out assignment in the
# configuration cell of the visible notebook, so enabling this branch as-is
# would raise a NameError on a cache miss.
if False:
    file_name = 'NoFbbox_df.pickle'
    if os.path.exists('../data/'+file_name):
        print ('Loading from file '+file_name)
        NoFbbox_df = pd.read_pickle('../data/'+file_name)
    else:
        print ('Generating file '+file_name)
        # SECURITY: pickle.load executes arbitrary code from the file; only use
        # detection files produced by a trusted pipeline.
        with open('../data/RFCN_detections/detections_RoIs_'+RFCN_MODEL+'.pkl','rb') as f:
            detections_RoIs = pickle.load(f, encoding='latin1')
        # The first 1000 entries are skipped in both the detections and the file list.
        # NOTE(review): presumably those are the real test images - confirm.
        train_detections_RoIs = detections_RoIs[1000:]
        with open("../RFCN/ImageSets/Main/test.txt","r") as f:
            test_files = f.readlines()
        train_files = test_files[1000:]
        # Detections and file names are matched by position, so the lengths must agree.
        assert len(train_detections_RoIs) == len(train_files)

        NoFbbox_df = pd.DataFrame(columns=['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax',
                                           'NoF_RFCN','ALB_RFCN','BET_RFCN','DOL_RFCN','LAG_RFCN','OTHER_RFCN','SHARK_RFCN','YFT_RFCN']) 

        for im in range(len(train_files)):
            if im%1000 == 0: print(im)
            # Drop the trailing newline left by readlines() and add the extension.
            image_file = train_files[im][:-1]+'.jpg'
            image = Image.open(TEST_DIR+image_file)
            width_image, height_image = image.size
            # Ground-truth boxes recorded for this image (empty when there are none).
            gt_bboxes = GTbbox_df.loc[GTbbox_df['image_file'] == image_file, ['xmin','ymin','xmax','ymax']].values

            # Keep only detections whose best IoU falls inside the background window.
            bboxes = []
            detects_im = train_detections_RoIs[im]
            for i in range(len(detects_im)):
                if bg_discrimination(detects_im[i,:4], gt_bboxes):
                    bboxes.append(detects_im[i,:]) 

            for j in range(len(bboxes)):    
                bbox = bboxes[j]
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                width = xmax-xmin
                height = ymax-ymin
                # Same margin expansion as for the ground-truth boxes.
                delta_width = p/(COLS-2*p)*width
                delta_height = p/(ROWS-2*p)*height
                xmin_expand = xmin-delta_width
                ymin_expand = ymin-delta_height
                xmax_expand = xmax+delta_width
                ymax_expand = ymax+delta_height
                # The clipped box must keep a positive extent in both axes.
                assert max(xmin_expand,0)<min(xmax_expand,width_image)
                assert max(ymin_expand,0)<min(ymax_expand,height_image)
                # Columns after the box corners are filled from bbox[4:]
                # (the *_RFCN columns declared above).
                NoFbbox_df.loc[len(NoFbbox_df)]=[image_file,j,'NoF',max(xmin_expand,0),max(ymin_expand,0),
                                                 min(xmax_expand,width_image),min(ymax_expand,height_image)]+bbox[4:].tolist()

        NoFbbox_df.to_pickle('../data/'+file_name)


In [7]:
def train_generator(datagen, df):
    """Yield class-balanced, augmented training batches forever.

    For every batch, ``nb_perClass`` rows (a module-level value that must be
    defined before the generator is first advanced) are drawn at random from
    each ``crop_class`` present in ``df``. Each row is cropped, resized to
    ``ROWS x COLS``, randomly transformed by ``datagen`` and preprocessed.

    Args:
        datagen: Object exposing ``random_transform(x)``.
        df: DataFrame with the columns ``image_folder``, ``image_file``,
            ``crop_class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``.

    Yields:
        ``(batch_x, batch_y)`` with one row per sampled crop; ``batch_y`` is
        one-hot over ``FISH_CLASSES``. The batch holds exactly the sampled
        rows, so it is smaller than ``BATCHSIZE`` when ``df`` contains fewer
        classes than ``FISH_CLASSES`` (e.g. no 'NoF' crops). Previously such
        batches were padded with all-zero images carrying all-zero labels.
    """
    def sample_class(group):
        # Sample with replacement only when a class has fewer rows than
        # requested; without it np.random.choice would raise ValueError.
        picked = np.random.choice(group.index, size=nb_perClass,
                                  replace=len(group) < nb_perClass)
        return group.loc[picked, :]

    while 1:
        batch_df = df.groupby('crop_class', as_index=True).apply(sample_class)
        nb_samples = len(batch_df)
        # Size the arrays from what was actually sampled, not from BATCHSIZE.
        batch_x = np.zeros((nb_samples, ROWS, COLS, 3), dtype=K.floatx())
        batch_y = np.zeros((nb_samples, len(FISH_CLASSES)), dtype=K.floatx())
        for i, (_, row) in enumerate(batch_df.iterrows()):
            image_file = os.path.join(row['image_folder'], row['image_file'])
            fish = row['crop_class']
            bbox = [row['xmin'], row['ymin'], row['xmax'], row['ymax']]
            cropped = load_img(TRAIN_DIR+image_file,bbox,target_size=(ROWS,COLS))
            x = np.asarray(cropped, dtype=K.floatx())
            x = datagen.random_transform(x)
            x = preprocess_input(x)
            batch_x[i] = x
            batch_y[i,FISH_CLASSES.index(fish)] = 1
        yield (batch_x, batch_y)

# Random augmentation applied to every training crop: arbitrary rotation,
# flips along both axes, and mild shear / zoom / translation.
augmentation_options = dict(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=180,
    shear_range=0.2,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [8]:
#train data prepare
# Stratified 80/20 split of the ground-truth crops; the fixed seed keeps the
# split reproducible between runs.
train_df, valid_df = train_test_split(
    GTbbox_df,
    test_size=0.2,
    random_state=1986,
    stratify=GTbbox_df['crop_class'],
)

# Crops drawn per class for one batch.
nb_perClass = BATCHSIZE // len(FISH_CLASSES)
# An epoch is sized so that the 'ALB' training crops are covered once at
# nb_perClass crops per batch.
samples_per_epoch = BATCHSIZE * math.ceil(
    train_df.groupby('crop_class').size()['ALB'] / nb_perClass
)

In [9]:
valid_df[:2]

Unnamed: 0,image_folder,image_file,crop_index,crop_class,xmin,ymin,xmax,ymax
2052,ALB,img_06627.jpg,0.0,ALB,480.0,73.0,759.0,202.0
2746,BET,img_05858.jpg,0.0,BET,469.344766,455.789682,780.688089,689.191276


In [10]:
# validation_data (valid_x,valid_y)
# Leftovers of the (currently disabled) 'NoF' sub-sampling are kept as they were.
df_1 = valid_df.loc[valid_df.crop_class != 'NoF']
l = valid_df.groupby('crop_class').size()
# l.pop('NoF')
nb_NoF_valid = math.ceil(l.sum()/10)
#df_2 = valid_df[valid_df.crop_class == 'NoF'].sample(n=nb_NoF_valid)
#valid_df = pd.concat([df_1,df_2], axis=0)

# Materialise the whole validation set in memory: one preprocessed crop and
# one one-hot label per row of valid_df (no augmentation).
nb_valid = valid_df.shape[0]
valid_x = np.zeros((nb_valid, ROWS, COLS, 3), dtype=K.floatx())
valid_y = np.zeros((nb_valid, len(FISH_CLASSES)), dtype=K.floatx())
for i, (index, row) in enumerate(valid_df.iterrows()):
    image_file = os.path.join(row['image_folder'], row['image_file'])
    fish = row['crop_class']
    bbox = [row['xmin'], row['ymin'], row['xmax'], row['ymax']]
    cropped = load_img(TRAIN_DIR+image_file, bbox, target_size=(ROWS, COLS))
    valid_x[i] = preprocess_input(np.asarray(cropped, dtype=K.floatx()))
    valid_y[i, FISH_CLASSES.index(fish)] = 1

In [11]:
valid_y.shape

(875, 8)

In [12]:
#callbacks

# Stop training once val_loss has gone 10 epochs without improving.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    verbose=1,
    mode='auto',
)

# Save the full model (not just the weights) whenever val_loss improves; the
# file name records the epoch and the val_loss that was reached.
model_checkpoint = ModelCheckpoint(
    filepath=CHECKPOINT_DIR+'weights.{epoch:03d}-{val_loss:.4f}.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
)

# Multiply the learning rate by 0.1 after 5 epochs without a val_loss improvement.
learningrate_schedule = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=5,
    verbose=1,
    mode='auto',
    epsilon=0.001,
    cooldown=0,
    min_lr=0,
)

# tensorboard = TensorBoard(log_dir=LOG_DIR, histogram_freq=0, write_graph=False, write_images=True)

In [13]:
#Resnet50
#top layer training

from keras.applications.resnet50 import ResNet50

# ImageNet-pretrained ResNet50 without its original classifier.
base_model = ResNet50(include_top=False, weights='imagenet')

# New classification head stacked on the convolutional features:
# global average pooling -> dropout -> 256-unit dense with LeakyReLU -> dropout -> softmax.
head = GlobalAveragePooling2D()(base_model.output)
head = Dropout(0.5)(head)
head = Dense(256, init='glorot_normal')(head)
head = LeakyReLU(alpha=0.33)(head)
head = Dropout(0.5)(head)
predictions = Dense(len(FISH_CLASSES), activation='softmax', init='glorot_normal')(head)

model = Model(input=base_model.input, output=predictions)

# Stage 1 trains only the freshly initialised head, so every ResNet50 layer is frozen.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

# Compile only after the trainable flags are set so that they take effect.
optimizer = Adam(lr=LEARNINGRATE)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

(Subtensor{int64}.0, Elemwise{add,no_inplace}.0, Elemwise{add,no_inplace}.0, Subtensor{int64}.0)


In [None]:
# Print the layer-by-layer summary of the assembled model (frozen base + new head).
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, None, None, 3) 0           input_1[0][0]                    
____________________________________________________________________________________________________
conv1 (Convolution2D)            (None, None, None, 64 9472        zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, None, None, 64 256         conv1[0][0]                      
___________________________________________________________________________________________

In [None]:
# Stage 1: train the new top layers for up to 30 epochs on augmented,
# class-balanced batches, validating on the in-memory validation set.
model.fit_generator(
    train_generator(datagen=train_datagen, df=train_df),
    samples_per_epoch=samples_per_epoch,
    nb_epoch=30,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint, learningrate_schedule],  # , tensorboard
    validation_data=(valid_x, valid_y),
    nb_worker=3,
    pickle_safe=True,
)

Epoch 1/30
Epoch 2/30

In [None]:
# Resnet50
# fine tuning
# Index of the first layer to unfreeze; every layer before it stays frozen.
# NOTE(review): 38 is a hand-picked cut point - confirm which ResNet50 stage it marks.
start_layer = 38

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'; pick the file with
# the lowest val_loss. Only *.hdf5 files are considered so that stray files in
# the directory cannot break the float() parsing.
files = glob.glob(CHECKPOINT_DIR+'*.hdf5')
if not files:
    raise IOError('No checkpoint files found in ' + CHECKPOINT_DIR)
best_checkpoint = min(files, key=lambda f: float(f.split('-')[-1][:-5]))
print('Loading model from checkpoints file ' + best_checkpoint)
model = load_model(best_checkpoint)

for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# we need to recompile the model for these modifications to take effect
# we use Adam with a low learning rate
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# The TensorBoard callback is not created in this notebook (its definition is
# commented out in the callbacks cell), so it must not be listed here.
model.fit_generator(train_generator(datagen=train_datagen, df=train_df), samples_per_epoch=samples_per_epoch, nb_epoch=300, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule],  # , tensorboard
                    validation_data=(valid_x,valid_y), nb_worker=3, pickle_safe=True)

In [None]:
#resume training

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'; resume from the file
# with the lowest val_loss. Only *.hdf5 files are considered so that stray
# files in the directory cannot break the float() parsing.
files = glob.glob(CHECKPOINT_DIR+'*.hdf5')
if not files:
    raise IOError('No checkpoint files found in ' + CHECKPOINT_DIR)
best_checkpoint = min(files, key=lambda f: float(f.split('-')[-1][:-5]))
print('Loading model from checkpoints file ' + best_checkpoint)
model = load_model(best_checkpoint)
# print('Loading model from weights.004-0.0565.hdf5')
# model = load_model('./checkpoints/checkpoint3/weights.004-0.0565.hdf5')

# The TensorBoard callback is not created in this notebook (its definition is
# commented out in the callbacks cell), so it must not be listed here.
model.fit_generator(train_generator(datagen=train_datagen, df=train_df), samples_per_epoch=samples_per_epoch, nb_epoch=300, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule],  # , tensorboard
                    validation_data=(valid_x,valid_y), nb_worker=3, pickle_safe=True)