## Generate competition submission

In [1]:
# --- Filesystem layout -------------------------------------------------------
INPUT_DIR = '../../input/kaggle-sea-lion/'
OUTPUT_DIR = '../../output/kaggle-sea-lion/20/'

# --- Model -------------------------------------------------------------------
# Window size handed to the CNN constructor and to the sliding-window generator.
IMAGE_DIMS = (42, 42, 3)
# Size of the classifier output; the counting code tallies LABEL_DIMS[0]-1 classes.
LABEL_DIMS = (6,)
LOAD_WEIGHTS_FILE = INPUT_DIR + '05/weights-medium1-42x42-0.94.h5'
# Not referenced by the cells visible in this notebook.
LOAD_MODEL_FILE = None

# --- Run options -------------------------------------------------------------
# When True, detections are drawn and displayed for every processed image.
DEBUG = True
# Slice applied to the first axis of every loaded image.
# Development alternative for a narrow band of rows: slice(1400,1800)
IMAGE_SLICE = slice(0, 99999)

# Number of shards the image list is split into.
NR_SHARDS = 4
# Passed to ShardGroup as `recreate_shards_dir`.
RECREATE_OUTPUT = True

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob
import cv2
from multiprocessing import Pool
import multiprocessing

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging as logging
import modules.cnn as cnn
import modules.lions as lions
import modules.shards as shards
import modules.objectdetect as objectdetect

Using TensorFlow backend.


## Classify and count lions on each image

### Find lions on all images

In [3]:
# Image ids listed in MismatchedTrainImages.txt.
# NOTE(review): this list is not referenced by any cell visible in this notebook;
# confirm whether these images are meant to be excluded from processing.
MISMATCHED = [3, 7, 9, 21, 30, 34, 71, 81, 89, 97, 151, 184, 215, 234, 242, 268, 290, 311, 331, 344, 380, 384, 406, 421, 469, 475, 490, 499, 507, 530, 531, 605, 607, 614, 621, 638, 644, 687, 712, 721, 767, 779, 781, 794, 800, 811, 839, 840, 869, 882, 901, 903, 905, 909, 913, 927, 946]

# Restrict processing to specific image ids, or None to process every image.
# Example for debugging a single image: FORCE_IMAGES = [42]
FORCE_IMAGES = None

In [4]:
def process_shard_lions(shard_group, shard_id):
    """Detect and count sea lions on every image of one shard and export the counts.

    The CNN is built and its weights are loaded inside this function, and the
    helper functions are declared here as closures over that model, so each
    call works with its own model instance.

    For every '.jpg' path returned by ``shard_group.shard_items(shard_id)`` the
    image is loaded, sliced with IMAGE_SLICE, scanned with a sliding window and
    the per-class detection counts are collected. The rows are written to
    ``<shard dir>/submission.csv`` and the shard is then marked as done.
    A shard already reported as done by ``shard_group.shard_done`` is skipped.

    Args:
        shard_group: object providing shard_dir, shard_done, shard_items and
            mark_done for the given shard id.
        shard_id: identifier of the shard to process.

    Returns:
        None. Results are written to disk and progress is logged.
    """
    # Keep the shard timer under its own name: the per-image timers below must
    # not overwrite it, otherwise the final stop() hits the wrong timer.
    shard_timer = Timer('PROCESSING SHARD {}'.format(shard_id))

    output_dir = shard_group.shard_dir(shard_id)
    logging.setup_file_logger(output_dir + 'out.log')

    logger.info('Load CNN model')
    #lion simple cnn forward time: ~10ms
    print(LOAD_WEIGHTS_FILE)
    model = lions.convnet_medium1_lion_keras(IMAGE_DIMS)
    model.load_weights(LOAD_WEIGHTS_FILE)

    #DECLARE FUNCTIONS INSIDE PROCESS INSTANCE
    def evaluate_region_all_classes(region_img):
        """ Returns (score, label) """
        y_pred = model.predict(np.array([region_img]))
        ylp = utils.onehot_to_label(np.array(y_pred))
        return y_pred[0][ylp[0]], ylp[0]

    def detect_lions(image, evaluate_function):
        """Run the sliding-window detector on `image` and return per-class counts.

        Returns a list of LABEL_DIMS[0]-1 integers; index i holds the number of
        detections whose label (detection[5]) equals i.
        """
        #search for lions
        region_generator = objectdetect.sliding_window_generator(image, step=(19,19), window=IMAGE_DIMS, pyramid_max_layers=1)
        detections, imgs = objectdetect.evaluate_regions(region_generator, evaluate_function, filter_score_min=0.97,
                                                         filter_labels=(0,1,2,3,4), apply_non_max_suppression=True,
                                                         supression_overlap_threshold=0.1, threads=None)
        #calculate detection class distribution
        detected_lions = np.zeros(LABEL_DIMS[0]-1, dtype='int')
        for detection in detections:
            label = int(detection[5])
            detected_lions[label] += 1

        if DEBUG:
            def detection_to_colortext(detection):
                # Color intensity is scaled by the detection score (detection[4]).
                score = detection[4]
                text = str(int(detection[5])) + ' ' + '{0:.2f}'.format(score)
                c = int(score*255)
                return (0,0,c), text

            img2 = image.copy()
            objectdetect.draw_detections(detections, img2, detection_to_colortext)
            utils.show_image(img2, size=60, is_bgr=True)

        return detected_lions.tolist()

    if shard_group.shard_done(shard_id):
        logger.warning('shard {} already processed. Skipping'.format(shard_id))

    else:
        image_paths = shard_group.shard_items(shard_id)

        total_detected_lions = []

        for image_path in image_paths:
            base = os.path.basename(image_path)
            fn = os.path.splitext(base)
            if fn[1] != '.jpg':
                logger.info('ignoring non jpg image. filename=' + base)
                continue

            train_id = int(fn[0])

            if FORCE_IMAGES is not None and train_id not in FORCE_IMAGES:
                continue

            image_timer = Timer('processing photo ' + image_path)
            image_raw = cv2.imread(image_path)
            if image_raw is None:
                # cv2.imread returns None instead of raising when the file is
                # missing or cannot be decoded; skip it rather than crash the shard.
                logger.warning('could not read image. Skipping. filename=' + base)
                image_timer.stop()
                continue

            image_raw = image_raw[IMAGE_SLICE]
            detected_lions = detect_lions(image_raw, evaluate_region_all_classes)
            logger.info('image ' + str(train_id))
            logger.info('total detections: ' + str(np.sum(np.array(detected_lions))))
            logger.info('class detections: ' + str(detected_lions))
            total_detected_lions += [[train_id] + detected_lions]
            image_timer.stop()

        logger.info('GENERATE SUBMISSION FILE')
        submission_file = output_dir + 'submission.csv'
        df = pd.DataFrame(total_detected_lions, columns=('test_id','adult_males','subadult_males','adult_females','juveniles','pups'))
        df.to_csv(submission_file, index=False)

        logger.info('detection result exported to ' + submission_file)
        shard_group.mark_done(shard_id)

        shard_timer.stop()


In [5]:
logging.setup_file_logger(OUTPUT_DIR + 'out.log')
logger.info('==== PROCESSING SHARDS IN PARALLEL ====')

logger.info('preparing shards')
images_dir = INPUT_DIR + "Train/"
# os.listdir returns entries in arbitrary order; sort them so the list handed to
# ShardGroup is the same on every run (matters when resuming with
# RECREATE_OUTPUT = False and relying on shards already marked as done).
image_paths = [images_dir+n for n in sorted(os.listdir(images_dir))]

shard_group = shards.ShardGroup(image_paths, NR_SHARDS, OUTPUT_DIR, recreate_shards_dir=RECREATE_OUTPUT)

2017-06-05 02:52:38,562 INFO ==== PROCESSING SHARDS IN PARALLEL ====
2017-06-05 02:52:38,564 INFO preparing shards


In [6]:
# Run process_shard_lions for the shards of the group; shard_ids=None is passed
# as the shard selection (presumably meaning "all shards" — TODO confirm in modules.shards).
# NOTE(review): in the recorded output below, 'ALL DONE' is logged under a second
# after the last shard starts and no per-image log lines appear — confirm that
# start_processing blocks until workers finish and surfaces worker errors.
shard_group.start_processing(process_shard_lions, shard_ids=None)
logger.info('==== ALL DONE ====')

2017-06-05 02:52:38,589 INFO Using 4 parallel tasks
2017-06-05 02:52:40,620 INFO > [started] PROCESSING SHARD 4...
2017-06-05 02:52:40,629 INFO Load CNN model


../../input/kaggle-sea-lion/05/weights-medium1-42x42-0.94.h5


  model.add(convolutional.Conv2D(64, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(convolutional.Conv2D(128, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(convolutional.Conv2D(256, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
  model.add(core.Dense(6, activation='softmax', init='glorot_uniform'))
2017-06-05 02:52:42,622 INFO > [started] PROCESSING SHARD 2...
2017-06-05 02:52:42,629 INFO Load CNN model


../../input/kaggle-sea-lion/05/weights-medium1-42x42-0.94.h5


  model.add(convolutional.Conv2D(64, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(convolutional.Conv2D(128, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(convolutional.Conv2D(256, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
  model.add(core.Dense(6, activation='softmax', init='glorot_uniform'))
2017-06-05 02:52:44,624 INFO > [started] PROCESSING SHARD 3...
2017-06-05 02:52:44,634 INFO Load CNN model


../../input/kaggle-sea-lion/05/weights-medium1-42x42-0.94.h5


  model.add(convolutional.Conv2D(64, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(convolutional.Conv2D(128, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(convolutional.Conv2D(256, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
  model.add(core.Dense(6, activation='softmax', init='glorot_uniform'))
2017-06-05 02:52:46,625 INFO > [started] PROCESSING SHARD 1...
2017-06-05 02:52:46,631 INFO Load CNN model


../../input/kaggle-sea-lion/05/weights-medium1-42x42-0.94.h5


  model.add(convolutional.Conv2D(64, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(convolutional.Conv2D(128, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(convolutional.Conv2D(256, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
  model.add(core.Dense(6, activation='softmax', init='glorot_uniform'))
2017-06-05 02:52:47,111 INFO ==== ALL DONE ====


## Combine all shard results 

In [37]:
logger.info('COMBINING ALL SHARD RESULTS INTO ONE SUBMISSION FILE')

submission_columns = ('test_id','adult_males','subadult_males','adult_females','juveniles','pups')

# Collect the rows of every shard first and concatenate once, instead of
# re-allocating the merged array on each loop iteration.
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
shard_rows = []
for sd in shard_group.shard_dirs():
    shard_df = pd.read_csv(sd + 'submission.csv')
    if len(shard_df) > 0:
        shard_rows.append(shard_df.values)

if shard_rows:
    total_detected_lions = np.concatenate(shard_rows)
else:
    # No shard produced any row: keep the 2-D shape so the frame below can
    # still be built (and an empty, header-only CSV is written).
    total_detected_lions = np.empty((0, len(submission_columns)), dtype='int')

submission_file = OUTPUT_DIR + 'submission-merged.csv'
df = pd.DataFrame(total_detected_lions, columns=submission_columns)
df = df.sort_values('test_id')
df.to_csv(submission_file, index=False)
logger.info('submission file merged')

2017-06-05 03:11:00,951 INFO COMBINING ALL SHARD RESULTS INTO ONE SUBMISSION FILE
2017-06-05 03:11:00,976 INFO submission file merged
