## Train sea lion classifier with a convnet

In [1]:
# Input / output locations (both end with a trailing slash; later cells
# build file paths by plain string concatenation).
INPUT_DIR = '../../input/kaggle-sea-lion/03/'
OUTPUT_DIR = '../../output/kaggle-sea-lion/05/'

# Patch dimensions handed to the dataset-name helper and the model builder.
# Previously tried alternative: (84, 84, 3)
IMAGE_DIMS = (32, 32, 3)

# Base name of the HDF5 dataset to train on.
INPUT_DATASET_NAME = 'lion-patches-0px-balanced'

# Where the final weights are written after training (None disables saving).
SAVE_WEIGHTS_FILE = OUTPUT_DIR + 'last-weights.h5'
# Weights file to resume from before training (None starts from scratch).
LOAD_WEIGHTS_FILE = None

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob

import keras
from keras.preprocessing.image import ImageDataGenerator

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.lions as lions

Using TensorFlow backend.


## Training

### Prepare output dir

In [3]:
# Make sure the output directory and its sub-folders exist, then mirror the
# log output into a file inside it.
# recreate=False: presumably leaves existing content in place -- confirm in utils.mkdirs.
utils.mkdirs(OUTPUT_DIR, dirs=['tf-logs', 'weights'], recreate=False)
modules.logging.setup_file_logger(OUTPUT_DIR + 'out.log')

# OUTPUT_DIR already ends with '/', so no leading slash here (the previous
# form produced '.../05//tf-logs/').
tf_logs_dir = OUTPUT_DIR + 'tf-logs/'

# Filename template for checkpoints; the placeholders are filled in by
# keras.callbacks.ModelCheckpoint at save time.
# NOTE(review): a 'weights' sub-folder is created above, but checkpoints are
# written to the root of OUTPUT_DIR -- confirm which location is intended.
weights_file = OUTPUT_DIR + 'weights-{epoch:02d}-{val_acc:.2f}.h5'

# Full path of the HDF5 dataset used for training, validation and testing.
input_dataset_path = INPUT_DIR + utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)

logger.info('Output dirs created')

2017-04-10 20:20:27,024 INFO Output dirs created


### Prepare CNN model

In [None]:
logger.info('Load CNN model for training')

# Build the network for the configured patch size.
# Alternative architecture: lions.convnet_alexnet2_lion_keras(IMAGE_DIMS)
model = lions.convnet_simple_lion_keras(IMAGE_DIMS)

# Categorical cross-entropy with Adam; accuracy is tracked so that the
# checkpoint callback can monitor 'val_acc'.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

2017-04-10 20:20:27,033 INFO Load CNN model for training


### Train model

In [None]:
logger.info('Using dataset ' + input_dataset_path + ' as input')

# Optionally resume from previously saved weights.
# (Fix: the original passed the undefined name WEIGHTS_FILE to load_weights.)
if LOAD_WEIGHTS_FILE is not None and os.path.isfile(LOAD_WEIGHTS_FILE):
    logger.info('Loading previous weights...')
    model.load_weights(LOAD_WEIGHTS_FILE)

# Callbacks. fit_generator attaches the model to every callback itself, so no
# manual set_model() call is needed. No explicit ProgbarLogger either:
# verbose=1 already installs one, and adding a second printed every
# progress line twice ("Epoch 1/20" appeared twice in the output).
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=tf_logs_dir, histogram_freq=0, write_graph=True, write_images=True)
checkpoint_callback = keras.callbacks.ModelCheckpoint(weights_file, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
logger_callback = cnn.LoggingLogger()

logger.info('Starting CNN training...')

BATCH_SIZE = 64
EPOCHS = 20

# The HDF5 file must stay open for the whole training run because the batch
# generators are created from the open file handle.
with h5py.File(input_dataset_path, 'r') as h5file:
    logger.info('preparing input data')

    # Training split: first 80% of the dataset, passed through the image generator.
    train_batch_generator = utils.batch_generator_xy_h5(h5file, start_ratio=0, end_ratio=0.8, batch_size=BATCH_SIZE)
    # NOTE(review): featurewise_center / featurewise_std_normalization normally
    # require image_generator.fit(<sample data>) before use; no fit() call is
    # made in this notebook -- confirm whether utils.image_batch_xy handles it.
    image_generator = ImageDataGenerator(
            featurewise_center=True,
            samplewise_center=False,
            featurewise_std_normalization=True,
            samplewise_std_normalization=False,
            zca_whitening=False,
            rotation_range=360,
            width_shift_range=0,
            height_shift_range=0,
            horizontal_flip=False,
            vertical_flip=False)
    train_generator = utils.image_batch_xy(train_batch_generator, image_generator)
    start_pos, end_pos, train_size, train_nr_batches = utils.dataset_h5_batch_info(h5file, start_ratio=0, end_ratio=0.8, batch_size=BATCH_SIZE)
    print('train size ' + str(train_size) + ' ' + str(train_nr_batches))

    # Validation split: 80%-90% of the dataset.
    # NOTE(review): validation batches are fed to the model directly, without
    # going through image_generator -- confirm this asymmetry is intended.
    validate_generator = utils.batch_generator_xy_h5(h5file, start_ratio=0.8, end_ratio=0.9, batch_size=BATCH_SIZE)
    start_pos, end_pos, validate_size, validate_nr_batches = utils.dataset_h5_batch_info(h5file, start_ratio=0.8, end_ratio=0.9, batch_size=BATCH_SIZE)
    print('validate size ' + str(validate_size) + ' ' + str(validate_nr_batches))

    # Disabled debugging aid: display a few training batches.
#     c = 0
#     for x, y in train_batch_generator:
#         print(type(y))
#         utils.show_images(x, image_labels=utils.categorical_to_label(y))
#         c += 1
#         if(c>3): break

    # The batch counts come back as floats (e.g. 1749.0), so cast them to the
    # integer step counts Keras expects. 'epochs' is the Keras 2 name of the
    # deprecated 'nb_epoch' argument.
    history = model.fit_generator(train_generator,
                  steps_per_epoch = int(train_nr_batches),
                  epochs = EPOCHS,
                  callbacks = [tensorboard_callback, checkpoint_callback, logger_callback],
                  validation_data = validate_generator,
                  validation_steps = int(validate_nr_batches),
                  verbose = 1)

    if SAVE_WEIGHTS_FILE is not None:
        logger.info('Saving last weights...')
        model.save_weights(SAVE_WEIGHTS_FILE)

2017-04-10 20:20:27,215 INFO Using dataset ../../input/kaggle-sea-lion/03/lion-patches-0px-balanced-32-32.h5 as input
2017-04-10 20:20:27,334 INFO Starting CNN training...
2017-04-10 20:20:27,336 INFO preparing input data


train size 111948 1749.0
validate size 13994 218.0
Epoch 1/20
Epoch 1/20






### Evaluate results

In [None]:
logger.info('Evaluate dataset')

# Hand the Keras History object to the project's reporting helper, then dump
# the raw per-epoch metrics dictionary for reference.
cnn.show_training_info_keras(history)
metrics_by_epoch = history.history
print(metrics_by_epoch)

In [None]:
# Evaluate the trained model on the held-out test split (last 10% of the dataset).
# Relies on BATCH_SIZE and model from the training cells above.
dataset_path = INPUT_DIR + utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)
with h5py.File(dataset_path, 'r') as h5file:
    # The batch count and the generator must use the same batch size;
    # the generator previously used 32 while the count was computed for
    # BATCH_SIZE, so only part of the test split would have been read.
    start_pos, end_pos, test_size, test_nr_batches = utils.dataset_h5_batch_info(h5file, start_ratio=0.9, end_ratio=1, batch_size=BATCH_SIZE)
    test_batch_generator = utils.batch_generator_xy_h5(h5file, start_ratio=0.9, end_ratio=1, batch_size=BATCH_SIZE)

    # Materialise the test batches as arrays while the file is still open.
    X_test, Y_test = utils.xy_generator_to_array(test_batch_generator, test_nr_batches)
    # Fix: the original passed the undefined name 'batch_size' here.
    cnn.evaluate_dataset_keras(X_test, Y_test, model, batch_size=BATCH_SIZE)

In [None]:
# Disabled debugging snippet: iterates over augmented batches (batch size 4),
# prints the labels and displays the images, stopping after ~30 batches.
# NOTE(review): if re-enabled, batch_generator_xy_h5 / image_batch_xy need the
# 'utils.' prefix and 'image_data_generator' is not defined in this notebook
# (the training cell names it 'image_generator').
# with h5py.File(input_dataset_path, 'r') as h5file:
#     batch_generator = batch_generator_xy_h5(h5file, start_ratio=0, end_ratio=1, batch_size=4, x_dataset='X', y_dataset='Y')
#     train_generator = image_batch_xy(batch_generator, image_data_generator)
#     counter = 0
#     for x, y in train_generator:
#         print(y)
#         utils.show_images(x)
#         counter += 1
#         if(counter>30): 
#             break