## Train sea lion classifier with a convnet

In [1]:
# Input and output locations for this run (both end with a trailing slash,
# because later cells build paths by plain string concatenation).
INPUT_DIR = '../../input/kaggle-sea-lion/03/'
OUTPUT_DIR = '../../output/kaggle-sea-lion/05/'

# Passed to utils.dataset_name() and to the model builder.
# Presumably (height, width, channels) -- confirm against the helpers.
IMAGE_DIMS = (84, 84, 3)

INPUT_DATASET_NAME = 'lion-patches-0px-balanced'

# Weights are loaded from and saved back to the same file.
SAVE_WEIGHTS_FILE = LOAD_WEIGHTS_FILE = OUTPUT_DIR + 'last-weights.h5'

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob

import keras
from keras.preprocessing.image import ImageDataGenerator

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.lions as lions

Using TensorFlow backend.


## Training

### Prepare output dir

In [None]:
# Create the output directory tree and attach a file logger inside it.
# recreate=False is passed to utils.mkdirs -- presumably this keeps any
# existing content; confirm in the helper.
utils.mkdirs(OUTPUT_DIR, recreate=False, dirs=['tf-logs', 'weights'])
modules.logging.setup_file_logger(OUTPUT_DIR + 'out.log')

# OUTPUT_DIR already ends with '/', so this path contains a doubled slash.
tf_logs_dir = ''.join((OUTPUT_DIR, '/tf-logs/'))

# Checkpoint filename template; the {epoch}/{val_acc} placeholders are left
# unformatted here and handed to the ModelCheckpoint callback later on.
weights_file = OUTPUT_DIR + 'weights-{epoch:02d}-{val_acc:.2f}.h5'

# Full path of the input dataset, named from the dataset id and image dims.
input_dataset_path = INPUT_DIR + utils.dataset_name(
    INPUT_DATASET_NAME,
    IMAGE_DIMS,
)

logger.info('Output dirs created')

### Prepare CNN model

In [None]:
# Build the network for the configured image dimensions and compile it with
# Adam, categorical cross-entropy loss and accuracy as the tracked metric.
logger.info('Load CNN model for training')
model = lions.convnet_alexnet_lion_keras(IMAGE_DIMS)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Train model

In [None]:
# Train the compiled model from the HDF5 dataset: batches for the 0-0.8 slice
# are wrapped with image_generator for training, the 0.8-0.9 slice is used
# for validation, and the final weights are written to SAVE_WEIGHTS_FILE.
logger.info('Using dataset ' + input_dataset_path + ' as input')

# Batch size shared by the training and validation generators.
batch_size = 32

# Augmentation / normalisation settings wrapped around the training batches.
# NOTE(review): featurewise_center and featurewise_std_normalization are
# enabled, but image_generator.fit(...) is never called in this notebook.
# Keras needs dataset statistics for these options -- confirm that
# utils.image_batch_xy takes care of it.
image_generator = ImageDataGenerator(
        featurewise_center=True,
        samplewise_center=False,
        featurewise_std_normalization=True,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=360,
        width_shift_range=0,
        height_shift_range=0,
        horizontal_flip=False,
        vertical_flip=False)

logger.info('loading input data')
# Earlier in-memory loading path, kept for reference:
#X_train,Y_train = utils.dataset_xy_hdf5matrix_keras(input_dataset_path, 0, 0.8)
#X_validation,Y_validation = utils.dataset_xy_hdf5matrix_keras(input_dataset_path, 0.8, 0.9)
#logger.info('X shape ' + str(X_train.shape))
#logger.info('Y shape ' + str(Y_train.shape))

# Resume from the previous run when a weights file is present.
# (The original referenced an undefined name, WEIGHTS_FILE, which raised
# NameError as soon as the file existed.)
if os.path.isfile(LOAD_WEIGHTS_FILE):
    logger.info('Loading previous weights...')
    model.load_weights(LOAD_WEIGHTS_FILE)

tensorboard_callback = keras.callbacks.TensorBoard(log_dir=tf_logs_dir, histogram_freq=0, write_graph=True, write_images=True)
# A checkpoint is written only when validation accuracy improves.
checkpoint_callback = keras.callbacks.ModelCheckpoint(weights_file, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
progbar_callback = keras.callbacks.ProgbarLogger(count_mode='steps')

logger.info('Starting CNN training...')

# The generators read from the open HDF5 file, so training has to run inside
# this `with` block.
with h5py.File(input_dataset_path, 'r') as h5file:
    train_batch_generator = utils.batch_generator_xy_h5(h5file, start_ratio=0, end_ratio=0.8, batch_size=batch_size)
    train_generator = utils.image_batch_xy(train_batch_generator, image_generator)
    train_epoch_size = utils.dataset_size_h5(h5file, start_ratio=0, end_ratio=0.8)

    # NOTE(review): validation batches are not passed through image_generator,
    # so any normalisation applied to training batches is not applied here --
    # confirm this is intended.
    validate_generator = utils.batch_generator_xy_h5(h5file, start_ratio=0.8, end_ratio=0.9, batch_size=batch_size)
    validate_size = utils.dataset_size_h5(h5file, start_ratio=0.8, end_ratio=0.9)

    # Keras 2 argument names (steps_per_epoch / epochs), consistent with the
    # validation_steps argument already in use; the values are unchanged.
    # NOTE(review): steps are counted in batches. If dataset_size_h5 returns a
    # sample count, these should probably be divided by batch_size -- confirm.
    history = model.fit_generator(train_generator,
                  steps_per_epoch = train_epoch_size,
                  epochs = 1,
                  callbacks = [tensorboard_callback, checkpoint_callback, progbar_callback],
                  validation_data = validate_generator,
                  validation_steps = validate_size,
                  verbose = 1)

    if SAVE_WEIGHTS_FILE is not None:
        logger.info('Saving last weights...')
        model.save_weights(SAVE_WEIGHTS_FILE)

    cnn.show_training_info_keras(history)

### Evaluate results

In [None]:
# Evaluate the trained model on the 0.9-1 slice of the training dataset file.
logger.info('Evaluate dataset')

# NOTE(review): dataset_path is computed but not used below -- the evaluation
# reads from input_dataset_path. Confirm which dataset was intended.
dataset_path = INPUT_DIR + utils.dataset_name('lion-patches', IMAGE_DIMS)

X_test, Y_test = utils.dataset_xy_hdf5matrix_keras(
    input_dataset_path,
    0.9,
    1,
)
cnn.evaluate_dataset_keras(
    X_test,
    Y_test,
    model,
    batch_size=24,
    detailed=True,
)

In [None]:
# Disabled debugging snippet: prints labels and shows images for the first
# batches coming out of the augmenting generator.
# NOTE(review): as written it would not run if re-enabled --
# batch_generator_xy_h5 and image_batch_xy are called without the `utils.`
# prefix used elsewhere, and `image_data_generator` is not defined in the
# visible cells (the training cell names it `image_generator`).
# with h5py.File(input_dataset_path, 'r') as h5file:
#     batch_generator = batch_generator_xy_h5(h5file, start_ratio=0, end_ratio=1, batch_size=4, x_dataset='X', y_dataset='Y')
#     train_generator = image_batch_xy(batch_generator, image_data_generator)
#     counter = 0
#     for x, y in train_generator:
#         print(y)
#         utils.show_images(x)
#         counter += 1
#         if(counter>30): 
#             break