## Train sea lion classifier with a convnet

In [1]:
# Where the prepared patch datasets are read from, and where this notebook
# writes its logs and weights.
INPUT_DIR = '../../input/kaggle-sea-lion/03/'
OUTPUT_DIR = '../../output/kaggle-sea-lion/05/'

# Patch dimensions; handed to both the dataset-name helper and the model builder.
IMAGE_DIMS = (84, 84, 3)
INPUT_DATASET_NAME = 'lion-patches-0px-balanced'

# Training resumes from, and writes back to, the very same weights file.
LOAD_WEIGHTS_FILE = ''.join([OUTPUT_DIR, 'last-weights.h5'])
SAVE_WEIGHTS_FILE = LOAD_WEIGHTS_FILE

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob

import keras
from keras.preprocessing.image import ImageDataGenerator

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.lions as lions

Using TensorFlow backend.


## Training

### Prepare output dir

In [3]:
# Prepare the output folder (requesting 'tf-logs' and 'weights' subfolders
# without recreating existing content) and mirror log records to out.log.
utils.mkdirs(OUTPUT_DIR, dirs=['tf-logs','weights'], recreate=False)
modules.logging.setup_file_logger(OUTPUT_DIR + 'out.log')

# OUTPUT_DIR already ends with '/', so no extra separator is added here
# (the previous '/tf-logs/' suffix produced a '//' in the path).
tf_logs_dir = OUTPUT_DIR + 'tf-logs/'

# Filename template filled in per epoch by the checkpoint callback.
# NOTE(review): a 'weights' subfolder is requested above, yet checkpoints are
# written to the OUTPUT_DIR root - confirm which location is intended.
weights_file = OUTPUT_DIR + 'weights-{epoch:02d}-{val_acc:.2f}.h5'
logger.info('Output dirs created')

2017-04-10 04:22:56,668 INFO Output dirs created


### Prepare CNN model

In [4]:
# Build the sea lion convnet for the configured patch size and compile it
# for multi-class training with accuracy reported alongside the loss.
logger.info('Load CNN model for training')
model = lions.convnet_alexnet_lion_keras(IMAGE_DIMS)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

2017-04-10 04:22:56,676 INFO Load CNN model for training
  conv_1 = convolutional.Convolution2D(96, 11, 11, border_mode='valid', name="conv_1", activation='relu', init='glorot_uniform')(input)
  conv_2 = convolutional.Convolution2D(256, 3, 3, border_mode='valid', name="conv_2", activation='relu', init='glorot_uniform')(zero_padding_1)
  conv_3 = convolutional.Convolution2D(384, 3, 3, border_mode='valid', name="conv_3", activation='relu', init='glorot_uniform')(zero_padding_2)
  conv_4 = convolutional.Convolution2D(384, 3, 3, border_mode='valid', name="conv_4", activation='relu', init='glorot_uniform')(conv_3)
  conv_5 = convolutional.Convolution2D(256, 3, 3, border_mode='valid', name="conv_5", activation='relu', init='glorot_uniform')(conv_4)
  fc_1 = core.Dense(4096, name="fc_1", activation='relu', init='glorot_uniform')(flatten)
  output = core.Dense(4096, name="Output", activation='relu', init='glorot_uniform')(fc_1)
  fc_2 = core.Dense(NR_CLASSES, name="fc_2", activation='softmax',

### Train model

In [5]:
# Train the model on the first 80% of the dataset, validating on the next 10%.
# Training batches are streamed through an augmenting ImageDataGenerator.
dataset_path = INPUT_DIR + utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)
logger.info('Using dataset ' + dataset_path + ' as input')

# NOTE(review): featurewise_center / featurewise_std_normalization only take
# effect once datagenerator.fit(...) has been called on sample data. No fit is
# performed here, so Keras warns and skips the normalization. Enabling it would
# also require applying the same statistics to the validation arrays, which are
# passed raw below - confirm the intended preprocessing before changing this.
datagenerator = ImageDataGenerator(
        featurewise_center=True,
        samplewise_center=False,
        featurewise_std_normalization=True,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=360,
        width_shift_range=0,
        height_shift_range=0,
        horizontal_flip=False,
        vertical_flip=False)

with h5py.File(dataset_path, 'r') as hdf5:
    logger.info('loading input data')
    X_train, Y_train = utils.dataset_xy_range(hdf5, 0, 0.8)
    X_validation, Y_validation = utils.dataset_xy_range(hdf5, 0.8, 0.9)
    logger.info('X shape ' + str(X_train.shape))
    logger.info('Y shape ' + str(Y_train.shape))

    # Resume from the last saved weights when available.
    # (Previously referenced the undefined name WEIGHTS_FILE -> NameError.)
    if os.path.isfile(LOAD_WEIGHTS_FILE):
        logger.info('Loading previous weights...')
        model.load_weights(LOAD_WEIGHTS_FILE)

    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=tf_logs_dir, histogram_freq=0, write_graph=True, write_images=True)
    # Keep only checkpoints that improve validation accuracy.
    checkpoint_callback = keras.callbacks.ModelCheckpoint(weights_file, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

    # NOTE(review): the recorded run raised MemoryError right after the log
    # line below; the full training split is handed to flow() as one array -
    # consider a smaller split or streaming from HDF5 if it persists.
    logger.info('Starting CNN training...')
    # verbose=1 already installs Keras' own progress bar, so no explicit
    # ProgbarLogger is registered (a 'samples'-mode logger has no sample count
    # to read under fit_generator). The obsolete show_accuracy flag is dropped:
    # accuracy is already requested via metrics=['accuracy'] at compile time.
    history = model.fit_generator(datagenerator.flow(X_train, Y_train, batch_size = 16, shuffle = False),
                         samples_per_epoch = len(X_train),
                         nb_epoch = 1,
                         callbacks = [tensorboard_callback, checkpoint_callback],
                         validation_data = (X_validation, Y_validation),
                         verbose = 1)

    if SAVE_WEIGHTS_FILE is not None:
        logger.info('Saving last weights...')
        model.save_weights(SAVE_WEIGHTS_FILE)

    cnn.show_training_info_keras(history)

2017-04-10 04:22:56,890 INFO Using dataset ../../input/kaggle-sea-lion/03/lion-patches-0px-balanced-84-84.h5 as input
2017-04-10 04:22:56,892 INFO loading input data
2017-04-10 04:23:03,793 INFO X shape (37462, 84, 84, 3)
2017-04-10 04:23:03,794 INFO Y shape (37462, 6)
2017-04-10 04:23:03,796 INFO Starting CNN training...


MemoryError: 

### Evaluate results

In [None]:
# Evaluate the trained model on the last 10% of the dataset.
logger.info('Evaluate dataset')
# NOTE(review): this reads the 'lion-patches' dataset, whereas training used
# INPUT_DATASET_NAME - confirm the 0.9-1 range here is truly held-out data.
dataset_path = INPUT_DIR + utils.dataset_name('lion-patches', IMAGE_DIMS)

with h5py.File(dataset_path, 'r') as hdf5:
    X_test,Y_test = utils.dataset_xy_range(hdf5, 0.9, 1)
    # Pass the arrays that were just loaded (previously the undefined X, Y).
    cnn.evaluate_dataset_keras(X_test, Y_test, model, batch_size=24, detailed=True)