## Train sea lion classifier with a convnet

In [1]:
# Both directories live under the same output root; only the stage suffix differs.
_OUTPUT_ROOT = '../../output/kaggle-sea-lion/'

# Directory the patch dataset is read from (see the training cell).
INPUT_DIR = _OUTPUT_ROOT + '02/'
# Directory that receives this notebook's log file and the saved model.
OUTPUT_DIR = _OUTPUT_ROOT + '03/'

# Passed to both the network builder and the dataset-name helper.
# NOTE(review): presumably (height, width, channels) -- confirm against modules.cnn.
IMAGE_DIMS = (148, 148, 3)

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn

## Training

### Prepare output dir

In [3]:
# Set up the output directory; recreate=False is forwarded to utils.mkdirs
# (presumably so an existing directory is left in place -- see modules.utils).
utils.mkdirs(OUTPUT_DIR, recreate=False)

# Route log records to a file inside the output directory as well.
log_file = OUTPUT_DIR + 'out.log'
modules.logging.setup_file_logger(log_file)

logger.info('Dir {} created'.format(OUTPUT_DIR))

2017-04-02 19:23:40,311 INFO Dir ../../output/kaggle-sea-lion/03/ created


### Prepare CNN model

In [None]:
logger.info('Prepare CNN for training')

# Build the network definition for the configured input dimensions.
network = cnn.net_alexnet_lion(IMAGE_DIMS)

# Wrap the network in a trainable model. model_file=None is passed explicitly;
# presumably this means "start from scratch rather than load saved weights"
# -- confirm against modules.cnn.prepare_cnn_model.
model = cnn.prepare_cnn_model(
    network,
    OUTPUT_DIR,
    model_file=None
)

2017-04-02 19:23:40,317 INFO Prepare CNN for training
2017-04-02 19:23:40,442 INFO Prepare CNN
2017-04-02 19:23:40,444 INFO Preparing output dir
2017-04-02 19:23:40,445 INFO Initializing network...
2017-04-02 19:23:43,015 INFO Network initialized


### Train model

In [None]:
# Path of the HDF5 patch dataset; also reused by the evaluation cell below.
dataset_path = INPUT_DIR + utils.dataset_name('lion-patches', IMAGE_DIMS)

# Training options forwarded verbatim to model.fit.
# NOTE(review): keyword names match TFLearn's DNN.fit -- presumably `model`
# is a tflearn.DNN; confirm in modules.cnn.
fit_options = dict(
    validation_set=0.2,
    shuffle=True,
    batch_size=96,
    n_epoch=10,
    show_metric=True,
    snapshot_epoch=False,
    run_id='sea_lion_classifier',
)

# Training runs inside the `with` block so the HDF5 file stays open for the
# whole fit call (X and Y may still reference the open file).
with h5py.File(dataset_path, 'r') as hdf5:
    # Range arguments (0, 0.9): presumably the first 90% of the samples,
    # leaving the remainder for the evaluation cell -- see utils.dataset_xy_range.
    X, Y = utils.dataset_xy_range(hdf5, 0, 0.9)
    logger.info('X shape {}'.format(X.shape))
    logger.info('Y shape {}'.format(Y.shape))

    logger.info('Starting CNN training...')
    model.fit(X, Y, **fit_options)

# Persist the trained model next to the log file.
model_path = OUTPUT_DIR + "sea-lion-classifier.tfl"
model.save(model_path)
logger.info("Network trained and saved as sea-lion-classifier.tfl!")

Training Step: 29  | total loss: 1.39137 | time: 80.992s
| Momentum | epoch: 004 | loss: 1.39137 - acc: 0.5203 -- iter: 480/680


### Evaluate results

In [None]:
logger.info('Evaluate dataset')

# Evaluate on the range the training cell did not use (0.9 to 1).
# The file is kept open for the duration of the evaluation because X and Y
# may still reference it.
with h5py.File(dataset_path, 'r') as hdf5:
    X, Y = utils.dataset_xy_range(hdf5, 0.9, 1)
    # Fixed: the original call ended with a stray ':' which is a SyntaxError
    # and prevented this cell from running at all.
    # NOTE(review): `evaluate_dataset` is called unqualified but is not imported
    # in any visible cell -- it presumably lives in modules.utils or modules.cnn;
    # qualify it (or import it in the imports cell) once the owner is confirmed.
    evaluate_dataset(
        X, Y, model,
        batch_size=24,
        confusion_matrix=True,
        start_ratio=0.9,
        end_ratio=1
    )