In [1]:
import base64
import collections
import copy
import cProfile
import datetime
import gc
import itertools
import json
import math
import os
import operator
import pickle
import random
import re
import shutil
import sys
import time

import bokeh
import cv2
import hyperopt
from hyperopt import hp
import Image
import keras
from keras import *
from keras import backend as K
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL as pil
import prophet
import pyflux
import pylab
import scipy
from scipy import signal
import seaborn as sns
import skimage
import sklearn
from sklearn import *
import statsmodels as sm
import tensorflow as tf
import tqdm

np.random.seed(1337)

%matplotlib inline

sns.set(font_scale=1.3)
mpl.rcParams['figure.figsize'] = 20, 12
sns.set_style('whitegrid')
sns.set_palette(sns.color_palette('muted'))

linewidth = 1.0
dotsize = 15

Using TensorFlow backend.


In [2]:
SPLIT_TEST_SIZE = 0.2

# Load the Data and Split Train/Validation

In [3]:
train_all_raw = pd.read_json('/media/ntfs/data/iceberg_classification/input/train.json')
train_all_size = len(train_all_raw)
print train_all_size

train_all_x = np.zeros((train_all_size, 75, 75, 2))
train_all_y = np.zeros(train_all_size)

for i in range(train_all_size):
    for channel in range(2):
        train_all_x[i, :, :, channel] = (np.asarray(train_all_raw.iloc[i, channel]).reshape(75, 75) + 50) / 50
    train_all_y[i] = train_all_raw.iloc[i, 4]
    
del train_all_raw
gc.collect()

train_x, val_x, train_y, val_y = sklearn.model_selection.train_test_split(
                                    train_all_x,
                                    train_all_y,
                                    test_size=SPLIT_TEST_SIZE)

del train_all_x
del train_all_y
gc.collect()

train_size = len(train_x)
val_size = len(val_x)

print train_size
print val_size

1604
1283
321


# Model Creation

In [4]:
def create_model(
        kernel_size,
    
        n_filters_input,
        activation_input,
        max_pooling_size_input,
        dropout_input,
    
        n_conv_layers,
        n_filters_conv,
        activation_conv,
        max_pooling_size_conv,
        dropout_conv,
    
        n_dense_layers,
        n_dense_neurons,
        activation_dense,
        dropout_dense,
    
        optimizer,
        loss):
    
    model = models.Sequential()

    # input layer
    model.add(layers.Conv2D(
                n_filters_input,
                (kernel_size, kernel_size),
                activation=activation_input,
                input_shape=(75, 75, 2)))
    model.add(layers.MaxPooling2D((max_pooling_size_input, max_pooling_size_input), dim_ordering='th'))
    model.add(keras.layers.Dropout(dropout_input))
    
    # conv layers
    for i in range(n_conv_layers):
        model.add(layers.Conv2D(
                    n_filters_conv,
                    (kernel_size, kernel_size),
                    activation=activation_conv))
        model.add(layers.MaxPooling2D((max_pooling_size_conv, max_pooling_size_conv), dim_ordering='th'))
        model.add(keras.layers.Dropout(dropout_conv))

    model.add(keras.layers.Flatten())
        
    # dense layers
    for i in range(n_dense_layers):
        model.add(keras.layers.Dense(n_dense_neurons, activation=activation_dense))
        model.add(keras.layers.Dropout(dropout_dense))
    
    #sigmoid layer
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=['accuracy'])
    
    return model

# Definitions for HyperOpt

In [5]:
dry_run = False
trials = []

def evaluate_params(params):
    
    log_loss = 100.0
    try:
        #print params
        start_time = time.time()
        
        if not dry_run:
            model = create_model(
                    kernel_size=params['kernel_size'],

                    n_filters_input=params['n_filters_input'],
                    activation_input=params['activation_input'],
                    max_pooling_size_input=params['max_pooling_size_input'],
                    dropout_input=params['dropout_input'],

                    n_conv_layers=params['n_conv_layers'],
                    n_filters_conv=params['n_filters_conv'],
                    activation_conv=params['activation_conv'],
                    max_pooling_size_conv=params['max_pooling_size_conv'],
                    dropout_conv=params['dropout_conv'],

                    n_dense_layers=params['n_dense_layers'],
                    n_dense_neurons=params['n_dense_neurons'],
                    activation_dense=params['activation_dense'],
                    dropout_dense=params['dropout_dense'],

                    optimizer=params['optimizer'],
                    loss=params['loss_fn'])

            history = model.fit(
                    x=train_x,
                    y=train_y,
                    epochs=params['n_epochs'],
                    validation_data=(val_x, val_y),
                    batch_size=params['batch_size'],
                    verbose=0)

            val_acc = np.average(history.history['val_acc'][-10:])
            
            p = model.predict(val_x)
            log_loss = sklearn.metrics.log_loss(val_y, p)
        else:
            val_acc = 0.5
            log_loss = np.random.randn() + 100
            
        if np.isnan(log_loss):
            log_loss = 100.0

        trial = params.copy()
        trial['val_acc'] = val_acc
        trial['log_loss'] = log_loss
        trial['time'] = int(time.time() - start_time)
        trials.append(trial)
        
        is_best = ''
        if log_loss <= np.min([t['log_loss'] for t in trials]):
            is_best = '*'

        print 'trial={0:04d}     time={1:04}s     log_loss={2:2.5f}{3}'.format(
            len(trials),
            trial['time'],
            trial['log_loss'],
            is_best)
    except:
        pass
    
    return log_loss

In [6]:
space = {
    'kernel_size': hp.choice('kernel_size', [3]),
    
    'n_filters_input': 2 ** (3 + hp.randint('n_filters_input', 3)),
    'activation_input': hp.choice('activation_input', ['tanh', 'relu']),
    'max_pooling_size_input': hp.choice('max_pooling_size_input', [2]),
    'dropout_input': 0.2 + hp.randint('dropout_input', 7) / 10.0,
    
    'n_conv_layers': 2 + hp.randint('n_conv_layers', 3),
    'n_filters_conv': 2 ** (6 + hp.randint('n_filters_conv', 4)),
    'activation_conv': hp.choice('activation_conv', ['tanh', 'relu']),
    'max_pooling_size_conv': hp.choice('max_pooling_size_conv', [2]),
    'dropout_conv': 0.2 + hp.randint('dropout_conv', 7) / 10.0,
    
    'n_dense_layers': 1 + hp.randint('n_dense_layers', 2),
    'n_dense_neurons': 2 ** (7 + hp.randint('n_dense_neurons', 3)),
    'activation_dense': hp.choice('activation_dense', ['tanh', 'relu']),
    'dropout_dense': 0.2 + hp.randint('dropout_dense', 7) / 10.0,
    
    'optimizer': hp.choice('optimizer', ['rmsprop']),
    'loss_fn': hp.choice('loss_fn', ['binary_crossentropy']),
    
    'batch_size': 2 ** (4 + hp.randint('batch_size', 1)),
    'n_epochs': 20 * (2 + hp.randint('n_epochs', 2)),
}

In [7]:
trials = []
_ = hyperopt.fmin(evaluate_params,
    space=space,
    algo=hyperopt.tpe.suggest,
    max_evals=128)

trial=0001     time=0084s     log_loss=0.82191*
trial=0002     time=0036s     log_loss=15.92442
trial=0003     time=0110s     log_loss=0.59350*
trial=0004     time=0041s     log_loss=0.45905*
trial=0005     time=0051s     log_loss=100.00000
trial=0006     time=0231s     log_loss=0.43255*
trial=0007     time=0054s     log_loss=100.00000
trial=0008     time=0104s     log_loss=100.00000
trial=0009     time=0065s     log_loss=0.69622
trial=0010     time=0082s     log_loss=0.23290*
trial=0011     time=0299s     log_loss=0.74257
trial=0012     time=0301s     log_loss=0.57817
trial=0013     time=0351s     log_loss=15.51771
trial=0014     time=0146s     log_loss=1.28852
trial=0015     time=0310s     log_loss=100.00000
trial=0016     time=0353s     log_loss=0.41847
trial=0017     time=0140s     log_loss=0.77441
trial=0018     time=0310s     log_loss=0.50327
trial=0019     time=0091s     log_loss=0.23781
trial=0020     time=0091s     log_loss=13.08259
trial=0021     time=0083s     log_loss=0.247

In [8]:
results = pd.DataFrame(trials).sort_values('log_loss')
results.to_csv('results.csv', index=False)

In [9]:
results.head(50).T

Unnamed: 0,9,18,20,50,53,59,45,54,51,49,...,10,42,16,26,0,29,48,13,38,19
activation_conv,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,...,tanh,relu,relu,relu,relu,relu,tanh,relu,tanh,relu
activation_dense,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,...,tanh,relu,relu,relu,tanh,relu,relu,relu,relu,relu
activation_input,tanh,tanh,tanh,tanh,tanh,tanh,relu,tanh,tanh,tanh,...,relu,tanh,tanh,tanh,tanh,tanh,relu,tanh,relu,tanh
batch_size,16,16,16,16,16,16,16,16,16,16,...,16,16,16,16,16,16,16,16,16,16
dropout_conv,0.6,0.6,0.6,0.6,0.6,0.5,0.6,0.6,0.6,0.6,...,0.8,0.3,0.6,0.6,0.7,0.7,0.4,0.4,0.7,0.2
dropout_dense,0.3,0.4,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.5,...,0.8,0.6,0.4,0.5,0.5,0.5,0.3,0.7,0.2,0.2
dropout_input,0.3,0.3,0.3,0.3,0.3,0.5,0.6,0.3,0.3,0.4,...,0.5,0.3,0.7,0.8,0.4,0.8,0.3,0.3,0.4,0.3
kernel_size,3,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
log_loss,0.232898,0.237809,0.247744,0.271041,0.275783,0.279701,0.296495,0.314983,0.323985,0.325512,...,0.742575,0.744101,0.774414,0.804197,0.821913,0.868649,1.0052,1.28852,7.09972,13.0826
loss_fn,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,...,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy,binary_crossentropy


In [10]:
results.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
batch_size,66.0,16.0,0.0,16.0,16.0,16.0,16.0,16.0
dropout_conv,66.0,0.525758,0.170349,0.2,0.4,0.6,0.6,0.8
dropout_dense,66.0,0.443939,0.177269,0.2,0.3,0.4,0.575,0.8
dropout_input,66.0,0.413636,0.190454,0.2,0.3,0.3,0.5,0.8
kernel_size,66.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0
log_loss,66.0,19.83478,38.285083,0.232898,0.388226,0.69045,11.586869,100.0
max_pooling_size_conv,66.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
max_pooling_size_input,66.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
n_conv_layers,66.0,2.242424,0.431834,2.0,2.0,2.0,2.0,3.0
n_dense_layers,66.0,1.818182,0.38865,1.0,2.0,2.0,2.0,2.0
