# Kaggle Iceberg Detection Challenge

Using a CNN with a custom activation function, `ReLU_s`.

#### Load Dependencies

In [1]:
import numpy as np
import pandas as pd

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation
from keras.layers import GlobalMaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.models import Model
from keras import initializers
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.callbacks import TensorBoard

from sklearn.model_selection import train_test_split

Using TensorFlow backend.


#### Load Datasets

In [2]:
# Read the labelled training set and the unlabelled test set from their JSON files.
train, test = (
    pd.read_json(json_path)
    for json_path in (
        "kaggle/datasets/iceberg/train.json",
        "kaggle/datasets/iceberg/test.json",
    )
)

#### Parse the Datasets

In [3]:
# Each record stores a band as a flat list; convert it to a 75x75 float32 image.
X_band_1 = np.array([
    np.array(band).astype(np.float32).reshape(75, 75)
    for band in train["band_1"]
])
X_band_2 = np.array([
    np.array(band).astype(np.float32).reshape(75, 75)
    for band in train["band_2"]
])

# Stack three channels on a new last axis: band 1, band 2, and their mean.
X_train = np.stack([X_band_1, X_band_2, (X_band_1 + X_band_2) / 2], axis=-1)

#### Define Custom Activation Function

In [4]:
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects

def relus(Z):
    """Custom "ReLU_s" activation: element-wise maximum of ``Z`` and a periodic bound.

    The bound is
    ``m = e_param + (sigmoid(sin(Z)) - sigmoid(cos(Z)) * exp(sqrt(pi)))``
    and the activation returns ``maximum(m, Z)``.

    Args:
        Z: Backend tensor of pre-activations.

    Returns:
        Backend tensor holding ``maximum(m, Z)`` computed element-wise.
    """
    e_param = 1.8
    # Use a constant rather than K.variable: the value never changes, and
    # K.variable would allocate a new stateful backend variable every time the
    # activation is applied (i.e. once per layer that uses it).
    # NOTE(review): 3.14 is presumably a rounded pi; it is kept as-is so the
    # activation's numerical output is unchanged.
    pi = K.constant(3.14)
    m = e_param + (K.sigmoid(K.sin(Z)) - K.sigmoid(K.cos(Z)) * K.exp(K.sqrt(pi)))
    A = K.maximum(m, Z)
    return A

# Register the activation *function* (not an Activation layer instance) under
# the 'ReLU_s' alias used by the model. Registering a layer instance makes every
# layer share one Activation object, and that object has no __name__ for Keras
# to serialize the activation by.
# The function is also registered under its own name ('relus').
# NOTE(review): Keras is expected to serialize an activation by its function
# name, so the extra key should let a saved model resolve it on load - confirm
# against the installed Keras version.
get_custom_objects().update({'ReLU_s': relus, 'relus': relus})

#### Build the Model

In [5]:
# Assemble the CNN: four conv / max-pool / dropout stages, then two dense
# layers, then a single sigmoid output unit.
model = Sequential([
    # Conv stage 1 (declares the 75x75x3 input)
    Conv2D(64, kernel_size=(3, 3), activation='ReLU_s', input_shape=(75, 75, 3)),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    Dropout(0.2),

    # Conv stage 2
    Conv2D(128, kernel_size=(3, 3), activation='ReLU_s'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),

    # Conv stage 3
    Conv2D(128, kernel_size=(3, 3), activation='ReLU_s'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),

    # Conv stage 4
    Conv2D(64, kernel_size=(3, 3), activation='ReLU_s'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),

    # Collapse the feature maps into a vector for the dense layers
    Flatten(),

    # Dense block 1
    Dense(512),
    Activation('ReLU_s'),
    Dropout(0.2),

    # Dense block 2
    Dense(256),
    Activation('ReLU_s'),
    Dropout(0.2),

    # Output: one unit squashed by a sigmoid
    Dense(1),
    Activation('sigmoid'),
])

#### Split Train / Validation Datasets

In [6]:
# Labels for the training images.
target_train = train["is_iceberg"]

# Keep 75% of the samples for training; the remainder becomes the validation
# split. The fixed random_state makes the split repeatable.
X_train_cv, X_valid, y_train_cv, y_valid = train_test_split(
    X_train,
    target_train,
    train_size=0.75,
    random_state=1,
)

#### Compile Model

In [7]:
# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 73, 73, 64)        1792      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)       147584    
__________

#### Tensorboard

In [None]:
# TensorBoard callback that writes its event files under the given log directory.
tensorboard = TensorBoard(log_dir="../logs/cnn-iceberg-ReLUs-30-epochs-batch-24")

#### Train Model

In [None]:
# Train on the training split and report metrics on the validation split after
# each epoch. TensorBoard logging is currently disabled; add
# callbacks=[tensorboard] to the call to re-enable it.
model.fit(
    x=X_train_cv,
    y=y_train_cv,
    validation_data=(X_valid, y_valid),
    epochs=3,
    batch_size=16,
    verbose=1,
)

Train on 1203 samples, validate on 401 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
 208/1203 [====>.........................] - ETA: 28s - loss: 1.0492 - acc: 0.5144

#### Evaluate Model

In [None]:
# Score the trained model on the held-out validation split. The model was
# compiled with metrics=['accuracy'], so score[0] is the loss and score[1] the
# accuracy.
score = model.evaluate(X_valid, y_valid, verbose = 1)
# Labelled as validation metrics: X_valid / y_valid were split off the labelled
# training data and are not the separate `test` set.
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

#### Prepare data for Submission

In [None]:
# NOTE: this cell is entirely commented out, so no test predictions are produced.
# When enabled, it builds X_test from `test` with the same three-channel layout
# used for X_train (band_1, band_2, and their mean) and stores the model's
# predictions in `predicted_test`.
# X_band_test_1 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_1"]])
# X_band_test_2 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_2"]])
# X_test = np.concatenate([X_band_test_1[:, :, :, np.newaxis],
#                         X_band_test_2[:, :, :, np.newaxis],
#                         ((X_band_test_1 + X_band_test_2) / 2)[:, :, :, np.newaxis]], axis = -1)

# predicted_test = model.predict_proba(X_test)

#### Submit

In [None]:
# NOTE: this cell is entirely commented out, so no submission file is written.
# When enabled, it needs `predicted_test`, pairs each test id with its predicted
# probability, and writes the result to a CSV file.
# NOTE(review): the file name says "30-epochs-batch-24" - confirm it matches the
# training settings actually used before submitting.
# submission = pd.DataFrame()
# submission['id'] = test['id']
# submission['is_iceberg'] = predicted_test.reshape((predicted_test.shape[0]))
# submission.to_csv('kaggle/results/iceberg/submission-cnn-iceberg-ReLUs-30-epochs-batch-24.csv', index = False)