# Kaggle Iceberg Detection Challenge

Using a CNN with ReLU.

#### Load Dependencies

In [1]:
import numpy as np
import pandas as pd

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation
from keras.layers.merge import Concatenate
from keras.models import Model
from keras.optimizers import Adam
from keras import initializers
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.callbacks import TensorBoard, LearningRateScheduler

from sklearn.model_selection import train_test_split

Using TensorFlow backend.


#### Load Datasets

In [2]:
# Read the training and test JSON files into DataFrames.
# Both paths are relative to the notebook's working directory.
train_json_path = "kaggle/datasets/iceberg/train.json"
test_json_path = "kaggle/datasets/iceberg/test.json"

train = pd.read_json(train_json_path)
test = pd.read_json(test_json_path)

#### Parse the Datasets

In [3]:
def _to_images(flat_bands):
    """Convert an iterable of flat bands into one float32 array of 75x75 images."""
    return np.array([np.array(flat).astype(np.float32).reshape(75, 75) for flat in flat_bands])


X_band_1 = _to_images(train["band_1"])
X_band_2 = _to_images(train["band_2"])

# Build a 3-channel input: band 1, band 2, and their element-wise mean,
# stacked along a new trailing (channel) axis.
X_band_mean = (X_band_1 + X_band_2) / 2
X_train = np.stack([X_band_1, X_band_2, X_band_mean], axis = -1)

#### Build the Model

In [5]:
# Build the network in one go by handing the ordered layer list to Sequential.
# Layout: four conv/pool/dropout blocks -> flatten -> two dense blocks -> sigmoid.
model = Sequential([
    # Conv block 1: 16 filters, 5x5, 'same' padding; declares the (75, 75, 3) input.
    Conv2D(16, kernel_size = 5, padding = 'same', activation = 'relu', input_shape = (75, 75, 3)),
    MaxPooling2D(pool_size = 2),
    Dropout(0.2),

    # Conv block 2: 32 filters, 5x5, 'same' padding.
    Conv2D(32, kernel_size = 5, padding = 'same', activation = 'relu'),
    MaxPooling2D(pool_size = 2),
    Dropout(0.2),

    # Conv block 3: 64 filters, 3x3, no padding argument (layer default).
    Conv2D(64, kernel_size = 3, activation = 'relu'),
    MaxPooling2D(pool_size = 2),
    Dropout(0.2),

    # Conv block 4: 64 filters, 3x3, no padding argument (layer default).
    Conv2D(64, kernel_size = 3, activation = 'relu'),
    MaxPooling2D(pool_size = 2),
    Dropout(0.2),

    # Collapse the feature maps to a vector for the dense layers.
    Flatten(),

    # Dense block 1.
    Dense(512),
    Activation('relu'),
    Dropout(0.2),

    # Dense block 2.
    Dense(512),
    Activation('relu'),
    Dropout(0.2),

    # Output: a single sigmoid unit for the binary is_iceberg target.
    Dense(1),
    Activation('sigmoid'),
])

#### Split Train / Validation Datasets

In [6]:
# Labels for the training images (the 'is_iceberg' column).
target_train = train['is_iceberg']

# Hold out 25% of the labelled data for validation; random_state fixes the shuffle.
(X_train_cv, X_valid,
 y_train_cv, y_valid) = train_test_split(X_train,
                                         target_train,
                                         train_size = 0.75,
                                         random_state = 1)

#### Compile Model

In [7]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is tracked as a metric.
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy'],
)

# Print the layer-by-layer output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 75, 75, 16)        1216      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 37, 37, 16)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 37, 37, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 37, 37, 32)        12832     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 18, 18, 32)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 18, 18, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 16, 16, 64)        18496     
__________

#### Tensorboard

In [8]:
# TensorBoard callback writing to a log directory relative to the notebook.
# It only takes effect if it is passed to training via `callbacks=[tensorboard]`.
tensorboard = TensorBoard(log_dir = "../logs/cnn-iceberg-ReLU-I")

#### Data Augmentation

In [9]:
# Random zoom / shift / rotation is applied to TRAINING images only.
train_datagen = ImageDataGenerator(zoom_range = 0.1,
                            height_shift_range = 0.1,
                            width_shift_range = 0.1,
                            rotation_range = 10)

# Validation images must be scored unmodified: augmenting them makes the
# per-epoch validation metrics noisy, non-reproducible, and inconsistent with
# evaluating the model directly on (X_valid, y_valid). A default
# ImageDataGenerator performs no transformations and only handles batching.
test_datagen = ImageDataGenerator()

train_generator = train_datagen.flow(X_train_cv, y_train_cv, batch_size = 16)

# shuffle = False keeps the validation batches in a fixed order so every
# validation pass sees the same batches.
validation_generator = test_datagen.flow(X_valid, y_valid, batch_size = 16, shuffle = False)

#### Train Model

In [10]:
# Number of batches needed to visit every validation sample exactly once.
# A fixed count of 400 batches made each validation run draw 400 * batch_size
# samples, cycling through the hold-out set repeatedly and slowing every epoch
# without adding information.
validation_steps = int(np.ceil(validation_generator.n / float(validation_generator.batch_size)))

# NOTE(review): steps_per_epoch counts BATCHES, not samples. 1200 batches per
# epoch is likely many passes over the training split per "epoch" -- confirm
# this is intended. It is left unchanged here to keep the training budget.
hist = model.fit_generator(train_generator,
                           steps_per_epoch = 1200,
                           epochs = 10,
                           verbose = 1,
                           validation_data = validation_generator,
                           validation_steps = validation_steps)
#                            callbacks = [tensorboard])

# Alternative kept for reference: train without augmentation on the raw arrays.
# model.fit(X_train_cv, y_train_cv,
#           batch_size = 16,
#           epochs = 3,
#           verbose = 1,
#           validation_data = (X_valid, y_valid))
#           callbacks = [tensorboard])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


#### Evaluate Model

In [11]:
# Score the trained model on the held-out validation split (X_valid, y_valid).
# The printed labels say "Test", but the data here is the validation split.
score = model.evaluate(X_valid, y_valid, verbose = 1)

# score holds the loss first, then the compiled metric(s).
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test accuracy: 0.887780548777


#### Prepare data for Submission

In [12]:
# Disabled cell: uncomment to build the 3-channel test array (band 1, band 2,
# and their mean, the same layout used for X_train) and predict on it with the
# trained model.
# X_band_test_1 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_1"]])
# X_band_test_2 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_2"]])
# X_test = np.concatenate([X_band_test_1[:, :, :, np.newaxis],
#                         X_band_test_2[:, :, :, np.newaxis],
#                         ((X_band_test_1 + X_band_test_2) / 2)[:, :, :, np.newaxis]], axis = -1)

# predicted_test = model.predict_proba(X_test)

#### Submit

In [13]:
# Disabled cell: uncomment to write the submission CSV (id, is_iceberg).
# Requires `predicted_test`, which is only produced by the (also disabled)
# prediction cell above -- enable that cell first.
# NOTE(review): the file name says "20-epochs" but the training cell uses
# epochs = 10 -- update the name before submitting.
# submission = pd.DataFrame()
# submission['id'] = test['id']
# submission['is_iceberg'] = predicted_test.reshape((predicted_test.shape[0]))
# submission.to_csv('kaggle/results/iceberg/submission-cnn-iceberg-relu-20-epochs-batch-16-augmentation.csv', index = False)