In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from math import sqrt


In [2]:
# Load the training records from the processed JSON file into a DataFrame.
train = pd.read_json(path_or_buf='./iceberg/train/processed/train.json')

In [3]:
# Load the test records from the processed JSON file into a DataFrame.
test = pd.read_json(path_or_buf='./iceberg/test/processed/test.json')

In [4]:
# Number of pixel values stored for one image: the length of the flattened
# band_1 list in the row labelled 0 (single .loc lookup instead of chained indexing).
total_size = len(train.loc[0, 'band_1'])

In [5]:
# Side length of one image, assuming each band is a flattened square image.
# Kept as a float (sqrt's return type) so the displayed value is unchanged.
dim = sqrt(total_size)

# The reshapes further down hard-code a square image, so fail fast here if the
# band length is not a perfect square instead of erroring later in a reshape.
assert dim.is_integer(), "band length {} is not a perfect square".format(total_size)

In [12]:
# Display the computed side length (a float, because it comes from sqrt).
dim

75.0

In [13]:
# Training band 1 as a float32 array of shape (n_images, 75, 75).
# The whole column of flattened pixel lists is converted in one call, which
# avoids building a temporary float64 array per image, and -1 lets NumPy infer
# the image count (so an empty frame still yields a (0, 75, 75) array).
x_band1 = np.array(train["band_1"].tolist(), dtype=np.float32).reshape(-1, 75, 75)

In [10]:
# Echo the band-1 image stack; NumPy abbreviates the printout of large arrays with "...".
x_band1

array([[[-27.87836075, -27.15415955, -28.66861534, ..., -25.57348251,
         -26.48867416, -30.50701332],
        [-28.66852951, -27.8784008 , -27.50877571, ..., -26.4886322 ,
         -28.67156219, -31.5941658 ],
        [-28.66852951, -27.15415955, -25.86504173, ..., -27.1571064 ,
         -27.88139343, -32.83712387],
        ..., 
        [-29.09290504, -28.66916275, -30.50461197, ..., -30.0110054 ,
         -25.8685379 , -26.48926544],
        [-31.59168243, -27.87899208, -28.66920471, ..., -27.51227188,
         -26.81707382, -27.51235771],
        [-29.09290504, -26.81408119, -27.15479088, ..., -26.81703186,
         -27.8819828 , -28.26812744]],

       [[-12.24237537, -14.92030525, -14.92036343, ..., -10.63340187,
         -11.95605373, -14.37902641],
        [-13.04662609, -14.07784081, -12.72854233, ..., -11.24318886,
         -11.78629112, -14.37902641],
        [-15.24811363, -14.07789993, -12.60451412, ..., -15.68016243,
         -13.51685047, -14.45488739],
        ...,

In [14]:
# Training band 2 as a float32 array of shape (n_images, 75, 75).
# The whole column of flattened pixel lists is converted in one call, which
# avoids building a temporary float64 array per image, and -1 lets NumPy infer
# the image count (so an empty frame still yields a (0, 75, 75) array).
x_band2 = np.array(train["band_2"].tolist(), dtype=np.float32).reshape(-1, 75, 75)

In [15]:
# Join the two bands along a new trailing axis so each image becomes a
# two-channel tensor: (n_images, 75, 75) x 2 -> (n_images, 75, 75, 2).
X_train = np.stack([x_band1, x_band2], axis=-1)

# Binary target column, one label per training image.
y_train = np.array(train["is_iceberg"])

In [16]:
# Sanity check on the training tensor: (n_images, 75, 75, 2), the last axis holding the two bands.
X_train.shape

(1604, 75, 75, 2)

In [17]:
# Test data: build the same (n_images, 75, 75, 2) float32 tensor as for training.
#
# NOTE: this cell rebinds x_band1 / x_band2 to the *test* bands, so from here on
# those names no longer refer to the training arrays. Later cells that display
# x_band1 depend on that, so the names are kept; re-run the training-band cells
# before anything that needs the training arrays again.
#
# Each column of flattened pixel lists is converted in one call (no temporary
# float64 array per image) and -1 lets NumPy infer the image count.
x_band1 = np.array(test["band_1"].tolist(), dtype=np.float32).reshape(-1, 75, 75)
x_band2 = np.array(test["band_2"].tolist(), dtype=np.float32).reshape(-1, 75, 75)

# Append a channel axis to each band and join them along it.
X_test = np.concatenate([x_band1[..., np.newaxis], x_band2[..., np.newaxis]], axis=-1)

In [18]:
# Sanity check on the test tensor: (n_images, 75, 75, 2), the last axis holding the two bands.
X_test.shape

(8424, 75, 75, 2)

In [19]:
import keras
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.models import Model
from keras import initializers
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation, GlobalMaxPooling2D

Using TensorFlow backend.


In [28]:
# Baseline CNN for the binary iceberg classifier: four conv -> max-pool -> dropout
# stages, then two fully connected layers and a single sigmoid output unit.
# Every dropout layer uses a rate of 0.2.
model = Sequential([
    # Stage 1: 64 filters (3x3) on the 75x75 two-band input; overlapping 3x3 pooling, stride 2.
    Conv2D(filters=64, kernel_size=3, activation='relu', input_shape=(75, 75, 2)),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    Dropout(0.2),

    # Stage 2: 128 filters (3x3); 2x2 pooling halves the spatial size.
    Conv2D(filters=128, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),

    # Stage 3: 128 filters (3x3).
    Conv2D(filters=128, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),

    # Stage 4: 64 filters (3x3).
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),

    # Classifier head. The ReLU activations are kept as separate layers so the
    # layer list printed by model.summary() stays the same.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.2),
    Dense(256),
    Activation('relu'),
    Dropout(0.2),

    # One sigmoid unit: the predicted probability of the positive (is_iceberg) class.
    Dense(1, activation="sigmoid"),
])

# Adam with explicitly spelled-out hyperparameters; no learning-rate decay.
opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 73, 73, 64)        1216      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 36, 36, 64)        0         
_________________________________________________________________
dropout_17 (Dropout)         (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 17, 17, 128)       0         
_________________________________________________________________
dropout_18 (Dropout)         (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 15, 15, 128)       147584    
__________

In [29]:
# Train for 20 epochs, holding out 20% of the training samples for validation.
# NOTE(review): per the Keras docs, validation_split takes the held-out samples
# from the end of the arrays before shuffling; confirm the row order of
# `train` is not correlated with the label.
model.fit(x=X_train, y=y_train, epochs=20, validation_split=0.2)

Train on 1283 samples, validate on 321 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f2b4565ed90>

In [22]:
# Score every test image with the trained model (verbose=1 shows a progress bar).
# NOTE(review): this cell's execution count (22) is lower than those of the
# model-definition and training cells (28, 29), so the saved result may come
# from an earlier model. Re-run the notebook top to bottom before trusting it.
prediction = model.predict(x=X_test, verbose=1)



In [23]:
# Pair each test id with its predicted probability; the model output is
# flattened to one dimension so it can be used as a DataFrame column.
submission = pd.DataFrame(
    {
        'id': test["id"],
        'is_iceberg': prediction.flatten(),
    }
)

# Write the submission file without the DataFrame index column.
submission.to_csv(path_or_buf="./better.csv", index=False)

In [25]:
# Inspect the first 75x75 image in x_band1. In top-to-bottom order x_band1 was
# last rebound from test["band_1"], so this is a test image, not a training one.
x_band1[0,:,:]

array([[-15.86325073, -15.20107746, -17.88773537, ..., -14.14651394,
        -12.89877796, -14.00627327],
       [-16.39520645, -15.20107746, -16.3950634 , ..., -15.43849754,
        -14.00634575, -13.73268032],
       [-15.44356155, -14.36598206, -17.05988121, ..., -17.05496216,
        -15.3568697 , -13.8002739 ],
       ..., 
       [-14.58681679, -17.16007805, -15.86396408, ..., -15.94540024,
        -18.79867744, -19.82165718],
       [-15.04407501, -12.54522896, -12.723032  , ..., -16.30004692,
        -15.35772324, -16.95752907],
       [-14.08210182, -12.54522896, -11.44420624, ..., -15.68872547,
        -15.60473347, -16.95752907]], dtype=float32)

In [26]:
# Same view as x_band1[0, :, :]: indexing only the first axis returns the whole first image.
x_band1[0]

array([[-15.86325073, -15.20107746, -17.88773537, ..., -14.14651394,
        -12.89877796, -14.00627327],
       [-16.39520645, -15.20107746, -16.3950634 , ..., -15.43849754,
        -14.00634575, -13.73268032],
       [-15.44356155, -14.36598206, -17.05988121, ..., -17.05496216,
        -15.3568697 , -13.8002739 ],
       ..., 
       [-14.58681679, -17.16007805, -15.86396408, ..., -15.94540024,
        -18.79867744, -19.82165718],
       [-15.04407501, -12.54522896, -12.723032  , ..., -16.30004692,
        -15.35772324, -16.95752907],
       [-14.08210182, -12.54522896, -11.44420624, ..., -15.68872547,
        -15.60473347, -16.95752907]], dtype=float32)