In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from math import sqrt

In [2]:
# Load the processed training set into a DataFrame.
train = pd.read_json(path_or_buf='./iceberg/train/processed/train.json')

In [3]:
# Load the processed test set into a DataFrame.
test = pd.read_json(path_or_buf='./iceberg/test/processed/test.json')

In [4]:
# Number of values stored in band_1 of the row labelled 0.
total_size = len(train.loc[0, 'band_1'])

In [5]:
# Square root of the flattened band length (a float); the later cells
# reshape every band to 75 x 75.
dim = sqrt(total_size)

In [6]:
# Display the computed side length.
dim

75.0

In [7]:
# Turn each training row's band_1 values into a float32 75 x 75 array and
# collect the results into one array with a leading per-row axis.
x_band1 = np.array([
    np.reshape(np.array(pixels).astype(np.float32), (75, 75))
    for pixels in train["band_1"]
])

In [8]:
# Display the band-1 array built in the previous cell.
# NOTE(review): this echoes the whole array; x_band1.shape or a small slice
# would keep the saved output short.
x_band1

array([[[-27.87836075, -27.15415955, -28.66861534, ..., -25.57348251,
         -26.48867416, -30.50701332],
        [-28.66852951, -27.8784008 , -27.50877571, ..., -26.4886322 ,
         -28.67156219, -31.5941658 ],
        [-28.66852951, -27.15415955, -25.86504173, ..., -27.1571064 ,
         -27.88139343, -32.83712387],
        ..., 
        [-29.09290504, -28.66916275, -30.50461197, ..., -30.0110054 ,
         -25.8685379 , -26.48926544],
        [-31.59168243, -27.87899208, -28.66920471, ..., -27.51227188,
         -26.81707382, -27.51235771],
        [-29.09290504, -26.81408119, -27.15479088, ..., -26.81703186,
         -27.8819828 , -28.26812744]],

       [[-12.24237537, -14.92030525, -14.92036343, ..., -10.63340187,
         -11.95605373, -14.37902641],
        [-13.04662609, -14.07784081, -12.72854233, ..., -11.24318886,
         -11.78629112, -14.37902641],
        [-15.24811363, -14.07789993, -12.60451412, ..., -15.68016243,
         -13.51685047, -14.45488739],
        ...,

In [9]:
# Same conversion as for band_1: each training row's band_2 values become a
# float32 75 x 75 array, collected along a leading per-row axis.
x_band2 = np.array([
    np.reshape(np.array(pixels).astype(np.float32), (75, 75))
    for pixels in train["band_2"]
])

In [10]:
# Join the two bands on a new trailing axis so every sample is 75 x 75 x 2,
# which is the input_shape the model is built with.
X_train = np.stack([x_band1, x_band2], axis=-1)
# Target column as a NumPy array, one entry per training row.
y_train = np.array(train["is_iceberg"])

In [11]:
# Display the shape of the stacked training array.
X_train.shape

(1604, 75, 75, 2)

In [12]:
# Test data.
# The band arrays get test-specific names on purpose: rebinding x_band1 /
# x_band2 here would replace the training bands in the kernel, so re-running
# the X_train cell afterwards would silently rebuild X_train from test images
# (and no longer line up with y_train).
x_band1_test = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_1"]])
x_band2_test = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_2"]])
# Join the two bands on a new trailing axis, mirroring the layout of X_train.
X_test = np.concatenate([x_band1_test[:, :, :, np.newaxis], x_band2_test[:, :, :, np.newaxis]], axis=-1)

In [13]:
# Display the shape of the stacked test array.
X_test.shape

(8424, 75, 75, 2)

In [14]:
import keras
from keras.models import Sequential
from keras.layers import Convolution2D, GlobalAveragePooling2D, Dense, Dropout

Using TensorFlow backend.


In [15]:
# Small CNN for the binary is_iceberg target: two 3x3 convolutions, global
# average pooling over the feature maps, dropout, and one sigmoid unit.
model = Sequential()
# Only the first layer of a Sequential model declares the input shape.
model.add(Convolution2D(32, 3, activation="relu", input_shape=(75, 75, 2)))
# No input_shape here: this layer's input is the previous layer's output, so
# repeating (75, 75, 2) was a misleading copy-paste leftover.
model.add(Convolution2D(64, 3, activation="relu"))
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.3))
model.add(Dense(1, activation="sigmoid"))
# Keyword arguments make it explicit which string is the optimizer and which
# is the loss.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 73, 73, 32)        608       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 71, 71, 64)        18496     
_________________________________________________________________
global_average_pooling2d_1 ( (None, 64)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 19,169
Trainable params: 19,169
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Train for 10 epochs, holding out 20% of the samples for validation.
# NOTE(review): per the Keras docs, validation_split takes the trailing
# fraction of the arrays before any shuffling - confirm the rows of `train`
# are not ordered in a way that biases that hold-out.
model.fit(x=X_train, y=y_train, epochs=10, validation_split=0.2)

Train on 1283 samples, validate on 321 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f3cde5130d0>

In [19]:
# Run the trained model over every test sample; the final layer is a single
# sigmoid unit, so each row of the result holds one value.
prediction = model.predict(x=X_test, verbose=1)



In [21]:
# Pair each test id with its flattened model output and write the two-column
# table to disk without the index column.
submission = test[["id"]].assign(is_iceberg=prediction.flatten())
submission.to_csv("./simple.csv", index=False)