In [1]:
import os
import numpy as np
import pandas as pd
from skimage.util.montage import montage2d
import matplotlib.pyplot as plt
%matplotlib inline

# GPU selection. PCI_BUS_ID makes CUDA number the devices by PCI bus position,
# so the index given in CUDA_VISIBLE_DEVICES refers to a fixed physical card.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "5"

# Directory holding train.json / test.json. The default is the original
# location; set the STATOIL_DATA_DIR environment variable to point the notebook
# at another copy of the data without editing this cell.
base_path = os.environ.get(
    "STATOIL_DATA_DIR",
    os.path.join('/workspace/Kaggle/statoil-iceberg/', 'data'),
)

In [2]:
def load_and_format(in_path, band_shape=(75, 75)):
    """Read a JSON table of two-band images and build a 3-channel image tensor.

    The file is parsed with ``pd.read_json`` and must provide ``band_1`` and
    ``band_2`` columns, each cell holding one flattened image.

    Parameters
    ----------
    in_path : str
        Path of the JSON file to load.
    band_shape : tuple of int, optional
        ``(height, width)`` of a single band image. Defaults to ``(75, 75)``,
        the size that was previously hard-coded.

    Returns
    -------
    out_df : pandas.DataFrame
        The parsed table, returned as read.
    out_images : numpy.ndarray
        ``float32`` array of shape ``(n_rows, height, width, 3)``. Channel 0 is
        ``band_1``, channel 1 is ``band_2`` and channel 2 is the element-wise
        mean of the two bands.

    Raises
    ------
    KeyError
        If the table has no ``band_1`` or ``band_2`` column.
    ValueError
        If the band data cannot be reshaped to ``band_shape``.
    """
    out_df = pd.read_json(in_path)
    height_width = tuple(band_shape)

    def _band_images(column):
        # Convert the whole column at once instead of row by row. Fixing the
        # leading dimension to the row count keeps the reshape strict (a wrong
        # band length raises) while still handling an empty table.
        flat = np.array(column.tolist(), dtype=np.float32)
        return flat.reshape((len(column),) + height_width)

    out_images_1 = _band_images(out_df["band_1"])
    out_images_2 = _band_images(out_df["band_2"])

    # The third channel is synthetic: the per-pixel average of both bands.
    band_mean = (out_images_1 + out_images_2) / 2
    out_images = np.stack([out_images_1, out_images_2, band_mean], axis=-1)
    return out_df, out_images

In [3]:
# Training set: parse the JSON table and report the frame / image-tensor shapes.
train_json = os.path.join(base_path, 'train.json')
train_df, train_images = load_and_format(train_json)
print('training', train_df.shape, 'loaded', train_images.shape)

# Test set: same loader, same report.
test_json = os.path.join(base_path, 'test.json')
test_df, test_images = load_and_format(test_json)
print('testing', test_df.shape, 'loaded', test_images.shape)

# Show three randomly chosen training rows as the cell's output.
train_df.sample(3)

('training', (1604, 5), 'loaded', (1604, 75, 75, 3))
('testing', (8424, 4), 'loaded', (8424, 75, 75, 3))


Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
241,"[-23.462166, -21.658684, -17.78233, -16.593632...","[-29.262859, -27.324705, -26.984085, -27.67937...",944e1ee2,41.4201,1
1476,"[-24.891918, -23.17366, -25.452568, -26.367756...","[-23.630882, -23.868904, -32.072433, -27.76047...",ac6ca227,45.2803,1
759,"[-21.437099, -21.437157, -23.123631, -21.26860...","[-26.961227, -27.629759, -31.506216, -30.97969...",9997020b,38.4755,1


In [4]:
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
# Hold out 20% of the training images for validation; the fixed seed keeps the
# split the same on every run.
labels = train_df['is_iceberg']
X_train, X_valid, y_train, y_valid = train_test_split(
    train_images,
    labels,
    test_size=0.20,
    random_state=2017,
)

# Report the image and label shapes of each split.
for split_name, split_images, split_labels in (('Train', X_train, y_train),
                                               ('Validation', X_valid, y_valid)):
    print(split_name, split_images.shape, split_labels.shape)

Using TensorFlow backend.


('Train', (1283, 75, 75, 3), (1283,))
('Validation', (321, 75, 75, 3), (321,))


In [5]:
from keras.applications.resnet50 import ResNet50
from keras import optimizers
from keras.layers import Dense, Activation, GlobalAveragePooling2D
from keras.models import Model

# Convolutional backbone: ResNet50 without its fully connected top.
base_model = ResNet50(include_top=False)

# Binary classification head: pool the backbone's feature maps to a vector,
# map it to a single value, then squash that value with a sigmoid.
pooled = GlobalAveragePooling2D()(base_model.output)
logit = Dense(1)(pooled)
output = Activation('sigmoid')(logit)

model = Model(inputs=[base_model.input], outputs=[output])

# Single sigmoid output, so train against binary cross-entropy.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D) (None, None, None, 3) 0           input_1[0][0]                    
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, None, None, 64 9472        zero_padding2d_1[0][0]           
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, None, None, 64 256         conv1[0][0]                      
___________________________________________________________________________________________

In [None]:
# Train for 50 epochs in mini-batches of 32, using the held-out split as
# validation data and printing progress (verbose=1).
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,
    batch_size=32,
    verbose=1,
)

Train on 1283 samples, validate on 321 samples
Epoch 1/50
