In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale

%matplotlib inline

In [2]:
df = pd.read_json(os.path.join('input', 'train.json'))

In [3]:
def process_bands(df):
    # reclassify as numpy array
    for band in ['band_1', 'band_2']:
        df[band] = df[band].apply(np.asarray)
    # set band_3 as mean of first two bands
    df['band_3'] = (df.band_1 + df.band_2) / 2


# list of convenience
bands = ['band_1', 'band_2', 'band_3']

process_bands(df)

In [4]:
from IPython.display import display

display(df.iloc[0])

band_1        [-27.878360999999998, -27.15416, -28.668615, -...
band_2        [-27.154118, -29.537888, -31.0306, -32.190483,...
id                                                     dfd5f913
inc_angle                                               43.9239
is_iceberg                                                    0
band_3        [-27.516239499999998, -28.346024, -29.84960749...
Name: 0, dtype: object

In [None]:
no_angle = df.inc_angle == 'na'
angles = df[~no_angle].inc_angle
angles.describe()

count     1471.0000
unique     878.0000
top         34.4721
freq        23.0000
Name: inc_angle, dtype: float64

In [31]:
for name, f in zip(['angle mean', 'angle median'], [np.mean, np.median]):
    print('{:15} = {:.5f}'.format(name, f(angles)))

angle mean      = 39.26871
angle median    = 39.50150


Especially since we are going to use normalization techniques, we choose to fill the incidence angle with the mean value in the training data. 

In [34]:
df.loc[no_angle, 'inc_angle'] = np.mean(angles)

In [35]:
train_df, valid_df = train_test_split(df, random_state=42)

In [36]:
def make_tensors(df):
    gen = (np.stack(getattr(df, band)) for band in bands)
    flat_tensors = np.stack(gen, axis=-1)
    return flat_tensors.reshape(flat_tensors.shape[0], 75, 75, 3)


train_tensors = make_tensors(train_df)
valid_tensors = make_tensors(valid_df)
print(train_tensors.shape)
print(valid_tensors.shape)

(1203, 75, 75, 3)
(401, 75, 75, 3)


In [38]:
train_inc_angle = train_df.inc_angle.values
valid_inc_angle = valid_df.inc_angle.values
print(train_inc_angle.shape)
print(valid_inc_angle.shape)

(1203,)
(401,)


In [40]:
train_targets = train_df.is_iceberg.values
valid_targets = valid_df.is_iceberg.values
print(train_targets.shape)
print(valid_targets.shape)

(1203,)
(401,)


In [41]:
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [42]:
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=.05, 
    height_shift_range=.05, 
    rotation_range=30
)

datagen.fit(np.concatenate((train_tensors, valid_tensors)), augment=True, rounds=2)

In [61]:
from keras.layers import Conv2D, MaxPooling2D, Input, Dense, Flatten, concatenate
from keras.layers import Dropout
from keras.models import Model
from keras.utils import plot_model


def create_cnn(iceberg_predict=True):
    # inputs
    radar_img_shape = train_tensors.shape[1:]
    radar_input = Input(shape=radar_img_shape, name='radar_image')
    aux_input = Input(shape=(1,), name='aux_input')
    
    # computer vision model for 3-banded radar image
    x = Conv2D(64*3, kernel_size=4, padding='same', activation='relu')(radar_input)
    x = MaxPooling2D(pool_size=2)(x)
    x = Conv2D(128, kernel_size=3, padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=2)(x)
    x = Dropout(.25)(x)
    for _ in range(2):
        x = Conv2D(64, kernel_size=3, padding='same', activation='relu')(x)
        x = MaxPooling2D(pool_size=2)(x)
        x = Dropout(.15)(x)
    radar_output = Flatten()(x)
    
    # add auxiliary input and combine into concatenate layer
    combined_inputs = concatenate([radar_output, aux_input])
    
    # feed both into fully-connected layers
    x = Dense(1024, activation='relu')(combined_inputs)
    x = Dropout(.2)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(.2)(x)
    x = Dense(256, activation='relu')(x)
    
    # final output
    last_act = 'sigmoid' if iceberg_predict else None
    last_name = 'is_iceberg' if iceberg_predict else 'inc_angle'
    output = Dense(1, activation=last_act, name=last_name)(x)
    
    # model definition and summary
    model = Model(inputs=[radar_input, aux_input], outputs=output)
    model.summary()
    plot_model(model, to_file='cnn-{}.png'.format('iceberg' if iceberg_predict else 'angle'))

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
radar_image (InputLayer)         (None, 75, 75, 3)     0                                            
____________________________________________________________________________________________________
conv2d_49 (Conv2D)               (None, 75, 75, 128)   6272        radar_image[0][0]                
____________________________________________________________________________________________________
max_pooling2d_49 (MaxPooling2D)  (None, 25, 25, 128)   0           conv2d_49[0][0]                  
____________________________________________________________________________________________________
conv2d_50 (Conv2D)               (None, 25, 25, 128)   147584      max_pooling2d_49[0][0]           
___________________________________________________________________________________________

In [62]:
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

early_stop = EarlyStopping(monitor='val_loss', verbose=1, patience=5)
save_best = ModelCheckpoint(filepath=os.path.join('saved_models', 
                                                  'cnn.inc_angle.best.weights.hdf5'), 
                            save_best_only=True, save_weights_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)

In [63]:
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

In [64]:
batch_size = 32
epochs = 100


def img_and_aux_gen(datagen, tensors, aux_input, targets):
    # https://www.kaggle.com/sinkie/keras-data-augmentation-with-multiple-inputs
    img_gen = datagen.flow(x=tensors, y=targets, batch_size=batch_size, seed=42)
    aux_gen = datagen.flow(x=np.zeros_like(tensors), y=aux_input, batch_size=batch_size, seed=42)
    while True:
        x1, y = img_gen.next()
        _, x2 = aux_gen.next()
        yield ([x1, x2], y)
        

gen_flow = img_and_aux_gen(datagen, train_tensors, train_is_iceberg, train_targets)

model.fit_generator(gen_flow, callbacks=[early_stop, save_best, reduce_lr],
                    steps_per_epoch=len(train_targets)/batch_size, 
                    epochs=epochs,
                    validation_data=([valid_tensors, valid_is_iceberg], valid_targets))

Epoch 1/100


 1/34 [..............................] - ETA: 79s - loss: 1520.1907 - mean_absolute_error: 38.8341

 2/34 [>.............................] - ETA: 57s - loss: 1415.6057 - mean_absolute_error: 37.4253

 3/34 [=>............................] - ETA: 47s - loss: 1123.9695 - mean_absolute_error: 32.4617

 4/34 [==>...........................] - ETA: 42s - loss: 995.2419 - mean_absolute_error: 28.9306 

 5/34 [===>..........................] - ETA: 38s - loss: 814.0369 - mean_absolute_error: 24.7424

 6/34 [====>.........................] - ETA: 36s - loss: 718.6467 - mean_absolute_error: 23.0482

 7/34 [=====>........................] - ETA: 33s - loss: 648.5875 - mean_absolute_error: 21.7928

 8/34 [=====>........................] - ETA: 32s - loss: 580.2597 - mean_absolute_error: 20.1261





















































Epoch 00000: val_loss improved from inf to 72911.80167, saving model to saved_models/cnn.inc_angle.best.weights.hdf5


Epoch 2/100


 1/34 [..............................] - ETA: 35s - loss: 30.1656 - mean_absolute_error: 4.5885

 2/34 [>.............................] - ETA: 34s - loss: 25.4715 - mean_absolute_error: 4.0608

 3/34 [=>............................] - ETA: 32s - loss: 28.2865 - mean_absolute_error: 4.2756

 4/34 [==>...........................] - ETA: 31s - loss: 25.9021 - mean_absolute_error: 4.1306

 5/34 [===>..........................] - ETA: 30s - loss: 25.8962 - mean_absolute_error: 3.9953

 6/34 [====>.........................] - ETA: 29s - loss: 24.2023 - mean_absolute_error: 3.8969

 7/34 [=====>........................] - ETA: 28s - loss: 24.2340 - mean_absolute_error: 3.9021

 8/34 [=====>........................] - ETA: 27s - loss: 22.9667 - mean_absolute_error: 3.7692





















































Epoch 00001: val_loss improved from 72911.80167 to 67460.52037, saving model to saved_models/cnn.inc_angle.best.weights.hdf5


Epoch 3/100


 1/34 [..............................] - ETA: 35s - loss: 18.3572 - mean_absolute_error: 3.3001

 2/34 [>.............................] - ETA: 34s - loss: 21.4903 - mean_absolute_error: 3.7334

 3/34 [=>............................] - ETA: 32s - loss: 20.4410 - mean_absolute_error: 3.6306

 4/34 [==>...........................] - ETA: 31s - loss: 18.3053 - mean_absolute_error: 3.3943

 5/34 [===>..........................] - ETA: 30s - loss: 18.5243 - mean_absolute_error: 3.4061

 6/34 [====>.........................] - ETA: 29s - loss: 17.3234 - mean_absolute_error: 3.3053

 7/34 [=====>........................] - ETA: 28s - loss: 17.1587 - mean_absolute_error: 3.3230

 8/34 [=====>........................] - ETA: 27s - loss: 17.3182 - mean_absolute_error: 3.3342





















































Epoch 00002: val_loss did not improve


Epoch 4/100


 1/34 [..............................] - ETA: 36s - loss: 19.5069 - mean_absolute_error: 3.5160

 2/34 [>.............................] - ETA: 34s - loss: 16.1189 - mean_absolute_error: 3.2570

 3/34 [=>............................] - ETA: 33s - loss: 12.8493 - mean_absolute_error: 2.8395

 4/34 [==>...........................] - ETA: 31s - loss: 14.4839 - mean_absolute_error: 3.0204

 5/34 [===>..........................] - ETA: 30s - loss: 13.9109 - mean_absolute_error: 2.9748

 6/34 [====>.........................] - ETA: 29s - loss: 16.1679 - mean_absolute_error: 3.1514

 7/34 [=====>........................] - ETA: 28s - loss: 15.5828 - mean_absolute_error: 3.0927

 8/34 [=====>........................] - ETA: 27s - loss: 15.7544 - mean_absolute_error: 3.1080





















































Epoch 00003: val_loss improved from 67460.52037 to 55946.75377, saving model to saved_models/cnn.inc_angle.best.weights.hdf5


Epoch 5/100


 1/34 [..............................] - ETA: 36s - loss: 17.2306 - mean_absolute_error: 3.3914

 2/34 [>.............................] - ETA: 35s - loss: 19.1448 - mean_absolute_error: 3.5554

 3/34 [=>............................] - ETA: 33s - loss: 21.7774 - mean_absolute_error: 3.7327

 4/34 [==>...........................] - ETA: 32s - loss: 20.6448 - mean_absolute_error: 3.6406

 5/34 [===>..........................] - ETA: 30s - loss: 20.8424 - mean_absolute_error: 3.7193

 6/34 [====>.........................] - ETA: 29s - loss: 19.8513 - mean_absolute_error: 3.5780

 7/34 [=====>........................] - ETA: 28s - loss: 21.7805 - mean_absolute_error: 3.7912

 8/34 [=====>........................] - ETA: 27s - loss: 21.9481 - mean_absolute_error: 3.7838





















































Epoch 00004: val_loss improved from 55946.75377 to 32307.51125, saving model to saved_models/cnn.inc_angle.best.weights.hdf5


Epoch 6/100


 1/34 [..............................] - ETA: 35s - loss: 18.9525 - mean_absolute_error: 3.3792

 2/34 [>.............................] - ETA: 34s - loss: 17.6491 - mean_absolute_error: 3.3821

 3/34 [=>............................] - ETA: 32s - loss: 14.5357 - mean_absolute_error: 2.9842

 4/34 [==>...........................] - ETA: 31s - loss: 12.8029 - mean_absolute_error: 2.8245

 5/34 [===>..........................] - ETA: 30s - loss: 11.4974 - mean_absolute_error: 2.6608

 6/34 [====>.........................] - ETA: 29s - loss: 11.7403 - mean_absolute_error: 2.6889

 7/34 [=====>........................] - ETA: 28s - loss: 12.8490 - mean_absolute_error: 2.8114

 8/34 [=====>........................] - ETA: 27s - loss: 12.7535 - mean_absolute_error: 2.8212





















































Epoch 00005: val_loss improved from 32307.51125 to 18650.60526, saving model to saved_models/cnn.inc_angle.best.weights.hdf5


Epoch 7/100


 1/34 [..............................] - ETA: 36s - loss: 39.9348 - mean_absolute_error: 5.5447

 2/34 [>.............................] - ETA: 34s - loss: 26.6824 - mean_absolute_error: 4.2373

 3/34 [=>............................] - ETA: 33s - loss: 24.4305 - mean_absolute_error: 3.9705

 4/34 [==>...........................] - ETA: 31s - loss: 22.3934 - mean_absolute_error: 3.7967

 5/34 [===>..........................] - ETA: 30s - loss: 21.2252 - mean_absolute_error: 3.7137

 6/34 [====>.........................] - ETA: 29s - loss: 19.8678 - mean_absolute_error: 3.5630

 7/34 [=====>........................] - ETA: 28s - loss: 19.0886 - mean_absolute_error: 3.4881

 8/34 [=====>........................] - ETA: 27s - loss: 18.3142 - mean_absolute_error: 3.3991





















































Epoch 00006: val_loss did not improve


Epoch 8/100


 1/34 [..............................] - ETA: 35s - loss: 21.3822 - mean_absolute_error: 3.0555

 2/34 [>.............................] - ETA: 34s - loss: 15.9639 - mean_absolute_error: 2.7592

 3/34 [=>............................] - ETA: 32s - loss: 13.6081 - mean_absolute_error: 2.6242

 4/34 [==>...........................] - ETA: 31s - loss: 13.8664 - mean_absolute_error: 2.7130

 5/34 [===>..........................] - ETA: 30s - loss: 18.2063 - mean_absolute_error: 3.1667

 6/34 [====>.........................] - ETA: 29s - loss: 16.9329 - mean_absolute_error: 3.0845

 7/34 [=====>........................] - ETA: 28s - loss: 18.9928 - mean_absolute_error: 3.3400

 8/34 [=====>........................] - ETA: 27s - loss: 18.5930 - mean_absolute_error: 3.2933





















































Epoch 00007: val_loss did not improve


Epoch 9/100


 1/34 [..............................] - ETA: 35s - loss: 11.2276 - mean_absolute_error: 2.7550

 2/34 [>.............................] - ETA: 33s - loss: 11.5587 - mean_absolute_error: 2.7472

 3/34 [=>............................] - ETA: 32s - loss: 11.7322 - mean_absolute_error: 2.7418

 4/34 [==>...........................] - ETA: 31s - loss: 13.3156 - mean_absolute_error: 2.9153

 5/34 [===>..........................] - ETA: 30s - loss: 14.1934 - mean_absolute_error: 2.9756

 6/34 [====>.........................] - ETA: 29s - loss: 14.9647 - mean_absolute_error: 3.0117

 7/34 [=====>........................] - ETA: 28s - loss: 14.7114 - mean_absolute_error: 2.9785

 8/34 [=====>........................] - ETA: 27s - loss: 14.7876 - mean_absolute_error: 3.0217





















































Epoch 00008: val_loss did not improve


Epoch 10/100


 1/34 [..............................] - ETA: 35s - loss: 13.8254 - mean_absolute_error: 3.0073

 2/34 [>.............................] - ETA: 34s - loss: 12.4878 - mean_absolute_error: 2.9051

 3/34 [=>............................] - ETA: 33s - loss: 12.7694 - mean_absolute_error: 2.8856

 4/34 [==>...........................] - ETA: 31s - loss: 13.4062 - mean_absolute_error: 2.9370

 5/34 [===>..........................] - ETA: 30s - loss: 12.0304 - mean_absolute_error: 2.7588

 6/34 [====>.........................] - ETA: 29s - loss: 12.3189 - mean_absolute_error: 2.7766

 7/34 [=====>........................] - ETA: 28s - loss: 12.6443 - mean_absolute_error: 2.8354

 8/34 [=====>........................] - ETA: 27s - loss: 12.7081 - mean_absolute_error: 2.8385





















































Epoch 00009: val_loss did not improve

Epoch 00009: reducing learning rate to 0.00010000000474974513.


Epoch 11/100


 1/34 [..............................] - ETA: 35s - loss: 27.2399 - mean_absolute_error: 4.4896

 2/34 [>.............................] - ETA: 34s - loss: 34.6256 - mean_absolute_error: 4.8381

 3/34 [=>............................] - ETA: 32s - loss: 28.3104 - mean_absolute_error: 4.4452

 4/34 [==>...........................] - ETA: 31s - loss: 24.5476 - mean_absolute_error: 4.0534

 5/34 [===>..........................] - ETA: 30s - loss: 22.0775 - mean_absolute_error: 3.7897

 6/34 [====>.........................] - ETA: 29s - loss: 21.6260 - mean_absolute_error: 3.7379

 7/34 [=====>........................] - ETA: 28s - loss: 20.5247 - mean_absolute_error: 3.6058

 8/34 [=====>........................] - ETA: 27s - loss: 19.9599 - mean_absolute_error: 3.5714





















































Epoch 00010: val_loss did not improve


Epoch 12/100


 1/34 [..............................] - ETA: 35s - loss: 13.2080 - mean_absolute_error: 2.9269

 2/34 [>.............................] - ETA: 34s - loss: 12.2955 - mean_absolute_error: 2.8787

 3/34 [=>............................] - ETA: 32s - loss: 10.8394 - mean_absolute_error: 2.6567

 4/34 [==>...........................] - ETA: 31s - loss: 10.6849 - mean_absolute_error: 2.6219

 5/34 [===>..........................] - ETA: 30s - loss: 13.0773 - mean_absolute_error: 2.7930

 6/34 [====>.........................] - ETA: 29s - loss: 14.8021 - mean_absolute_error: 2.8820

 7/34 [=====>........................] - ETA: 28s - loss: 14.4296 - mean_absolute_error: 2.8822

 8/34 [=====>........................] - ETA: 27s - loss: 14.0678 - mean_absolute_error: 2.8552





















































Epoch 00011: val_loss did not improve


Epoch 00011: early stopping


<keras.callbacks.History at 0x1a600d8ef0>

In [67]:
valid_preds = model.predict([valid_tensors, valid_is_iceberg])

In [69]:
valid_targets

(368,)