In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from subprocess import check_output
# Show which files are available in the data directory.
data_listing = check_output(["ls", "../data"])
print(data_listing.decode("utf8"))

sample_submission.csv
test.json
train.json



In [2]:
from sklearn.model_selection import train_test_split
from os.path import join as opj

# Load the training and test records from their JSON files (train first, then test).
train, test = (
    pd.read_json(json_path)
    for json_path in ("../data/train.json", "../data/test.json")
)

In [3]:
# Turn each flattened band into a 75x75 float32 image.
X_band_1 = np.array([
    np.reshape(np.array(flat).astype(np.float32), (75, 75))
    for flat in train["band_1"]
])
X_band_2 = np.array([
    np.reshape(np.array(flat).astype(np.float32), (75, 75))
    for flat in train["band_2"]
])
# Three channels on the last axis: band 1, band 2 and their element-wise mean.
X_train = np.stack(
    [X_band_1, X_band_2, (X_band_1 + X_band_2) / 2],
    axis=-1,
)

In [6]:
# Incident angle: 'na' markers and missing values are replaced by 0.0 so the
# column can be used as a plain float array.
train.inc_angle = train.inc_angle.replace('na', 0).astype(float).fillna(0.0)
X_train_inc = np.array(train.inc_angle)
# Apply the same cleaning to the test angles so both arrays share dtype and
# missing-value handling. The `test` frame itself is left unmodified.
X_test_inc = np.array(test.inc_angle.replace('na', 0).astype(float).fillna(0.0))

In [4]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation
from keras.layers import GlobalMaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.models import Model
from keras import initializers
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping

Using TensorFlow backend.
  return f(*args, **kwds)


In [81]:
def getModel(KernelSize = (5,5), Momentum = 0.99):
    """Build, compile and summarise the convolutional classifier.

    The network takes 75x75x3 inputs, stacks six Conv2D -> BatchNormalization
    -> ELU stages (some followed by 2x2 max pooling and/or 25% dropout),
    applies global max pooling, two dense stages with 50% dropout, and ends
    in a single sigmoid unit.

    Args:
        KernelSize: kernel size passed to every Conv2D layer.
        Momentum: momentum passed to every BatchNormalization layer.

    Returns:
        The compiled Sequential model. Its summary is printed as a side effect.
    """
    gmodel = Sequential()

    def add_norm_and_activation():
        # Batch normalisation followed by an ELU non-linearity.
        gmodel.add(BatchNormalization(momentum=Momentum))
        gmodel.add(Activation('elu'))

    def add_conv_stage(filters, pool, drop, **conv_kwargs):
        # One convolutional stage; pooling and dropout are optional per stage.
        gmodel.add(Conv2D(filters, kernel_size=KernelSize, padding='same', **conv_kwargs))
        add_norm_and_activation()
        if pool:
            gmodel.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        if drop:
            gmodel.add(Dropout(0.25))

    # Entry stages; the first layer declares the input shape.
    add_conv_stage(32, pool=True, drop=True, input_shape=(75, 75, 3))
    add_conv_stage(64, pool=True, drop=True)

    # Middle stages keep the spatial resolution (no pooling).
    add_conv_stage(128, pool=False, drop=True)
    add_conv_stage(64, pool=False, drop=False)

    # Top stages.
    add_conv_stage(128, pool=True, drop=True)
    add_conv_stage(256, pool=True, drop=True)

    # Collapse the spatial dimensions before the dense layers.
    gmodel.add(GlobalMaxPooling2D())

    # Dense stages.
    for units in (256, 128):
        gmodel.add(Dense(units))
        add_norm_and_activation()
        gmodel.add(Dropout(0.5))

    # Single sigmoid output.
    gmodel.add(Dense(1))
    gmodel.add(Activation('sigmoid'))

    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.005)
    gmodel.compile(loss='binary_crossentropy',
                   optimizer=optimizer,
                   metrics=['accuracy'])
    gmodel.summary()
    return gmodel


def get_callbacks(filepath, patience=2):
    """Create the training callbacks.

    Args:
        filepath: path handed to ModelCheckpoint for saving the model.
        patience: patience value handed to EarlyStopping.

    Returns:
        A list ``[early_stopping, checkpoint]``: an EarlyStopping callback
        monitoring ``val_loss`` in "min" mode, and a ModelCheckpoint created
        with ``save_best_only=True``.
    """
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, mode="min")
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    return [early_stopping, checkpoint]

In [82]:
# Split images, incident angles and labels together into training and
# validation parts (80% of the rows go to training, fixed random seed).
target_train = train['is_iceberg']
(X_train_cv, X_valid,
 X_angle_train, X_angle_valid,
 y_train_cv, y_valid) = train_test_split(
    X_train, X_train_inc, target_train,
    train_size=0.8, random_state=66)



In [83]:
# Data augmentation and generator setup
batch_size = 64
file_path = "../weights11072129.hdf5"
callbacks = get_callbacks(filepath=file_path, patience=15)

# Augmentation configuration used for the training images only.
train_datagen = ImageDataGenerator(
            rotation_range=20,
            horizontal_flip=True,
            vertical_flip=True,
            width_shift_range = 0.30,
            height_shift_range = 0.30,
            zoom_range = 0.1
            )

# Pass-through generator (no augmentation) for held-out data.
test_datagen = ImageDataGenerator()

train_generator = train_datagen.flow(X_train_cv, y_train_cv, batch_size=batch_size)

# Validation batches come from the non-augmenting generator: feeding them
# through train_datagen would randomly distort the held-out images.
validation_generator = test_datagen.flow(X_valid, y_valid, batch_size=batch_size)

In [84]:
import os
# Build a fresh model and train it on augmented batches, validating on the
# un-augmented hold-out arrays after every epoch.
gmodel=getModel()
gmodel.fit_generator(train_generator,
                     # Integer division: the step count must be a whole number,
                     # whereas `/` would hand Keras a float.
                     steps_per_epoch=2**13 // batch_size,
                     epochs=100,
                     verbose=1,
                     validation_data=(X_valid, y_valid),
                     callbacks=callbacks)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_185 (Conv2D)          (None, 75, 75, 32)        2432      
_________________________________________________________________
batch_normalization_219 (Bat (None, 75, 75, 32)        128       
_________________________________________________________________
activation_236 (Activation)  (None, 75, 75, 32)        0         
_________________________________________________________________
max_pooling2d_112 (MaxPoolin (None, 37, 37, 32)        0         
_________________________________________________________________
dropout_184 (Dropout)        (None, 37, 37, 32)        0         
_________________________________________________________________
conv2d_186 (Conv2D)          (None, 37, 37, 64)        51264     
_________________________________________________________________
batch_normalization_220 (Bat (None, 37, 37, 64)        256       
__________

<keras.callbacks.History at 0x220199828>

In [85]:
# Reload the checkpointed weights and score them on the hold-out split
# (X_valid / y_valid). Note that the printed labels say "Test" even though
# these figures are computed on the validation arrays.
gmodel.load_weights(filepath=file_path)
score = gmodel.evaluate(X_valid, y_valid)
held_out_loss, held_out_accuracy = score[:2]
print('Test loss:', held_out_loss)
print('Test accuracy:', held_out_accuracy)

Test loss: 0.2561753836
Test accuracy: 0.866043613707


In [86]:
# Turn each flattened test band into a 75x75 float32 image.
X_band_test_1 = np.array([
    np.reshape(np.array(flat).astype(np.float32), (75, 75))
    for flat in test["band_1"]
])
X_band_test_2 = np.array([
    np.reshape(np.array(flat).astype(np.float32), (75, 75))
    for flat in test["band_2"]
])
# Same three-channel layout as the training images: band 1, band 2, mean.
X_test = np.stack(
    [X_band_test_1, X_band_test_2, (X_band_test_1 + X_band_test_2) / 2],
    axis=-1,
)

In [87]:
# Predict on the test images. `predict` returns the sigmoid outputs directly;
# `predict_proba` was only a deprecated alias for it and is absent from later
# Keras releases.
predicted_test = gmodel.predict(X_test)
# One row per test record: its id and the flattened model output.
submission = pd.DataFrame({
    'id': test['id'],
    'is_iceberg': predicted_test.ravel(),
})



In [None]:
#11070205: test loss: 0.2707
#11071435: test loss: 0.252399
#11080014: PL: 0.2161 Test Loss: 0.246969386935
#11081153: PL: 0.2190 Test Loss: 0.2518

In [91]:
# Test rows whose incident angle exactly equals one of these values have
# their prediction overwritten with 1.
leaky_angle = [34.4721, 42.5591, 33.6352, 36.1061, 39.2340]
# Vectorised membership test. `.values` gives a positional boolean array, so
# the mask lines up with the submission rows without depending on the index
# labels of `test` (the per-row `test['inc_angle'][i]` lookup did).
mask = test['inc_angle'].isin(leaky_angle).values
column_name = 'is_iceberg'
submission.loc[mask, column_name] = 1

In [92]:
# Write the submission to disk without the index column.
submission_path = '../submit/submission11090951.csv'
submission.to_csv(submission_path, index=False)

In [93]:
# Preview the first five rows of the submission.
submission.head(n=5)

Unnamed: 0,id,is_iceberg
0,5941774d,0.151205
1,4023181e,0.185793
2,b20200e4,0.001467
3,e7f018bb,0.928047
4,4371c8c3,0.144764
