In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit

##### Import Data

In [2]:
# Read the train and test sets from their JSON files, then pull out the
# 'is_iceberg' column of the training frame as the label vector.
train = pd.read_json("../data/train.json")
test = pd.read_json("../data/test.json")
target_train = train['is_iceberg']

In [3]:
# Turn every flattened band into a 75x75 float32 image.
X_band_1 = np.array([
    np.array(pixels).astype(np.float32).reshape(75, 75)
    for pixels in train["band_1"]
])
X_band_2 = np.array([
    np.array(pixels).astype(np.float32).reshape(75, 75)
    for pixels in train["band_2"]
])

# Three channels per sample: band 1, band 2 and their element-wise mean,
# stacked along a new trailing axis.
X_train = np.stack([X_band_1, X_band_2, (X_band_1 + X_band_2) / 2], axis=-1)

# Affine rescaling applied before feeding the network.
X_train_new = X_train / 100 + 0.5

In [4]:
# Incidence angle.
# The training column may hold non-numeric placeholders (the original code
# special-cased the string 'na'). Coerce anything non-numeric to NaN, force a
# float dtype, and replace the missing values with 0.0.
train['inc_angle'] = (
    pd.to_numeric(train['inc_angle'], errors='coerce').astype(float).fillna(0.0)
)
X_train_inc = np.array(train['inc_angle'])

# Clean the test angles through the same path so both arrays share dtype and
# missing-value handling. The `test` frame itself is left unmodified.
X_test_inc = np.array(
    pd.to_numeric(test['inc_angle'], errors='coerce').astype(float).fillna(0.0)
)

In [5]:
# Turn every flattened test band into a 75x75 float32 image.
X_band_test_1 = np.array([
    np.array(pixels).astype(np.float32).reshape(75, 75)
    for pixels in test["band_1"]
])
X_band_test_2 = np.array([
    np.array(pixels).astype(np.float32).reshape(75, 75)
    for pixels in test["band_2"]
])

# Same channel layout as the training tensor: band 1, band 2, and their mean.
X_test = np.stack(
    [X_band_test_1, X_band_test_2, (X_band_test_1 + X_band_test_2) / 2],
    axis=-1,
)

# Same affine rescaling as applied to the training images.
X_test_new = X_test / 100 + 0.5

In [6]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.models import Model,load_model
from keras import initializers
from keras.initializers import glorot_uniform
from keras.optimizers import Adam, RMSprop, rmsprop, SGD
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras import layers
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from keras.utils import plot_model
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras import regularizers

Using TensorFlow backend.


In [7]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.models import load_model
def getVggModel(input_shape=None):
    """Build an uncompiled binary classifier on top of a VGG16 convolutional base.

    The base is VGG16 with ImageNet weights and no fully connected top. Its
    'block5_pool' output goes through global max pooling, a 256-unit ReLU
    dense layer, dropout (0.2) and a single sigmoid unit.

    Args:
        input_shape: Shape of one input sample, passed to VGG16. When None
            (the default) the shape is taken from the notebook-level
            ``X_train`` array, which was the previous hard-coded behaviour.

    Returns:
        A Keras ``Model`` mapping the VGG16 input to the sigmoid output.
        The caller is responsible for compiling it.
    """
    if input_shape is None:
        # Backward-compatible default: use the training tensor's sample shape.
        input_shape = X_train.shape[1:]

    # `classes` is not passed: it only affects the fully connected top,
    # which include_top=False removes.
    base_model = VGG16(weights='imagenet', include_top=False,
                       input_shape=input_shape)

    x = base_model.get_layer('block5_pool').output
    x = GlobalMaxPooling2D()(x)
    x = Dense(256, activation='relu', name='fc2')(x)
    x = Dropout(0.2)(x)

    predictions = Dense(1, activation='sigmoid')(x)

    return Model(inputs=base_model.input, outputs=predictions)

In [8]:
def get_callbacks(filepath, patience=2):
    """Return the training callbacks: early stopping plus best-only checkpointing.

    Args:
        filepath: Destination handed to ``ModelCheckpoint``.
        patience: Patience value handed to ``EarlyStopping``.

    Returns:
        A two-element list ``[EarlyStopping, ModelCheckpoint]``.
    """
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, mode="min")
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    return [early_stopping, checkpoint]

# Checkpoint location: passed to get_callbacks() for ModelCheckpoint and later
# read back with model.load_weights().
file_path = "../weights_VGG16_pretrained_111.hdf5"

In [19]:
# Data augmentation
batch_size = 64

# Random geometric transforms used for the training batches.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.30,
    height_shift_range=0.30,
    zoom_range=0.1,
)

# Generator for the held-out images: flips only.
test_datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True)

# Split images, incidence angles and labels together with a fixed seed;
# 75% of the samples go to the training side.
(X_train_cv, X_valid,
 X_angle_train, X_angle_valid,
 y_train_cv, y_valid) = train_test_split(
    X_train_new, X_train_inc, target_train,
    train_size=0.75, random_state=16)

train_generator = train_datagen.flow(
    X_train_cv, y_train_cv, batch_size=batch_size)
validation_generator = test_datagen.flow(
    X_valid, y_valid, batch_size=batch_size)



In [21]:
# Build the network and exclude its first six layers from training.
model = getVggModel()
for layer in model.layers[:6]:
    layer.trainable = False

# Optimizer, callbacks (early stopping + checkpoint to file_path), compile.
myoptim = Adam(lr=0.00015, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.001)
callbacks = get_callbacks(filepath=file_path, patience=10)
model.compile(loss='binary_crossentropy', optimizer=myoptim, metrics=['accuracy'])

# Train on augmented batches; validation uses the raw (un-augmented)
# hold-out arrays rather than validation_generator.
fit_options = dict(
    steps_per_epoch=128,
    epochs=100,
    verbose=1,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
)
model.fit_generator(train_generator, **fit_options)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


<keras.callbacks.History at 0x1b538f1a58>

In [22]:
########### PRIOR TO SUBMISSION  ############
# Restore the checkpointed weights and score them on the hold-out split.
# The printed labels say "Test", but the data evaluated here is
# X_valid / y_valid.
model.load_weights(filepath=file_path)
score = model.evaluate(X_valid, y_valid)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.23606977439
Test accuracy: 0.907730673614


In [23]:
# Reload the checkpointed weights, predict on the rescaled test images and
# assemble the submission frame (one flattened probability per test id).
model.load_weights(filepath=file_path)
predicted_test = model.predict(X_test_new)
submission = pd.DataFrame({
    'id': test['id'],
    'is_iceberg': predicted_test.reshape(len(predicted_test)),
})

In [24]:
# Incidence angles treated as "leaky": every test row whose angle exactly equals
# one of these values has its prediction overwritten with 1.
leaky_angle = [34.4721, 42.5591, 33.6352, 36.1061, 39.2340]

# Vectorised membership test instead of a per-row Python loop with label-based
# Series[i] lookups. `.values` yields a positional boolean array, so the mask
# does not depend on the frames' index labels.
mask = test['inc_angle'].isin(leaky_angle).values
column_name = 'is_iceberg'
submission.loc[mask, column_name] = 1


In [25]:
# Write the submission frame to CSV without the index column.
submission.to_csv('../submit/submission11261621.csv', index=False)

In [26]:
# Display the first ten rows of the submission as a sanity check.
submission.head(n=10)

Unnamed: 0,id,is_iceberg
0,5941774d,0.1526262
1,4023181e,0.1772294
2,b20200e4,0.0008380009
3,e7f018bb,0.9902607
4,4371c8c3,0.03372379
5,a8d9b1fd,0.3415713
6,29e7727e,0.03347115
7,92a51ffb,0.9961688
8,c769ac97,1.210031e-07
9,aee0547d,1.365193e-09


In [72]:
# Load four previously written submission files for ensembling.
s1, s2, s3, s4 = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../submit/submission11162343.csv',
        '../submit/submission11132225.csv',
        '../submit/submission11142250.csv',
        '../submit/submission11151028.csv',
    )
]

In [73]:
# Equal-weight average of the four prediction columns, written back into s1.
# s1 is overwritten in place, so re-running this cell without reloading the
# CSVs averages an already-averaged column.
s1['is_iceberg'] = (
    s1['is_iceberg']
    .add(s2['is_iceberg'])
    .add(s3['is_iceberg'])
    .add(s4['is_iceberg'])
    .div(4)
)

In [46]:
# Save whatever ensemble is currently held in s1.
# NOTE(review): this cell's execution count (46) comes after the 0.1/0.9 blend
# cells (43-45) and before the four-way average cells (72-73), so the saved file
# presumably contains the weighted blend rather than the average -- verify.
s1.to_csv('../submit/ens_test_7.csv',index = False)

In [43]:
# Reload the two submissions that feed the weighted blend.
s1 = pd.read_csv('../submit/submission11162343.csv')
s5 = pd.read_csv('../submit/submission11132225.csv')

In [44]:
# Weighted blend written back into s1: 10% of s1's predictions plus 90% of s5's.
s1['is_iceberg'] = s1['is_iceberg'].mul(0.1).add(s5['is_iceberg'].mul(0.9))

In [45]:
# Sanity check: count blended predictions that exceed 1.
(s1['is_iceberg'] > 1).sum()

0