In [1]:
import numpy as np 
np.random.seed(666)
import pandas as pd 
from sklearn.model_selection import train_test_split
from subprocess import check_output
import sys
sys.path.insert(0, "/opt/DL/tensorflow/lib/python2.7/site-packages/")
from matplotlib import pyplot
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten
from keras.layers import GlobalMaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping

import matplotlib
import matplotlib.pyplot as plt
import glob
from scipy import misc
from scipy import fftpack


Using TensorFlow backend.


In [2]:
# Read the training and test sets from the JSON files next to the notebook.
train, test = pd.read_json("./train.json"), pd.read_json("./test.json")

# Show the first rows of the training frame as the cell output.
train.head(3)

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878361, -27.15416, -28.668615, -29.537971...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920305, -14.920363, -12.66633...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1


In [3]:
def denoise(img, weight=0.1, eps=1e-3, num_iter_max=200):
    """Iteratively smooth a 2-D image with a primal-dual update.

    NOTE(review): the update looks like a Chambolle-style projection scheme
    for total-variation (ROF) denoising -- confirm against the reference the
    code was adapted from.

    Parameters
    ----------
    img : numpy.ndarray
        Input image. Differences are taken along axes 0 and 1 with
        ``np.roll``, so the borders wrap around (periodic boundary).
    weight : float
        Scales the divergence term added back to ``img`` and divides the
        dual step size (``tau / weight``). Must be non-zero.
    eps : float
        Relative tolerance. Iteration stops once the change in error between
        two consecutive iterations drops below ``eps`` times the error of
        the first iteration.
    num_iter_max : int
        Upper bound on the number of iterations.

    Returns
    -------
    numpy.ndarray
        The smoothed image, same shape as ``img``. If ``num_iter_max`` is 0
        the all-zero initial estimate is returned.
    """
    u = np.zeros_like(img)
    px = np.zeros_like(img)
    py = np.zeros_like(img)

    nm = np.prod(img.shape[:2])
    tau = 0.125

    err_init = None
    err_prev = None
    for i in range(num_iter_max):
        u_old = u

        # x and y components of u's gradient (forward differences, wrapping)
        ux = np.roll(u, -1, axis=1) - u
        uy = np.roll(u, -1, axis=0) - u

        # update the dual variable and rescale it so its magnitude is <= 1
        px_new = px + (tau / weight) * ux
        py_new = py + (tau / weight) * uy
        norm_new = np.maximum(1, np.sqrt(px_new ** 2 + py_new ** 2))
        px = px_new / norm_new
        py = py_new / norm_new

        # divergence of the dual field (backward differences, wrapping)
        rx = np.roll(px, 1, axis=1)
        ry = np.roll(py, 1, axis=0)
        div_p = (px - rx) + (py - ry)

        # update image
        u = img + weight * div_p

        # root-mean-square change of the estimate in this iteration
        error = np.linalg.norm(u - u_old) / np.sqrt(nm)

        if i == 0:
            err_init = error
        elif np.abs(err_prev - error) < eps * err_init:
            # the error has stopped changing between consecutive iterations
            break

        # Remember this iteration's error for the next comparison. The
        # original assigned to a misspelled name here, so the previous error
        # was never updated and the loop almost never stopped early.
        err_prev = error

    return u

## Build datasets

This step takes some time, so please be patient.

In [4]:
mode = 'normal'  # could be 'normal' or 'denoise'
denoise_weight = 1  # from 1 to 10

# Fail loudly on a typo instead of silently falling through to denoising.
if mode not in ('normal', 'denoise'):
    raise ValueError("mode must be 'normal' or 'denoise', got {!r}".format(mode))


def _band_images(bands, mode, denoise_weight):
    """Convert a column of flattened bands into an (n, 75, 75) float32 array.

    Each image is passed through ``denoise`` (with ``weight=denoise_weight``)
    unless ``mode`` is ``'normal'``.
    """
    images = [np.array(band).astype(np.float32).reshape(75, 75) for band in bands]
    if mode != 'normal':
        images = [denoise(image, weight=denoise_weight) for image in images]
    return np.array(images)


def _build_features(frame, mode, denoise_weight):
    """Stack band_1, band_2 and their mean as the three channels of each image."""
    band1 = _band_images(frame["band_1"], mode, denoise_weight)
    band2 = _band_images(frame["band_2"], mode, denoise_weight)
    return np.stack([band1, band2, (band1 + band2) / 2], axis=-1)


# Train data (kept under separate names so re-running this cell is idempotent)
X_all = _build_features(train, mode, denoise_weight)
y_all = np.array(train["is_iceberg"])

# Test data
X_test = _build_features(test, mode, denoise_weight)

# Hold out 25% of the labelled data for validation.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_all, y_all, random_state=123, train_size=0.75)




In [5]:
def get_callbacks(filepath, patience=2):
    """Return the Keras callbacks used for training.

    The list holds an ``EarlyStopping`` callback watching ``val_loss`` (in
    "min" mode, with the given ``patience``) followed by a ``ModelCheckpoint``
    that writes to ``filepath`` with ``save_best_only=True``.
    """
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, mode="min")
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    return [early_stopping, checkpoint]
    
def get_model():
    """Build and compile the convolutional classifier.

    The network takes a (75, 75, 3) input named "X_1", batch-normalises it,
    runs four convolutional stages (16, 16 / 32, 32 / 64, 64 / 128 filters,
    3x3 kernels, ELU), each followed by 2x2 max pooling and 20% dropout,
    applies global max pooling, then two dense heads (256 and 64 units, each
    with batch normalisation and 50% dropout) and a single sigmoid output.
    It is compiled with binary cross-entropy, Adam and the accuracy metric.
    """
    bn_momentum = 0
    activation = "elu"

    input_1 = Input(shape=(75, 75, 3), name="X_1")
    features = BatchNormalization(momentum=bn_momentum)(input_1)

    # (number of filters, number of stacked convolutions) for each stage
    for filters, depth in ((16, 2), (32, 2), (64, 2), (128, 1)):
        for _ in range(depth):
            features = Conv2D(filters, kernel_size=(3, 3), activation=activation)(features)
        features = MaxPooling2D((2, 2))(features)
        features = Dropout(0.2)(features)
    features = GlobalMaxPooling2D()(features)

    # fully connected heads: Dense -> BatchNormalization -> Dropout
    for units in (256, 64):
        features = Dense(units, activation=activation)(features)
        features = BatchNormalization(momentum=bn_momentum)(features)
        features = Dropout(0.5)(features)

    output = Dense(1, activation="sigmoid")(features)
    model = Model([input_1], output)
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])
    return model
# Instantiate the network once here. Note that the training cell calls
# get_model() again, so this instance is replaced before fitting starts.
model = get_model()
# Uncomment to print the layer-by-layer summary:
# model.summary()

In [6]:
# File the checkpoint callback writes the model to (best epoch only).
file_path = ".model_weights.hdf5"
callbacks = get_callbacks(filepath=file_path, patience=5)

# Start from a freshly built network and train for at most 50 epochs;
# early stopping on the validation loss may end the run sooner.
model = get_model()
model.fit([X_train],
          y_train,
          batch_size=32,
          epochs=50,
          validation_data=([X_valid], y_valid),
          callbacks=callbacks)

Train on 1203 samples, validate on 401 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


<keras.callbacks.History at 0x3ffd168f0b90>

In [7]:
# Restore the checkpointed weights written during training.
model.load_weights(filepath=file_path)

# model.evaluate returns [loss, accuracy] here, because the model is compiled
# with a loss plus the 'accuracy' metric.
print("Train evaluate:")
acc1 = model.evaluate([X_train], y_train, verbose=0, batch_size=200)
print(acc1)
print("-------------------------------")
print("Watch list evaluate:")
acc2 = model.evaluate([X_valid], y_valid, verbose=0, batch_size=200)
print(acc2)

# Append both results to a per-mode log file. The `with` block closes the
# file on exit, so no explicit close() is needed afterwards.
str1 = 'Training acc: {}\n'.format(acc1)
str2 = 'Validation acc: {}\n'.format(acc2)
out_file = './CNN-' + mode + '-results.txt'
with open(out_file, 'a') as myfile:
    myfile.write(str1)
    myfile.write(str2)

Train evaluate:
[0.20539191333036669, 0.91687448046550291]
-------------------------------
Watch list evaluate:
[0.28606525487487117, 0.87032419428266494]


In [8]:
# Run the trained network on the test images, 200 at a time, showing progress.
prediction = model.predict([X_test], verbose=1, batch_size=200)



In [9]:
# Sanity check: one prediction row per test sample.
# Written as a call so it behaves the same on Python 2 and Python 3.
print(prediction.shape)

(8424, 1)


In [10]:
# Pair every test id with its predicted probability; the prediction array is
# flattened to one dimension so it fits a single column.
submission = pd.DataFrame({
    'id': test["id"],
    'is_iceberg': prediction.reshape(len(prediction))
})
submission.head(10)

Unnamed: 0,id,is_iceberg
0,5941774d,0.103309
1,4023181e,0.886354
2,b20200e4,0.52022
3,e7f018bb,0.996098
4,4371c8c3,0.968497
5,a8d9b1fd,0.945203
6,29e7727e,0.073924
7,92a51ffb,0.998137
8,c769ac97,0.037141
9,aee0547d,0.001703


In [11]:
# Write the submission to a per-mode CSV file, without the DataFrame index column.
output_file = './CNN-' + mode + '-submission.csv'
submission.to_csv(output_file, index=False)