In [1]:
import pandas as pd 
import numpy as np 
import cv2 # Used to manipulated the images 
np.random.seed(1337) #


# Import Keras 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from subprocess import check_output

##load data
train=pd.read_json('train.json')
test=pd.read_json('test.json')
train.inc_angle=train.inc_angle.replace('na',0)#replace non-number with 0
train=train[train.inc_angle>0]#choose non-zero data for training
test.inc_angle = test.inc_angle.replace('na',0)
test=test[test.inc_angle>0]
print(train.columns)


Using TensorFlow backend.


Index(['band_1', 'band_2', 'id', 'inc_angle', 'is_iceberg'], dtype='object')


In [2]:
def get_scaled_imgs(df):
    imgs = []
    for i, row in df.iterrows():
      band_1=np.array(row['band_1']).reshape(75,75)
      band_2=np.array(row['band_2']).reshape(75,75)
      band_3=(band_1 + band_2)/2

      a = (band_1-band_1.mean())/(band_1.max()-band_1.min())
      b = (band_2-band_2.mean())/(band_2.max()-band_2.min())
      c = (band_3-band_3.mean())/(band_3.max()-band_3.min())
      
      imgs.append(np.dstack((a,b,c)))

    return np.array(imgs)

In [3]:
def data_augment(imgs):
    lr_imgs = []
    ud_imgs = []
    for x in range(0,imgs.shape[0]):
        band_1=imgs[x,:,:,0]
        band_2=imgs[x,:,:,1]
        band_3=imgs[x,:,:,2]
        #data generated by VAE
  
        # lr augment
        band_1_lr = np.fliplr(band_1)
        band_2_lr = np.fliplr(band_2)
        band_3_lr = np.fliplr(band_3)
        #band_4_lr = np.fliplr(band_4)
        lr_imgs.append(np.dstack((band_1_lr, band_2_lr, band_3_lr)))
        #ud augment
        band_1_ud = np.flipud(band_1)
        band_2_ud = np.flipud(band_2)
        band_3_ud = np.flipud(band_3)
        #band_4_ud = np.flipud(band_4)
        ud_imgs.append(np.dstack((band_1_ud, band_2_ud, band_3_ud)))
    lr_imgs = np.array(lr_imgs)
    ud_imgs = np.array(ud_imgs)
    images = np.concatenate((imgs, ud_imgs, lr_imgs))
    return imgs

In [4]:
x_train = get_scaled_imgs(train)
x_train = data_augment(x_train)
y_train = train.is_iceberg
y_train = np.concatenate((y_train, y_train, y_train))
x_test = get_scaled_imgs(test)
idno_test = test.id

In [5]:
 def getModel():
    model=Sequential()
    
    # CNN 1
    model.add(Conv2D(64, kernel_size=(3, 3),activation='relu', input_shape=(75, 75, 3)))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Dropout(0.2))

    # CNN 2
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu' ))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

    # CNN 3
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

    #CNN 4
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

    # You must flatten the data for the dense layers
    model.add(Flatten())

    #Dense 1
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))

    #Dense 2
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.2))

    # Output 
    model.add(Dense(1, activation="sigmoid"))
    
    optimizer = Adam(lr=0.001, decay=0.0)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

In [6]:
model = getModel()
model.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 73, 73, 64)        1792      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)       147584    
__________

In [7]:
batch_size = 32
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, epsilon=1e-4, mode='min')

In [8]:
model.fit(x_train, x_train, batch_size=batch_size, epochs=50, verbose=1, callbacks=[earlyStopping, mcp_save, reduce_lr_loss], validation_split=0.25)

ValueError: Error when checking target: expected dense_3 to have 2 dimensions, but got array with shape (1471, 75, 75, 3)