In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from keras.layers import Dense
from keras.models import Model
from keras.applications.vgg16 import VGG16
import cv2
%matplotlib inline

In [6]:
# Directory holding the competition JSON files. Setting the STATOIL_DATA_DIR
# environment variable overrides the original hard-coded location, so the
# notebook can run on another machine without editing this cell.
DATA_DIR = os.environ.get('STATOIL_DATA_DIR', '/home/jeff/Downloads/data/statoil/')

train = pd.read_json(os.path.join(DATA_DIR, 'train.json'))

# Entries of 'inc_angle' that cannot be parsed as numbers become NaN
# (errors='coerce').
train['inc_angle'] = pd.to_numeric(train['inc_angle'], errors='coerce')

# Forward-fill missing values from the preceding row. `ffill()` is the
# non-deprecated spelling of `fillna(method='pad')`; rebinding the name instead
# of using `inplace=True` keeps the cell free of in-place mutation.
# NOTE: a NaN in the very first row has no predecessor and stays NaN.
train = train.ffill()

In [11]:
# Every record stores a band as a flat sequence; turn each one into a
# 75x75 float32 image and collect the images into one array per band.
X_band_1 = np.array([
    np.array(band).astype(np.float32).reshape(75, 75)
    for band in train["band_1"]
])
X_band_2 = np.array([
    np.array(band).astype(np.float32).reshape(75, 75)
    for band in train["band_2"]
])

# Third channel: element-wise mean of the two bands.
X_band_3 = (X_band_1 + X_band_2) / 2

# Put the three bands side by side on a new trailing (channel) axis.
X_train = np.stack([X_band_1, X_band_2, X_band_3], axis=-1)

In [12]:
import pandas as pd
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.optimizers import SGD

# Small convolutional classifier: one block of two 3x3 convolutions plus
# max-pooling, followed by two fully connected layers and a single sigmoid
# output.

# input: 75x75 images with 3 channels, matching the trailing axes of X_train
model_input = Input(shape=(75,75,3))

# hidden layers
# Block 1
x = Conv2D(16, (3, 3), activation='relu', padding='same', name='block1_conv1')(model_input)
x = Conv2D(16, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

# Classifier head
x = Flatten(name='flatten')(x)
x = Dense(100, activation='relu', name='fc1')(x)
x = Dense(100, activation='relu', name='fc2')(x)
# Single unit followed by a sigmoid, paired with binary cross-entropy below.
x = Dense(1, name='pre-predictions')(x)
x = Activation('sigmoid')(x)

sgd = SGD(lr=0.01)
model = Model(model_input, x)

model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Training labels. They are assigned here, before the first use, so that the
# cell also works on a fresh kernel (Restart & Run All) instead of relying on
# `y_train` left over from a cell that appears further down the notebook.
y_train = train['is_iceberg'].values

# validation_split=0: no samples are held out for validation.
model.fit(X_train, y_train, batch_size=32, epochs=10, verbose=1, validation_split=0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7faafbb1ff98>

In [9]:
# Print the layer-by-layer structure of `model`: layer name and type, output
# shape, and parameter count for each layer.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 75, 75, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 75, 75, 16)        448       
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 75, 75, 16)        2320      
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 37, 37, 16)        0         
_________________________________________________________________
flatten (Flatten)            (None, 21904)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 100)               2190500   
_________________________________________________________________
fc2 (Dense)                  (None, 100)               10100     
__________

In [21]:
# Quick look at the training labels. They are read straight from the frame so
# that this cell does not depend on `y_train` having been assigned by some
# other cell first. The earlier `np.matrix(...).shape` expression was dropped:
# its value was neither displayed nor stored, and NumPy discourages
# `np.matrix`.
train['is_iceberg'].values

array([0, 0, 1, ..., 0, 0, 0])

In [15]:
# Binary targets taken from the 'is_iceberg' column of the training frame.
y_train = train.loc[:, 'is_iceberg'].values

# Fit the same `model` object for another 10 epochs; batch size and the other
# options are left at the Keras defaults.
model.fit(x=X_train, y=y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fab0348fc50>

In [16]:
# Read the test records and build the image tensor with the same three
# channels used for training: band 1, band 2, and their element-wise mean.
test = pd.read_json(os.path.join(DATA_DIR, 'test.json'))

X_band_test_1 = np.array([
    np.array(band).astype(np.float32).reshape(75, 75)
    for band in test["band_1"]
])
X_band_test_2 = np.array([
    np.array(band).astype(np.float32).reshape(75, 75)
    for band in test["band_2"]
])
X_band_test_3 = (X_band_test_1 + X_band_test_2) / 2

# Stack the bands along a new trailing (channel) axis.
X_test = np.stack([X_band_test_1, X_band_test_2, X_band_test_3], axis=-1)

In [18]:
# Run the fitted network on the test images. The model ends in Dense(1)
# followed by a sigmoid, so each row of `preds` carries one value; the cell
# that writes the CSV reads it as `pred[0]`.
preds = model.predict(X_test)

In [22]:
 # Write the predictions as a two-column CSV (header `id,is_iceberg`) next to
 # the input data. Ids and predictions are paired by position with zip(), so
 # the pairing does not depend on the labels of the frame's index the way
 # `test['id'][i]` does.
 with open(os.path.join(DATA_DIR, 'predictions.csv'), 'w') as pred_file:
     pred_file.write('id,is_iceberg\n')
     for sample_id, pred in zip(test['id'], preds):
         # Each prediction row holds a single value; write that scalar.
         pred_file.write('{},{}\n'.format(sample_id, pred[0]))