# VGG19 model

## Import modules and load data from pickles

In [1]:
import cv2
import numpy as np
import os
import pandas as pd
import pickle

In [2]:
# All input files are read from the `input` directory that sits one level
# above the current working directory.
data_path = os.path.join(os.getcwd(), os.pardir, 'input')

In [3]:
# Spatial size (in pixels) of the images fed to the network; used as the
# model input shape and when batching the test images.
height = width = 256

In [4]:
# Load the pre-built training images and their labels from pickle files.
# NOTE: unpickling can execute arbitrary code -- only load files that were
# produced by a trusted pipeline.
# presumably `images` is an (N, 256, 256, 3) array and `responses` holds the
# matching binary labels -- TODO confirm against the script that wrote them.
with open(os.path.join(data_path, 'train_images_256x256.pkl'), 'rb') as fin:
    images = pickle.load(fin)
with open(os.path.join(data_path, 'train_responses.pkl'), 'rb') as fin:
    responses = pickle.load(fin)

## Normalize images

In [5]:
# Linearly rescale pixel values so that 0 maps to -1 and 255 maps to +1.
# The test images are normalised with the same formula before prediction, so
# the two must be kept in sync.
# (Alternative kept for reference: scale to [0, 1] instead.)
# images = images / 255.
# NOTE(review): if `images` is a uint8 array this creates a float64 copy --
# consider casting to float32 first to halve memory; confirm dtype.
images = images * 2. / 255. - 1.

## Define model architecture

In [6]:
from keras import backend as K
from keras.applications.vgg19 import VGG19
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential, Model
from keras.optimizers import SGD
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

In [7]:
class roc_callback(Callback):
    """Print ROC AUC on fixed datasets at the end of every epoch.

    Only ``on_epoch_end`` is overridden; every other hook is inherited from
    the Keras ``Callback`` base class, which already provides no-op versions.

    Args:
        training_data: ``(x, y)`` pair; the model is re-scored on it after
            each epoch and the result is printed as ``roc-auc``.
        validation_data: Optional ``(x_val, y_val)`` pair. When supplied, its
            ROC AUC is printed on the same line as ``roc-auc_val``. When
            omitted, only the training score is printed.
    """

    def __init__(self, training_data, validation_data=None):
        # Let the base class set up its own bookkeeping attributes.
        super().__init__()
        self.x, self.y = training_data[0], training_data[1]
        # Stored as x_val / y_val (not `validation_data`) so the attribute
        # the base class manages under that name is left untouched.
        if validation_data is None:
            self.x_val = self.y_val = None
        else:
            self.x_val, self.y_val = validation_data[0], validation_data[1]

    def on_epoch_end(self, epoch, logs=None):
        """Score the current model and print the ROC AUC value(s).

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary supplied by Keras (unused).
        """
        roc = roc_auc_score(self.y, self.model.predict(self.x))
        message = 'roc-auc: {}'.format(round(roc, 5))
        if self.x_val is not None:
            roc_val = roc_auc_score(self.y_val, self.model.predict(self.x_val))
            message += ' - roc-auc_val: {}'.format(round(roc_val, 5))
        # '\r' returns to the start of the line so the message is written
        # over the current line; the trailing spaces blank out whatever was
        # left of it before moving to the next line.
        print('\r' + message, end=80 * ' ' + '\n')

In [9]:
def vgg19():
    """Build a binary classifier on top of an ImageNet-pretrained VGG19.

    The convolutional base is created without its original top layers and
    with ``pooling='max'``, so its output is fed straight into the dense head
    without a separate ``Flatten`` layer. The head is two ReLU layers
    (128 and 32 units), each followed by 50% dropout, and a single sigmoid
    output unit.

    Returns:
        A Keras ``Model`` mapping ``(height, width, 3)`` images to one
        sigmoid output per image.
    """
    backbone = VGG19(
        weights='imagenet',
        include_top=False,
        pooling='max',
        input_shape=(height, width, 3),
    )
    features = backbone.output
    # Hidden layers of the classification head, widest first.
    for units in (128, 32):
        features = Dense(units, activation='relu')(features)
        features = Dropout(0.5)(features)
    probability = Dense(1, activation='sigmoid')(features)
    return Model(inputs=[backbone.input], outputs=[probability])

# Instantiate the network once and print its layer-by-layer summary.
model = vgg19()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 256, 256, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 64, 64, 128)       0         
__________

In [10]:
# Stage 1: 10 epochs of SGD with momentum 0.9 at learning rate 1e-4.
# NOTE: the callback scores ROC AUC on the very data the model is fitted on
# and no validation split is used, so the printed values measure fit to the
# training set rather than generalisation.
model.compile(loss='binary_crossentropy', optimizer=SGD(lr=1e-4, momentum=0.9), metrics=['accuracy'])
model.fit(images, responses, batch_size=16, epochs=10, callbacks=[roc_callback(training_data=(images, responses))])

Epoch 1/10
roc-auc: 0.99515                                                                                
Epoch 2/10
roc-auc: 0.99743                                                                                
Epoch 3/10
roc-auc: 0.99874                                                                                
Epoch 4/10
roc-auc: 0.99919                                                                                
Epoch 5/10
roc-auc: 0.9995                                                                                
Epoch 6/10
roc-auc: 0.99936                                                                                
Epoch 7/10
roc-auc: 0.99965                                                                                
Epoch 8/10
roc-auc: 0.99972                                                                                
Epoch 9/10
roc-auc: 0.99976                                                                                
Epoch 10/10
roc-auc: 0.99993 

<keras.callbacks.History at 0x7f5facc41908>

In [11]:
# Checkpoint after the first 10 epochs.
model.save('vgg19_10epoch.h5')

In [12]:
# Stage 2: continue training the same weights for 10 more epochs with the
# learning rate lowered to 1e-5.
# NOTE(review): re-compiling passes a fresh SGD instance, so the momentum
# state from the previous stage is presumably discarded -- confirm if that
# matters.
model.compile(loss='binary_crossentropy', optimizer=SGD(lr=1e-5, momentum=0.9), metrics=['accuracy'])
model.fit(images, responses, batch_size=16, epochs=10, callbacks=[roc_callback(training_data=(images, responses))])

Epoch 1/10
roc-auc: 0.99994                                                                                
Epoch 2/10
roc-auc: 0.99995                                                                                
Epoch 3/10
roc-auc: 0.99995                                                                                
Epoch 4/10
roc-auc: 0.99996                                                                                
Epoch 5/10
roc-auc: 0.99996                                                                                
Epoch 6/10
roc-auc: 0.99997                                                                                
Epoch 7/10
roc-auc: 0.99997                                                                                
Epoch 8/10
roc-auc: 0.99998                                                                                
Epoch 9/10
roc-auc: 0.99998                                                                                
Epoch 10/10
roc-auc: 0.99999

<keras.callbacks.History at 0x7f5eef0bf390>

In [13]:
# Checkpoint after 20 cumulative epochs (10 at 1e-4 + 10 at 1e-5).
model.save('vgg19_20epoch.h5')

In [14]:
# Stage 3: final 5 epochs with the learning rate lowered again to 1e-6.
model.compile(loss='binary_crossentropy', optimizer=SGD(lr=1e-6, momentum=0.9), metrics=['accuracy'])
model.fit(images, responses, batch_size=16, epochs=5, callbacks=[roc_callback(training_data=(images, responses))])

Epoch 1/5
roc-auc: 0.99999                                                                                
Epoch 2/5
roc-auc: 0.99999                                                                                
Epoch 3/5
roc-auc: 0.99999                                                                                
Epoch 4/5
roc-auc: 0.99999                                                                                
Epoch 5/5
roc-auc: 0.99999                                                                                


<keras.callbacks.History at 0x7f5eed1eecc0>

In [15]:
# Final checkpoint after 25 cumulative epochs; this is the model used for
# the predictions below.
model.save('vgg19_25epoch.h5')

## Load test set and predict

In [16]:
def img_as_array(image_id, size=None, image_set='train_images'):
    """Read one image from disk as an array, optionally resizing it.

    Args:
        image_id: File name of the image inside the ``image_set`` directory.
        size: Optional ``(width, height)`` tuple handed to ``cv2.resize``.
            When ``None`` the image is returned at its stored resolution.
        image_set: Sub-directory of ``data_path`` that contains the image.

    Returns:
        The image decoded by ``cv2.imread``, resized when ``size`` is given.

    Raises:
        IOError: If OpenCV cannot read the file (missing, unreadable or not
            a supported image format).
    """
    image_path = os.path.join(data_path, image_set, image_id)
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising;
    # fail here with the offending path instead of a confusing error later.
    if img is None:
        raise IOError('Could not read image: {}'.format(image_path))
    if size is None:
        return img
    return cv2.resize(img, size)

In [17]:
# Sub-directories of `data_path` holding the images to be scored.
test_dir, holdout_dir = 'leaderboard_test_data', 'leaderboard_holdout_data'

In [18]:
# Load every image from the test and hold-out directories into one batch,
# recording the file name of each row for the submission file.
test_images = []
test_ids = []
for image_set in (test_dir, holdout_dir):
    for image_id in os.listdir(os.path.join(data_path, image_set)):
        # Resize explicitly to the network input size. Reshaping an image of
        # a different resolution would either fail or, when the element
        # count happens to match, silently scramble its pixels.
        # cv2.resize expects (width, height).
        img = img_as_array(image_id, size=(width, height), image_set=image_set)
        test_images.append(img)
        test_ids.append(image_id)
# Stack the individual (height, width, 3) images into (N, height, width, 3).
test_images = np.stack(test_images, axis=0)

In [19]:
# Apply the same [-1, 1] rescaling that was applied to the training images;
# the model must see identically normalised inputs at prediction time.
# (Alternative kept for reference: scale to [0, 1] instead.)
# test_images = test_images / 255.
test_images = test_images * 2. / 255. - 1.

In [20]:
# Score every test and hold-out image; the network ends in a single sigmoid
# unit, so each row holds one value per image.
predictions = model.predict(test_images)

In [21]:
# Flatten the per-image prediction array into a plain list of floats.
# ravel() is used rather than squeeze() so that a single-image batch still
# yields a one-element list instead of a bare float, which the zip() in the
# submission step could not iterate.
predictions = predictions.ravel().tolist()

In [22]:
# Write the submission file: a header row followed by one
# "<image file name>,<predicted value>" row per scored image.
with open('submission.csv', 'w') as fout:
    fout.write("image_id,has_oilpalm\n")
    fout.writelines(
        "{},{}\n".format(*row) for row in zip(test_ids, predictions)
    )