# ResNet50 model

## Import modules and load data from pickles

In [None]:
import cv2
import numpy as np
import os
import pandas as pd
import pickle

from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.callbacks import Callback
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.models import Model, load_model
from keras.optimizers import SGD
from sklearn.metrics import roc_auc_score

In [None]:
# All inputs are read from an "input" directory that sits next to the current
# working directory.
data_path = os.path.join(os.getcwd(), os.pardir, 'input')

In [None]:
# Image side lengths in pixels; used for the model's input shape and when
# reshaping the test images.
height = width = 256

In [None]:
def _load_pickle(filename):
    """Unpickle and return the object stored in ``filename`` under ``data_path``."""
    with open(os.path.join(data_path, filename), 'rb') as fin:
        return pickle.load(fin)


# NOTE: unpickling can execute arbitrary code, so these files must come from a
# trusted source.
train_images = _load_pickle('train_images_256x256.pkl')
train_responses = _load_pickle('train_responses.pkl')
augmented_images = _load_pickle('augmented_images_256x256.pkl')
augmented_responses = _load_pickle('augmented_responses.pkl')

In [None]:
# Stack the original and the augmented samples along the first (sample) axis.
images = np.concatenate((train_images, augmented_images))
responses = np.concatenate((train_responses, augmented_responses))
# The merged arrays are copies, so release the separate pieces to free memory.
del train_images, augmented_images, train_responses, augmented_responses

In [None]:
# Shuffle data: draw one random ordering and apply it to both arrays so every
# image stays paired with its response.
permutation = np.random.permutation(len(images))
images = images[permutation]
responses = responses[permutation]

In [None]:
# with open(os.path.join(data_path, 'train_images_256x256.pkl'), 'rb') as fin:
#     images = pickle.load(fin)
# with open(os.path.join(data_path, 'train_responses.pkl'), 'rb') as fin:
#     responses = pickle.load(fin)

## Normalize images

In [None]:
# Alternative scalings that were tried earlier (kept for reference):
# images = images / 255.
# images = images * 2. / 255. - 1.
# Use the preprocessing function imported from keras.applications.resnet50,
# i.e. the one that belongs to the pretrained ResNet50 used below.
# NOTE(review): confirm the pickled images use the channel order this function
# expects.
images = preprocess_input(images)

## Define model architecture

In [None]:
class roc_callback(Callback):
    """Keras callback that prints the ROC AUC at the end of every epoch.

    The score is always computed on the training data given at construction.
    When ``validation_data`` is supplied as well, its ROC AUC is appended to
    the same output line.

    Args:
        training_data: sequence whose first item is the model input and whose
            second item holds the true labels.
        validation_data: optional sequence with the same layout as
            ``training_data``; ``None`` (the default) disables validation
            scoring.

    The remaining ``on_*`` hooks are inherited from ``Callback``, where they
    are already no-ops, so they are not overridden here.
    """

    def __init__(self, training_data, validation_data=None):
        # Let the base class set up its own attributes before adding ours.
        super().__init__()
        self.x = training_data[0]
        self.y = training_data[1]
        if validation_data is None:
            self.x_val = None
            self.y_val = None
        else:
            self.x_val = validation_data[0]
            self.y_val = validation_data[1]

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the stored data and print the resulting ROC AUC."""
        roc = roc_auc_score(self.y, self.model.predict(self.x))
        message = 'roc-auc: {}'.format(round(roc, 5))
        if self.x_val is not None:
            roc_val = roc_auc_score(self.y_val, self.model.predict(self.x_val))
            message += ' - roc-auc_val: {}'.format(round(roc_val, 5))
        # '\r' returns to the start of the line and the trailing spaces blank
        # out whatever was left of the progress bar.
        print('\r' + message, end=80 * ' ' + '\n')

In [None]:
def resnet50():
    """Build a binary classifier on top of an ImageNet-pretrained ResNet50.

    The backbone is created without its classification top and with
    ``pooling='avg'``; a single sigmoid unit is attached to its output.

    Returns:
        A Keras ``Model`` taking ``(height, width, 3)`` inputs and producing
        one sigmoid output per sample.
    """
    resnet = ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=(height, width, 3),
        pooling='avg',
    )
    # The head is attached straight to the backbone output. The earlier code
    # passed an undefined name `x` here, which raised NameError because every
    # intermediate layer that used to assign it had been commented out.
    x = Dense(1, activation='sigmoid')(resnet.output)
    return Model(inputs=[resnet.input], outputs=[x])

# Instantiate the network and print its layer-by-layer summary.
model = resnet50()
model.summary()

In [None]:
# Configure training: binary cross-entropy loss, SGD with Nesterov momentum,
# and accuracy reported alongside the loss.
model.compile(
    optimizer=SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Train on the whole data set; the callback prints the ROC AUC on that same
# data after every epoch.
model.fit(
    images,
    responses,
    batch_size=16,
    epochs=20,
    callbacks=[roc_callback(training_data=(images, responses))],
)

## Load test set and predict

In [None]:
def img_as_array(image_id, size=None, image_set='train_images'):
    """Read the image ``data_path/image_set/image_id`` with OpenCV.

    Args:
        image_id: file name of the image inside the ``image_set`` directory.
        size: optional target size handed to ``cv2.resize``; when ``None`` the
            image is returned exactly as read.
        image_set: sub-directory of ``data_path`` that contains the image.

    Returns:
        The array produced by ``cv2.imread``, resized when ``size`` is given.

    Raises:
        OSError: if OpenCV could not read the file.
    """
    image_path = os.path.join(data_path, image_set, image_id)
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising. Fail
    # here with the offending path rather than letting callers hit an
    # unrelated AttributeError later.
    if img is None:
        raise OSError('Could not read image: {}'.format(image_path))
    if size is None:
        return img
    return cv2.resize(img, size)

In [None]:
# Sub-directories of the input folder whose images are scored for submission.
test_dir, holdout_dir = 'leaderboard_test_data', 'leaderboard_holdout_data'

In [None]:
# Read every image from the test directory and then from the holdout directory,
# keeping the file names so predictions can be matched to their ids.
test_images = []
test_ids = []
for image_dir in (test_dir, holdout_dir):
    for image_id in os.listdir(os.path.join(data_path, image_dir)):
        img = img_as_array(image_id, image_set=image_dir)
        # Add a leading batch axis so the images can be joined along axis 0.
        test_images.append(img.reshape(1, height, width, 3))
        test_ids.append(image_id)
test_images = np.concatenate(test_images, axis=0)

In [None]:
# Alternative scalings that were tried earlier (kept for reference):
# test_images = test_images / 255.
# test_images = test_images * 2. / 255. - 1.
# Apply the same preprocessing function that was applied to the training
# images, so the model sees test inputs on the same scale.
test_images = preprocess_input(test_images)

In [None]:
# Run the trained model on the preprocessed test and holdout images.
predictions = model.predict(test_images)

In [None]:
# Flatten the model output into a plain Python list with one value per image.
# reshape(-1) is used instead of squeeze() because squeeze() collapses a
# single-row output to a 0-d array, whose tolist() is a bare float rather than
# a list.
predictions = predictions.reshape(-1).tolist()

In [None]:
# Pair every image id with its prediction and order the rows by id.
submission = pd.DataFrame(
    data={'image_id': test_ids, 'has_oilpalm': predictions},
).sort_values(by='image_id')

In [None]:
# Write the table to submission.csv in the working directory, leaving out the
# DataFrame index column.
submission.to_csv('submission.csv', index=False)

In [None]:
# with open('submission.csv','w') as fout:
#     fout.write("image_id,has_oilpalm\n")
#     for image_id, has_oilpalm in zip(test_ids, predictions):
#         fout.write("{},{}\n".format(image_id, has_oilpalm))