# Predict on several crops of each raw image (three half-width crops plus the full frame) and combine the results

In [53]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import cv2
import math
from glob import glob
import os
import sys
from keras.models import load_model
# Sentinel for lazy loading: a later cell loads the saved model only while
# this is still unset. Re-running this cell resets it and forces a reload.
model = None

## Load and preprocess training images

In [54]:
# Read the training labels and derive, for every row, the path of the image
# file it refers to plus the value of its label column.
master = pd.read_csv("../input/train_labels.csv")
img_path = "../input/train/"

# Column 0 is used as the file stem and column 1 as the label. Whole-column
# access replaces the original per-row .iloc lookups, which were slow and
# produced exactly the same values.
file_paths = [img_path + str(name) + '.jpg' for name in master.iloc[:, 0]]
y = np.array(master.iloc[:, 1])

In [55]:
# Zero-pad an image onto a fixed-size canvas so that it sits in the centre
# (resizing and cropping are done by the caller)

def centering_image(img, size=(256, 256)):
    """Place ``img`` in the centre of a zero-filled canvas.

    Parameters
    ----------
    img : numpy.ndarray
        Image of shape ``(rows, cols)`` or ``(rows, cols, channels)``.
    size : sequence of two ints, optional
        Canvas size as ``(rows, cols)``. Defaults to ``(256, 256)``, the
        value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``size`` (plus the channel axis when ``img``
        has one) with ``img`` copied into the middle and zeros elsewhere.

    Raises
    ------
    ValueError
        If ``img`` does not fit inside the canvas.
    """
    canvas_rows, canvas_cols = int(size[0]), int(size[1])
    img_rows, img_cols = img.shape[:2]

    if img_rows > canvas_rows or img_cols > canvas_cols:
        raise ValueError(
            "image of shape {0} does not fit into a {1}x{2} canvas".format(
                img.shape, canvas_rows, canvas_cols))

    # Row offset comes from the canvas height and column offset from the
    # canvas width. The original mixed the two indices, which only worked
    # because the canvas was square.
    row = (canvas_rows - img_rows) // 2
    col = (canvas_cols - img_cols) // 2

    # img.shape[2:] is empty for 2-D input, so grayscale images work too.
    resized = np.zeros((canvas_rows, canvas_cols) + tuple(img.shape[2:]),
                       dtype=np.uint8)
    resized[row:row + img_rows, col:col + img_cols] = img

    return resized

## Build four views per image: three overlapping half-width crops plus the full frame

In [56]:
# Build the model input for the training images. Every image contributes four
# 224x224 RGB tiles scaled to [0, 1]: three overlapping half-width crops
# (left, middle, right) followed by the full frame.
#
# The output array is allocated once up front instead of growing nested lists
# and converting them afterwards, which held two copies of the data in memory.
X = np.empty((len(file_paths), 4, 224, 224, 3), dtype=np.float32)

for img_idx, file_path in enumerate(file_paths):
    sys.stdout.write("\r {0} from total {1} images".format(file_path, len(file_paths)))
    sys.stdout.flush()

    # cv2.imread signals failure by returning None rather than raising, so
    # check here to get an error that names the offending file.
    img = cv2.imread(file_path)
    if img is None:
        raise IOError("could not read image: {0}".format(file_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Three windows, each half the image width, shifted by a quarter width,
    # then the whole image as the fourth view.
    dw = img.shape[1] // 4
    views = [img[:, dw * i:dw * (i + 2), :] for i in range(3)]
    views.append(img)

    for view_idx, view in enumerate(views):
        height, width = view.shape[:2]

        # Scale the longer side to 256 px, keeping the aspect ratio.
        # cv2.resize takes dsize as (width, height); max(1, ...) keeps the
        # short side from collapsing to zero on extreme aspect ratios.
        if height > width:
            tile_size = (max(1, int(width * 256 / height)), 256)
        else:
            tile_size = (256, max(1, int(height * 256 / width)))

        # Pad to 256x256, cut out the central 224x224 window, scale to [0, 1].
        tile = centering_image(cv2.resize(view, dsize=tile_size))
        X[img_idx, view_idx] = tile[16:240, 16:240].astype('float32') / 255.0

 ../input/train/2295.jpg from total 2295 images

In [57]:
# Remember the shape of X before it is flattened; later cells use the leading
# two axes to regroup predictions and the trailing three as the image shape.
inshape = X.shape
inshape

(2295, 4, 224, 224, 3)

## Flatten the per-image crops of X into a single batch axis (pixel values were already scaled to [0, 1] above)

In [58]:
# Merge the leading axes into one batch axis; the trailing three axes of the
# recorded shape are kept as they are.
X = X.reshape(-1, *inshape[-3:])

## Load saved model `VGG16-transferlearning.model`

In [59]:
# Load the saved network only once per kernel session. Compare against the
# None sentinel explicitly instead of relying on the truthiness of a model
# object.
if model is None:
    model = load_model('VGG16-transferlearning.model')

In [60]:
# Run the model on the flattened batch, then fold the predictions back into
# the leading two axes recorded in inshape.
y_pred = model.predict(X).reshape(inshape[:2])

In [61]:
# Two ways of combining the predictions along axis 1: their average and
# their maximum.
y_pred_mean = y_pred.mean(axis=1)
y_pred_max = y_pred.max(axis=1)

## Compare labels with predictions

In [62]:
# Turn each combined probability into a 0/1 class using a 0.5 threshold.
y_pred_mean_cls = np.greater(y_pred_mean, 0.5).astype('int')
y_pred_max_cls = np.greater(y_pred_max, 0.5).astype('int')

In [63]:
# Attach the combined probabilities and thresholded classes to a copy of the
# label table.
df_pred2 = master.copy()
for column, values in (
        ('prob_mean', y_pred_mean),
        ('pred_mean', y_pred_mean_cls),
        ('prob_max', y_pred_max),
        ('pred_max', y_pred_max_cls),
):
    df_pred2[column] = values

# NOTE: despite the "miscls" name, these flags are 1 where the prediction
# matches the label and 0 where it does not.
for flag_column, pred_column in (
        ('miscls_mean', 'pred_mean'),
        ('miscls_max', 'pred_max'),
):
    df_pred2[flag_column] = (df_pred2['invasive'] == df_pred2[pred_column]).astype('int')

In [64]:
# Rows whose flag is 0 are the ones where prediction and label disagree.
n_wrong_mean = int((df_pred2['miscls_mean'] == 0).sum())
n_wrong_max = int((df_pred2['miscls_max'] == 0).sum())
print('misclassified with mean: {}'.format(n_wrong_mean))
print('misclassified with max: {}'.format(n_wrong_max))

misclassified with mean: 26
misclassified with max: 44


# Predict on test dataset

## Load and preprocess test images

In [65]:
# Read the sample submission to obtain the test image ids and build the path
# of every test image from them.
# NOTE: img_path and file_paths are rebound here from the training values to
# the test values; the cells below depend on that.
sample_submission = pd.read_csv("../input/sample_submission.csv")
img_path = "../input/test/"

# Column 0 supplies the ids. Whole-column access replaces the original
# per-row .iloc lookups and yields the same values.
test_names = np.array(sample_submission.iloc[:, 0])
file_paths = [img_path + str(int(name)) + '.jpg' for name in test_names]

In [66]:
# Build the model input for the test images. Every image contributes four
# 224x224 RGB tiles scaled to [0, 1]: three overlapping half-width crops
# (left, middle, right) followed by the full frame.
#
# The output array is allocated once up front instead of growing nested lists
# and converting them afterwards, which held two copies of the data in memory.
X_test = np.empty((len(file_paths), 4, 224, 224, 3), dtype=np.float32)

for img_idx, file_path in enumerate(file_paths):
    sys.stdout.write("\r {0} from total {1} images".format(file_path, len(file_paths)))
    sys.stdout.flush()

    # cv2.imread signals failure by returning None rather than raising, so
    # check here to get an error that names the offending file.
    img = cv2.imread(file_path)
    if img is None:
        raise IOError("could not read image: {0}".format(file_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Three windows, each half the image width, shifted by a quarter width,
    # then the whole image as the fourth view.
    dw = img.shape[1] // 4
    views = [img[:, dw * i:dw * (i + 2), :] for i in range(3)]
    views.append(img)

    for view_idx, view in enumerate(views):
        height, width = view.shape[:2]

        # Scale the longer side to 256 px, keeping the aspect ratio.
        # cv2.resize takes dsize as (width, height); max(1, ...) keeps the
        # short side from collapsing to zero on extreme aspect ratios.
        if height > width:
            tile_size = (max(1, int(width * 256 / height)), 256)
        else:
            tile_size = (256, max(1, int(height * 256 / width)))

        # Pad to 256x256, cut out the central 224x224 window, scale to [0, 1].
        tile = centering_image(cv2.resize(view, dsize=tile_size))
        X_test[img_idx, view_idx] = tile[16:240, 16:240].astype('float32') / 255.0

 ../input/test/1531.jpg from total 1531 images

In [68]:
# Remember the shape of X_test before it is flattened; later cells use the
# leading two axes to regroup predictions and the trailing three as the image
# shape. This rebinds the inshape name used earlier for the training array.
inshape = X_test.shape
inshape

(1531, 4, 224, 224, 3)

In [70]:
# Merge the leading axes into one batch axis; the trailing three axes of the
# recorded shape are kept as they are.
X_test = X_test.reshape(-1, *inshape[-3:])

## Make predictions

In [71]:
# Run the model on the flattened test batch, then fold the predictions back
# into the leading two axes recorded in inshape.
y_test_pred = model.predict(X_test).reshape(inshape[:2])

In [72]:
# Combine the test predictions along axis 1 by average and by maximum.
y_test_pred_mean = y_test_pred.mean(axis=1)
y_test_pred_max = y_test_pred.max(axis=1)

## Write predictions to file

In [74]:
def submission_to_csv(test_preds, file_path, names=None,
                      sample_path="../input/sample_submission.csv"):
    """Write predictions into a copy of the sample submission file.

    Parameters
    ----------
    test_preds : sequence of float
        One prediction per entry of ``names``, in the same order.
    file_path : str
        Destination CSV path.
    names : sequence, optional
        Values of the ``name`` column that the predictions belong to.
        Defaults to the notebook-level ``test_names`` array.
    sample_path : str, optional
        CSV providing the ``name`` / ``invasive`` layout of the output.

    Raises
    ------
    ValueError
        If ``names`` and ``test_preds`` differ in length.

    Notes
    -----
    Rows whose ``name`` is not listed in ``names`` keep the value from the
    sample file. When a name occurs more than once, the last prediction wins.
    """
    if names is None:
        names = test_names
    if len(names) != len(test_preds):
        raise ValueError(
            "got {0} names but {1} predictions".format(len(names), len(test_preds)))

    submission = pd.read_csv(sample_path)

    # One dictionary lookup per row replaces a full boolean-mask scan of the
    # table for every single name, which was quadratic in the number of rows.
    lookup = dict(zip(names, np.asarray(test_preds, dtype='float64')))
    matched = submission['name'].isin(list(lookup))
    if matched.any():
        invasive = submission['invasive'].astype('float64')
        invasive[matched] = submission.loc[matched, 'name'].map(lookup).astype('float64')
        submission['invasive'] = invasive

    submission.to_csv(file_path, index=False)

In [None]:
# Submission built from the averaged predictions.
submission_to_csv(y_test_pred_mean, 'submit_mean.csv')

In [None]:
# Submission built from the maximum predictions.
submission_to_csv(y_test_pred_max, 'submit_max.csv')