# Preprocessing

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import cv2
import os

In [2]:
from keras.models import Model
from keras.applications.nasnet import NASNetMobile
from keras.applications.nasnet import preprocess_input
from keras.preprocessing import image

Using TensorFlow backend.


## Cleaning 

In [6]:
# directory holding the raw training images (read by the preprocessing loop)
TRAIN_DIR = './data/train/'
# directory the preprocessed (padded + downsampled) training images are written to
TRAIN_CLEAN_DIR = './data/train_clean/'

In [7]:
# load the training labels; later cells use its `Image` (file name) and `Id` (class) columns
labels = pd.read_csv('./data/train.csv')

### Color Model Standardization

A large fraction of the images are either grayscale or binary (black and white).
The simplest option here is to convert the whole image corpus to grayscale in order to avoid various issues that may arise due to representation non-conformity. However, chroma information can possibly be a useful discrimination aid for the majority of images which do contain it. Hence, we'll avoid discarding it by augmenting our dataset with grayscale images to reduce learned reliance on color features. We'll handle the last issue in the augmentation section.

In [34]:
# NOTE:
# cv2.imread(path,cv2.IMREAD_COLOR) implicitly upchannels grayscale images to BGR

## Resolution Standardization

We'll resize to 700x1050 pixels (most samples are near these dimensions) by zero padding to a 3/2 aspect ratio, then do the actual resizing via `cv2.resize` (linear interpolation).

In [36]:
ASPECT_RATIO = 3/2
def get_padded_dimensions(img):
    """Compute the canvas size needed to pad `img` to ASPECT_RATIO (width/height).

    Only one side is ever enlarged: the height when the image is wider than
    the target ratio, the width otherwise.

    Parameters
    ----------
    img : array with a 3-element `shape` (height, width, depth)

    Returns
    -------
    tuple of (height, width, depth) for the padded canvas
    """
    rows, cols, depth = img.shape
    if cols/rows > ASPECT_RATIO:
        # too wide: keep the width and grow the height
        return int(np.round(cols/ASPECT_RATIO)), cols, depth
    # too tall (or already on target): keep the height and grow the width
    return rows, int(rows*ASPECT_RATIO), depth

## Subsampling

At 700x1050 pixels, each image has too many features to process in a reasonable amount of time (given our limited resources). We'll use 140x210 pixel images (roughly in line with ImageNet dataset dimensions) to enable faster training and inference.

## Data Selection

We'll mostly ignore the `new_whale` class since it's both too coarsely defined and composes the majority of the data - this might lead to model confusion. Furthermore, there's enough data to provide ample negative and positive exemplars after discarding it.

In [112]:
# keep only the rows whose Id is not the catch-all `new_whale` class
is_known_whale = labels['Id'].ne('new_whale')
labels = labels[is_known_whale]

## Preprocessing Pipeline

In [None]:
def preprocess(img_file_path, size=(210, 140)):
    """Load an image, zero-pad it to the target aspect ratio and downsample it.

    Parameters
    ----------
    img_file_path : str
        Path of the image file to load.
    size : tuple of int, optional
        Output size passed to `cv2.resize`, given as (width, height).
        Defaults to (210, 140).

    Returns
    -------
    numpy.ndarray
        The padded and resized uint8 image.

    Raises
    ------
    OSError
        If the file is missing or cannot be decoded.
    """
    # IMREAD_COLOR implicitly upchannels grayscale images to 3-channel BGR
    img = cv2.imread(img_file_path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising
    if img is None:
        raise OSError('could not read image: ' + str(img_file_path))
    # get padded image dimensions
    h, w, d = get_padded_dimensions(img)
    # apply padding: original pixels go in the top-left corner, the rest stays zero
    new_img = np.zeros((h, w, d), dtype=np.uint8)
    new_img[:img.shape[0], :img.shape[1], :] = img
    # downsample
    return cv2.resize(new_img, size)

In [57]:
# apply preprocessing to training set
# make sure the output directory exists: cv2.imwrite does not create it
os.makedirs(TRAIN_CLEAN_DIR, exist_ok=True)
for img_name in labels.Image:
    img = preprocess(TRAIN_DIR+img_name)
    # cv2.imwrite reports failure through its return value rather than an exception
    if not cv2.imwrite(TRAIN_CLEAN_DIR+img_name, img):
        raise OSError('could not write image: ' + TRAIN_CLEAN_DIR+img_name)

## Data Augmentation

A significant number of known whales show up in only a single image. We'll augment the dataset by duplicating and distorting one image from each label, which covers every single-image class. We mostly want to generate enough samples to reduce chromatic bias/confusion and to generate a second sample for the single-image classes.

In [99]:
# based on code from https://github.com/mdbloice/Augmentor
def perspective_augmentation(img, out_size=(210, 140)):
    """Apply a random perspective tilt combined with a random zoom.

    One of four tilt directions (left, right, forward, backward) is picked
    at random, together with a random skew amount and a zoom coefficient
    drawn from [0.8, 1.2).

    Parameters
    ----------
    img : numpy.ndarray
        Input image; only its first two dimensions (height, width) are read.
    out_size : tuple of int, optional
        Size of the warped output passed to `cv2.warpPerspective`, given as
        (width, height). Defaults to (210, 140).

    Returns
    -------
    numpy.ndarray
        The warped image.
    """
    h,w = img.shape[:2]
    # randomly select the skew amount
    skew_amount = np.random.randint(1, max(w, h))
    # image corner positions
    # NOTE: naming follows Augmentor; each corner tuple is (horizontal, vertical),
    # i.e. the `y*` values span the width and the `x*` values span the height
    x1 = 0; x2 = h; y1 = 0; y2 = w
    original_plane = [(y1, x1), (y2, x1), (y2, x2), (y1, x2)]
    # randomly select a tilt direction
    # (the upper bound of randint is exclusive, so 4 is needed to reach direction 3)
    skew_direction = np.random.randint(0, 4)
    if skew_direction == 0:
        # Left Tilt
        new_plane = [(y1, x1 - skew_amount),  # Top Left
                     (y2, x1),                # Top Right
                     (y2, x2),                # Bottom Right
                     (y1, x2 + skew_amount)]  # Bottom Left
    elif skew_direction == 1:
        # Right Tilt
        new_plane = [(y1, x1),                # Top Left
                     (y2, x1 - skew_amount),  # Top Right
                     (y2, x2 + skew_amount),  # Bottom Right
                     (y1, x2)]                # Bottom Left
    elif skew_direction == 2:
        # Forward Tilt
        new_plane = [(y1 - skew_amount, x1),  # Top Left
                     (y2 + skew_amount, x1),  # Top Right
                     (y2, x2),                # Bottom Right
                     (y1, x2)]                # Bottom Left
    elif skew_direction == 3:
        # Backward Tilt
        new_plane = [(y1, x1),                # Top Left
                     (y2, x1),                # Top Right
                     (y2 + skew_amount, x2),  # Bottom Right
                     (y1 - skew_amount, x2)]  # Bottom Left
    # calculate perspective transform matrix coefficients
    M = cv2.getPerspectiveTransform(np.float32(original_plane), np.float32(new_plane))
    # also do a zoom augmentation (i.e scale transform) while we're at it,
    # by scaling the two leading diagonal terms of the transform matrix
    zoom_coefficient = np.random.uniform(0.8,1.2)
    M[0,0] = M[0,0]*zoom_coefficient
    M[1,1] = M[1,1]*zoom_coefficient
    # apply perspective transform
    dst = cv2.warpPerspective(img,M,out_size)
    return dst

In [100]:
# keep the first listed image of every whale Id (one row per class)
unique_labels = labels.drop_duplicates(subset='Id', keep='first')
unique_labels.count()

Image    5004
Id       5004
dtype: int64

In [81]:
# iterate over the file names directly: `iterrows()` yields (index, row) tuples,
# which have no `.Image` attribute
for img_name in unique_labels.Image:
    img = cv2.imread(TRAIN_CLEAN_DIR+img_name, cv2.IMREAD_GRAYSCALE)  # load as monochrome
    # cv2.imread signals failure by returning None instead of raising
    if img is None:
        raise OSError('could not read image: ' + TRAIN_CLEAN_DIR+img_name)
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)  # upchannel to BGR
    # perspective and zoom augmentation
    img = perspective_augmentation(img)
    # create augmented image entry under an 'aug_' prefixed name so the
    # preprocessed original is not overwritten
    if not cv2.imwrite(TRAIN_CLEAN_DIR+'aug_'+img_name, img):
        raise OSError('could not write image: ' + TRAIN_CLEAN_DIR+'aug_'+img_name)

In [None]:
# add augmented images to label data
# build the augmented rows as a new frame rather than assigning into
# `unique_labels`, which is derived from `labels`
aug_labels = unique_labels.assign(Image='aug_'+unique_labels.Image)
# pd.concat returns a new frame, so the result has to be bound back to `labels`;
# ignore_index=True renumbers the rows 0..n-1
labels = pd.concat([labels, aug_labels], ignore_index=True)

In [395]:
# persist the cleaned + augmented label table (the row index is written as the first column)
CLEAN_LABELS_PATH = './data/clean_labels.csv'
labels.to_csv(CLEAN_LABELS_PATH)