In [1]:
import os
import PIL.Image
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_io as tfio

from matplotlib import pyplot as plt
from tensorflow.keras.utils import array_to_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Dropout, BatchNormalization
from tensorflow.keras.activations import relu, softmax
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### benchmarking model

In [2]:
# Module-level Adam optimizer instance, created with Keras' default hyper-parameters.
optimizer = Adam()


def _conv_block(x, filters, n_convs):
    """Stack `n_convs` 3x3 same-padded ReLU convolutions (each followed by batch norm), then 2x2 max-pool."""
    for _ in range(n_convs):
        x = Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1), padding='same', activation=relu)(x)
        x = BatchNormalization()(x)
    return MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')(x)


def vgg_net16(input_shape=(224, 224, 3), classes=None):
    """Build, compile and summarise a VGG-16-style classifier with batch normalisation.

    The network is five convolutional blocks (64, 128, 256, 512, 512 filters with
    2, 2, 3, 3, 3 convolutions respectively) followed by two 512-unit dense layers
    with 50% dropout and a softmax output layer.

    Args:
        input_shape: shape of a single input image, forwarded to the Keras `Input` layer.
        classes: number of units of the final softmax layer. Required.

    Returns:
        The compiled Keras `Model` (Adam optimizer, categorical cross-entropy loss,
        accuracy metric). `model.summary()` is printed as a side effect.

    Raises:
        TypeError: if `classes` is not provided.
    """
    # Fail before building 13 conv layers instead of deep inside the last Dense layer.
    if classes is None:
        raise TypeError("vgg_net16() requires `classes` (number of output units), got None")

    # input layer
    input_layer = Input(shape=input_shape, name='input_')

    # convolutional feature extractor: (filters, number of convolutions) per block
    x = input_layer
    for filters, n_convs in ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3)):
        x = _conv_block(x, filters, n_convs)

    # classifier
    x = Flatten()(x)
    for _ in range(2):
        x = Dense(units=512, activation=relu)(x)
        x = Dropout(rate=0.5)(x)
    x = Dense(units=classes, activation=softmax)(x)

    model = Model(input_layer, x)
    # Each model gets its own optimizer: sharing a single instance between models
    # carries the (possibly reduced) learning rate and optimizer state from one
    # experiment into the next, and newer Keras optimizers reject such reuse.
    model.compile(optimizer=Adam(), loss=categorical_crossentropy, metrics=['accuracy'])
    model.summary()

    return model

### callbacks

In [3]:
# Stop once val_loss has not improved for 10 consecutive epochs and restore the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# The learning-rate drop has to happen *before* early stopping triggers: with both
# patiences set to 10 the reduction could only fire on the epoch training already
# stopped, so it never influenced a single update. Half the early-stop patience
# leaves room for the lower learning rate to take effect.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_delta=0.0000001)

## ***flip***

In [None]:
def flip(path, dst):
    """Write a left-right flip, an up-down flip and a copy of every image found in `path` to `dst`.

    Output names are `_flip_l_r_<name>`, `_flip_u_d_<name>` and `<name>`.
    """
    # file-name prefix -> TensorFlow flip operation
    mirror_ops = {
        '_flip_l_r_': tf.image.flip_left_right,
        '_flip_u_d_': tf.image.flip_up_down,
    }

    for file_name in os.listdir(path):
        source = PIL.Image.open(os.path.join(path, file_name))
        pixels = img_to_array(source)

        # save flipped images in destination
        for prefix, mirror in mirror_ops.items():
            target = os.path.join(dst, f'{prefix}{file_name}')
            array_to_img(mirror(pixels)).save(target)

        # the untouched source image goes alongside its flipped variants
        source.save(os.path.join(dst, file_name))

In [None]:
# makedirs(exist_ok=True) keeps the cell re-runnable and also creates the class
# sub-folders when 'flip' already exists without them.
os.makedirs('flip/0', exist_ok=True)
os.makedirs('flip/1', exist_ok=True)

flip('../input/innovation-species-resized-data/resized/0', 'flip/0')
flip('../input/innovation-species-resized-data/resized/1', 'flip/1')

## Baseline

In [None]:
# Generator for the un-augmented images: pixels rescaled by 1/255, 40% of the files held out for validation.
base_train_generator = ImageDataGenerator(validation_split=0.4, rescale=1/255.)
base_train = base_train_generator.flow_from_directory(
    directory='../input/innovation-species-resized-data/resized',
    target_size=(224, 224),
    subset='training',
)
base_val = base_train_generator.flow_from_directory(
    directory='../input/innovation-species-resized-data/resized',
    target_size=(224, 224),
    subset='validation',
)

In [4]:
# Test images only get the same 1/255 rescaling as the training data; no split, no augmentation.
test_generator = ImageDataGenerator(rescale=1/255.)
evaluation = test_generator.flow_from_directory(
    directory='../input/innovation-species-resized-data/test',
    target_size=(224, 224),
)

In [None]:
# Train a fresh network on the un-augmented data.
baseline_model = vgg_net16(classes=2)
baseline_history = baseline_model.fit(
    x=base_train,
    validation_data=base_val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=baseline_history.history)
temp.to_pickle('baseline.pkl')

In [None]:
# Score the baseline model on the generator built from the test directory;
# the returned loss/accuracy values are displayed as the cell output.
baseline_model.evaluate(evaluation)

## Flipped

In [None]:
# NOTE(review): originals and their flipped copies share one folder, so the 60/40 split
# may put variants of the same source image into both subsets — confirm this is intended.
flip_train = base_train_generator.flow_from_directory(
    directory='flip',
    target_size=(224, 224),
    subset='training',
)
flip_val = base_train_generator.flow_from_directory(
    directory='flip',
    target_size=(224, 224),
    subset='validation',
)

In [None]:
# Train a fresh network on the flip-augmented data.
flip_model = vgg_net16(classes=2)
flip_history = flip_model.fit(
    x=flip_train,
    validation_data=flip_val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=flip_history.history)
temp.to_pickle('flip.pkl')

In [None]:
# Score the flip-trained model on the generator built from the test directory;
# the returned loss/accuracy values are displayed as the cell output.
flip_model.evaluate(evaluation)

### ***isolate color channels***

In [None]:
def rgb_channel_isolation(path, dst):
    """Save a red-only, green-only and blue-only variant plus a copy of every image in `path` to `dst`.

    Output names are `_isolate_r_<name>`, `_isolate_g_<name>`, `_isolate_b_<name>` and `<name>`.
    """
    # channel tag -> last-axis indices that are blanked to isolate that channel
    blanked = {
        'r': (slice(1, 3),),
        'g': (0, 2),
        'b': (slice(0, 2),),
    }

    for file_name in os.listdir(path):
        pixels = img_to_array(PIL.Image.open(os.path.join(path, file_name)))

        # build all three single-channel arrays before anything is written
        isolated = {}
        for tag, drop in blanked.items():
            plane = pixels.copy()
            for index in drop:
                plane[:, :, index] = 0
            isolated[tag] = plane

        # red, green, blue components
        for tag, plane in isolated.items():
            array_to_img(plane).save(os.path.join(dst, f'_isolate_{tag}_{file_name}'))

        # original
        array_to_img(pixels).save(os.path.join(dst, file_name))

In [None]:
# makedirs(exist_ok=True) keeps the cell re-runnable and also creates the class
# sub-folders when 'isolated' already exists without them.
os.makedirs('isolated/0', exist_ok=True)
os.makedirs('isolated/1', exist_ok=True)

rgb_channel_isolation('../input/innovation-species-resized-data/resized/0', 'isolated/0')
rgb_channel_isolation('../input/innovation-species-resized-data/resized/1', 'isolated/1')

In [None]:
# NOTE(review): originals and their channel-isolated copies share one folder, so the 60/40 split
# may put variants of the same source image into both subsets — confirm this is intended.
train_generator = ImageDataGenerator(validation_split=0.4, rescale=1/255.)
train = train_generator.flow_from_directory(
    directory='isolated',
    target_size=(224, 224),
    subset='training',
)
val = train_generator.flow_from_directory(
    directory='isolated',
    target_size=(224, 224),
    subset='validation',
)

In [None]:
# Train a fresh network on the channel-isolated data.
model = vgg_net16(classes=2)
iso_history = model.fit(
    x=train,
    validation_data=val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=iso_history.history)
temp.to_pickle('iso.pkl')

In [None]:
# Score the model fitted in the previous cell on the generator built from the test directory;
# the returned loss/accuracy values are displayed as the cell output.
model.evaluate(evaluation)

### ***color property changes***

In [None]:
def _resolve_bound(value, default):
    """Return `default` for None, `value` itself for an int/float, and None (= skip) for anything else."""
    if value is None:
        return default
    if isinstance(value, (int, float)):
        return value
    return None


def random_changes_to_color_properties(path, dst, delta=None, gamma_transformation=True, change_contrast=True,
                                       factor=None, steps=1):
    """Save randomly colour-adjusted variants plus a copy of every image in `path` to `dst`.

    For each image and each of `steps` rounds, one random change factor is drawn and used
    for a brightness (`bc_`), hue (`hue_`) and saturation (`sat_`) variant. Optionally a
    gamma (`gamma_img_`) and a contrast (`cont_img_`) variant are written per round as well.

    Args:
        path: directory containing the source images.
        dst: existing directory that receives the generated images.
        delta: bound of the brightness/hue/saturation factor, drawn from [-delta, delta].
            None uses a bound of 1. Non-numeric values skip these three variants.
        gamma_transformation: whether gamma variants are written.
        change_contrast: whether contrast variants are written.
        factor: upper bound for gamma (drawn from [1, factor]) and bound for the contrast
            factor (drawn from [-factor, factor]). None uses 5. Non-numeric values skip both.
        steps: number of random rounds per image and transformation.

    NOTE(review): the image array is in the 0-255 range and `array_to_img` is called with
    its default rescaling, which presumably stretches every result back to the full range;
    purely additive/multiplicative changes (brightness, contrast magnitude) may therefore be
    cancelled out — confirm and consider `scale=False` if so.
    """
    # Resolve both bounds once. The original reassigned `factor` inside the contrast loop,
    # which turned it into a numpy float and silently disabled gamma/contrast for every
    # image after the first one.
    colour_bound = _resolve_bound(delta, 1)
    strength_bound = _resolve_bound(factor, 5)

    # file-name prefix -> adjustment sharing the same random change factor
    colour_ops = (
        ('bc', tf.image.adjust_brightness),
        ('hue', tf.image.adjust_hue),
        ('sat', tf.image.adjust_saturation),
    )

    for image in os.listdir(path):
        img = img_to_array(PIL.Image.open(os.path.join(path, image)))

        if colour_bound is not None:
            for i in range(steps):
                change_factor = np.round(np.random.uniform(-1 * colour_bound, colour_bound), 2)
                for prefix, adjust in colour_ops:
                    array_to_img(adjust(img, change_factor)).save(os.path.join(dst, f'{prefix}_{i}_{image}'))

        if strength_bound is not None:
            if gamma_transformation:
                for i in range(steps):
                    gamma = np.round(np.random.uniform(1, strength_bound), 2)
                    gamma_img = array_to_img(tf.image.adjust_gamma(img, gamma))
                    gamma_img.save(os.path.join(dst, f'gamma_img_{i}_{image}'))

            if change_contrast:
                for i in range(steps):
                    contrast_factor = np.round(np.random.uniform(-1 * strength_bound, strength_bound), 2)
                    cont_img = array_to_img(tf.image.adjust_contrast(img, contrast_factor))
                    cont_img.save(os.path.join(dst, f'cont_img_{i}_{image}'))

        # original image alongside its variants
        array_to_img(img).save(os.path.join(dst, image))

In [None]:
# makedirs(exist_ok=True) keeps the cell re-runnable and also creates the class
# sub-folders when 'color_prop' already exists without them.
os.makedirs('color_prop/0', exist_ok=True)
os.makedirs('color_prop/1', exist_ok=True)

random_changes_to_color_properties('../input/innovation-species-resized-data/resized/0', 'color_prop/0')
random_changes_to_color_properties('../input/innovation-species-resized-data/resized/1', 'color_prop/1')

In [None]:
# NOTE(review): originals and their colour-adjusted copies share one folder, so the 60/40 split
# may put variants of the same source image into both subsets — confirm this is intended.
train_generator = ImageDataGenerator(validation_split=0.4, rescale=1/255.)
train = train_generator.flow_from_directory(
    directory='color_prop',
    target_size=(224, 224),
    subset='training',
)
val = train_generator.flow_from_directory(
    directory='color_prop',
    target_size=(224, 224),
    subset='validation',
)

In [None]:
# Train a fresh network on the colour-adjusted data.
model = vgg_net16(classes=2)
ccp_history = model.fit(
    x=train,
    validation_data=val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=ccp_history.history)
temp.to_pickle('change_color_prop.pkl')

In [None]:
# Score the model fitted in the previous cell on the generator built from the test directory;
# the returned loss/accuracy values are displayed as the cell output.
model.evaluate(evaluation)

### Cropping

In [8]:
def cropping(path, dst, central=True, random=False, fraction_low=0.5, fraction_high=0.9, random_width=224,
             random_height=224, amount=3):
    """Save `amount` cropped variants plus a copy of every image in `path` to `dst`.

    Args:
        path: directory containing the source images.
        dst: existing directory that receives the generated images.
        central: write central crops (`cc_img_<i>_<name>`) whose kept fraction is drawn
            uniformly from [fraction_low, fraction_high]. Takes precedence over `random`.
        random: write random crops (`rc_img_<i>_<name>`); only used when `central` is False.
        fraction_low: lower bound of the central-crop fraction.
        fraction_high: upper bound of the central-crop fraction.
        random_width: width in pixels of a random crop.
        random_height: height in pixels of a random crop.
        amount: number of crops written per image.

    NOTE(review): a random crop as large as the source image leaves it unchanged, so with
    the 224x224 defaults the random crops may be plain duplicates — confirm the source size.
    """
    for image in os.listdir(path):
        img = img_to_array(PIL.Image.open(os.path.join(path, image)))

        if central:
            for i in range(amount):
                crop_area = np.round(np.random.uniform(fraction_low, fraction_high), 2)
                cc_img = tf.image.central_crop(img, central_fraction=crop_area)
                array_to_img(cc_img).save(os.path.join(dst, f'cc_img_{i}_{image}'))

        elif random:
            # tf.image.random_crop takes the size as [height, width, channels]; the original
            # passed width first and hard-coded three channels.
            crop_size = [random_height, random_width, img.shape[-1]]
            for i in range(amount):
                rc_img = tf.image.random_crop(img, size=crop_size)
                array_to_img(rc_img).save(os.path.join(dst, f'rc_img_{i}_{image}'))

        # original image alongside its crops
        array_to_img(img).save(os.path.join(dst, image))

### ***central crop***

In [None]:
# makedirs(exist_ok=True) keeps the cell re-runnable and also creates the class
# sub-folders when 'cen_crop' already exists without them.
os.makedirs('cen_crop/0', exist_ok=True)
os.makedirs('cen_crop/1', exist_ok=True)

cropping('../input/innovation-species-resized-data/resized/0', 'cen_crop/0')
cropping('../input/innovation-species-resized-data/resized/1', 'cen_crop/1')

In [None]:
# NOTE(review): originals and their central crops share one folder, so the 60/40 split
# may put variants of the same source image into both subsets — confirm this is intended.
train_generator = ImageDataGenerator(validation_split=0.4, rescale=1/255.)
train = train_generator.flow_from_directory(
    directory='cen_crop',
    target_size=(224, 224),
    subset='training',
)
val = train_generator.flow_from_directory(
    directory='cen_crop',
    target_size=(224, 224),
    subset='validation',
)

In [None]:
# Train a fresh network on the centrally cropped data.
model = vgg_net16(classes=2)
ccp_history = model.fit(
    x=train,
    validation_data=val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=ccp_history.history)
temp.to_pickle('central_crop.pkl')

In [None]:
# Score the model fitted in the previous cell on the generator built from the test directory;
# the returned loss/accuracy values are displayed as the cell output.
model.evaluate(evaluation)

### ***random crop***

In [9]:
# makedirs(exist_ok=True) keeps the cell re-runnable and also creates the class
# sub-folders when 'rand_crop' already exists without them.
os.makedirs('rand_crop/0', exist_ok=True)
os.makedirs('rand_crop/1', exist_ok=True)

cropping('../input/innovation-species-resized-data/resized/0', 'rand_crop/0', central=False, random=True)
cropping('../input/innovation-species-resized-data/resized/1', 'rand_crop/1', central=False, random=True)

In [10]:
# NOTE(review): originals and their random crops share one folder, so the 60/40 split
# may put variants of the same source image into both subsets — confirm this is intended.
train_generator = ImageDataGenerator(validation_split=0.4, rescale=1/255.)
train = train_generator.flow_from_directory(
    directory='rand_crop',
    target_size=(224, 224),
    subset='training',
)
val = train_generator.flow_from_directory(
    directory='rand_crop',
    target_size=(224, 224),
    subset='validation',
)

In [15]:
# Train a fresh network on the randomly cropped data.
model = vgg_net16(classes=2)
ccp_history = model.fit(
    x=train,
    validation_data=val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=ccp_history.history)
temp.to_pickle('random_crop.pkl')

In [16]:
# Score the model fitted in the previous cell on the generator built from the test directory;
# the returned loss/accuracy values are displayed as the cell output.
model.evaluate(evaluation)

### Rotation

In [5]:
def rotation(path, dst, angel=30, amount=3):
    """Save `amount` randomly rotated variants plus a copy of every image in `path` to `dst`.

    Args:
        path: directory containing the source images.
        dst: existing directory that receives the generated images.
        angel: maximum absolute rotation in degrees; each variant is rotated by a random
            whole number of degrees from [-angel, angel]. (Parameter name kept for
            compatibility with existing callers.)
        amount: number of rotated variants (`ra_img_<i>_<name>`) written per image.
    """
    for image in os.listdir(path):
        img = img_to_array(PIL.Image.open(os.path.join(path, image)))

        for i in range(amount):
            # randint's upper bound is exclusive; +1 makes the range symmetric and
            # keeps angel=0 valid.
            random_angel = np.random.randint(-1 * angel, angel + 1)
            # tfa.image.rotate expects radians — passing the degree value directly
            # rotated images by up to 30 rad instead of 30 degrees.
            ra_img = tfa.image.rotate(img, float(np.deg2rad(random_angel)))
            array_to_img(ra_img).save(os.path.join(dst, f'ra_img_{i}_{image}'))

        # original image alongside its rotations
        array_to_img(img).save(os.path.join(dst, image))

In [7]:
# makedirs(exist_ok=True) keeps the cell re-runnable and also creates the class
# sub-folders when 'rand_rotate' already exists without them.
os.makedirs('rand_rotate/0', exist_ok=True)
os.makedirs('rand_rotate/1', exist_ok=True)

rotation('../input/innovation-species-resized-data/resized/0', 'rand_rotate/0')
rotation('../input/innovation-species-resized-data/resized/1', 'rand_rotate/1')

In [8]:
# NOTE(review): originals and their rotated copies share one folder, so the 60/40 split
# may put variants of the same source image into both subsets — confirm this is intended.
train_generator = ImageDataGenerator(validation_split=0.4, rescale=1/255.)
train = train_generator.flow_from_directory(
    directory='rand_rotate',
    target_size=(224, 224),
    subset='training',
)
val = train_generator.flow_from_directory(
    directory='rand_rotate',
    target_size=(224, 224),
    subset='validation',
)

In [13]:
# Train a fresh network on the randomly rotated data.
model = vgg_net16(classes=2)
ccp_history = model.fit(
    x=train,
    validation_data=val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=ccp_history.history)
temp.to_pickle('random_rotation.pkl')

In [14]:
# Score the model fitted in the previous cell on the generator built from the test directory;
# the returned loss/accuracy values are displayed as the cell output.
model.evaluate(evaluation)

### Translations

In [15]:
def translation(path, dst, hshift_amount=0.3, wshift_amount=0.3, height_shift=True, width_shift=True, amount=3):
    """Save `amount` randomly shifted variants plus a copy of every image in `path` to `dst`.

    Args:
        path: directory containing the source images.
        dst: existing directory that receives the generated images.
        hshift_amount: upper bound of the height shift range; the range passed to
            Keras' `random_shift` is drawn uniformly from [0.1, hshift_amount].
        wshift_amount: same as `hshift_amount`, for the width.
        height_shift: when False the image is not shifted along its height.
        width_shift: when False the image is not shifted along its width.
        amount: number of shifted variants (`rs_img_<i>_<name>`) written per image.
    """
    for image in os.listdir(path):
        img = img_to_array(PIL.Image.open(os.path.join(path, image)))

        for i in range(amount):
            # A range of 0 disables the shift on that axis; the two flags were
            # previously accepted but ignored.
            random_hshift = np.random.uniform(0.1, hshift_amount) if height_shift else 0.0
            random_wshift = np.random.uniform(0.1, wshift_amount) if width_shift else 0.0
            # img is channels-last (rows, cols, channels), hence the explicit axes
            rs_img = (tf.keras.preprocessing.image.random_shift(img, random_wshift, random_hshift, channel_axis=2,
                                                                row_axis=0, col_axis=1,
                                                                fill_mode='reflect')).astype(np.uint8)
            array_to_img(rs_img).save(os.path.join(dst, f'rs_img_{i}_{image}'))

        # original image alongside its shifted variants
        array_to_img(img).save(os.path.join(dst, image))

In [16]:
# makedirs(exist_ok=True) keeps the cell re-runnable and also creates the class
# sub-folders when 'rand_translation' already exists without them.
os.makedirs('rand_translation/0', exist_ok=True)
os.makedirs('rand_translation/1', exist_ok=True)

translation('../input/innovation-species-resized-data/resized/0', 'rand_translation/0')
translation('../input/innovation-species-resized-data/resized/1', 'rand_translation/1')

In [17]:
# NOTE(review): originals and their shifted copies share one folder, so the 60/40 split
# may put variants of the same source image into both subsets — confirm this is intended.
train_generator = ImageDataGenerator(validation_split=0.4, rescale=1/255.)
train = train_generator.flow_from_directory(
    directory='rand_translation',
    target_size=(224, 224),
    subset='training',
)
val = train_generator.flow_from_directory(
    directory='rand_translation',
    target_size=(224, 224),
    subset='validation',
)

In [None]:
# Train a fresh network on the randomly shifted data.
model = vgg_net16(classes=2)
ccp_history = model.fit(
    x=train,
    validation_data=val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=ccp_history.history)
temp.to_pickle('rand_translation.pkl')

### Noise injection

In [None]:
def noise_injection(path, dst, magnitude=0.5, amount=3):
    """Save `amount` Gaussian-noise variants plus a copy of every image in `path` to `dst`.

    Args:
        path: directory containing the source images.
        dst: existing directory that receives the generated images.
        magnitude: upper bound of the noise standard deviation (image scaled to [0, 1]);
            the standard deviation of each variant is drawn uniformly from [0, magnitude].
        amount: number of noisy variants (`ni_img_<i>_<name>`) written per image.
    """
    for image in os.listdir(path):
        # work on a [0, 1]-scaled copy so `magnitude` is relative to the full intensity range
        img = img_to_array(PIL.Image.open(os.path.join(path, image))) / 255.

        for i in range(amount):
            random_magnitude = np.round(np.random.uniform(0, magnitude), 2)
            noise = np.random.normal(0, random_magnitude, size=img.shape)
            # Clip back into the valid range: without it a few extreme noise samples
            # define the min/max used when converting back to an image and compress
            # the contrast of everything else.
            noisy = np.clip(img + noise, 0., 1.)
            # convert once — the original ran the PIL result through array_to_img a second time
            array_to_img(noisy).save(os.path.join(dst, f'ni_img_{i}_{image}'))

        # original image alongside its noisy variants
        array_to_img(img).save(os.path.join(dst, image))

In [None]:
# makedirs(exist_ok=True) keeps the cell re-runnable and also creates the class
# sub-folders when 'noise_injection' already exists without them.
os.makedirs('noise_injection/0', exist_ok=True)
os.makedirs('noise_injection/1', exist_ok=True)

# Fill the folders with noise-injected images; this cell previously called
# translation(), so the "noise" experiment was trained on shifted images.
noise_injection('../input/innovation-species-resized-data/resized/0', 'noise_injection/0')
noise_injection('../input/innovation-species-resized-data/resized/1', 'noise_injection/1')

In [None]:
# NOTE(review): originals and their augmented copies share one folder, so the 60/40 split
# may put variants of the same source image into both subsets — confirm this is intended.
train_generator = ImageDataGenerator(validation_split=0.4, rescale=1/255.)
train = train_generator.flow_from_directory(
    directory='noise_injection',
    target_size=(224, 224),
    subset='training',
)
val = train_generator.flow_from_directory(
    directory='noise_injection',
    target_size=(224, 224),
    subset='validation',
)

In [None]:
# Train a fresh network on the images in the 'noise_injection' folders.
model = vgg_net16(classes=2)
ccp_history = model.fit(
    x=train,
    validation_data=val,
    callbacks=[early_stop, reduce_lr],
    epochs=100,
)

# Store the per-epoch metrics as a pickled DataFrame.
temp = pd.DataFrame(data=ccp_history.history)
temp.to_pickle('random_noise.pkl')