In [1]:
import pandas as pd
import os
import numpy as np
from PIL import Image
import random
import cv2

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.models import Model
from tensorflow.keras.layers import AveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.utils import Sequence

## Pre-processing Pipeline

In [None]:
def load_image(path, mode='RGB'):
    """Load an image from disk and return it in the requested colour mode.

    Args:
        path: Path (or file object) accepted by ``PIL.Image.open``.
        mode: Pillow mode string the result is converted to (default ``'RGB'``).

    Returns:
        A ``PIL.Image.Image`` in ``mode`` whose pixel data is already loaded.
    """
    # ``Image.open`` is lazy and keeps the file open; the context manager
    # closes it, while ``convert`` returns an independent, loaded copy in the
    # requested mode (previously ``mode`` was silently ignored).
    with Image.open(path) as img:
        return img.convert(mode)


def to_array(image):
    """Return ``image`` viewed as a NumPy array (via ``np.asarray``)."""
    array = np.asarray(image)
    return array


def to_image(array, mode='RGB'):
    """Build a PIL image from ``array`` after casting it to ``uint8``.

    Args:
        array: Pixel data; it is passed through ``np.uint8`` unchanged otherwise
            (no clipping or rounding is applied here).
        mode: Pillow mode string forwarded to ``Image.fromarray``.

    Returns:
        The object returned by ``Image.fromarray``.
    """
    pixels = np.uint8(array)
    image = Image.fromarray(pixels, mode=mode)
    return image


def resize(image, size):
    """Resize ``image`` to ``size`` with ``tf.image.resize`` and its default settings.

    Args:
        image: Image (or batch) accepted by ``tf.image.resize``.
        size: Target ``(height, width)``.

    Returns:
        Whatever ``tf.image.resize`` returns for the given input.
    """
    resized = tf.image.resize(image, size)
    return resized


def shift_hsv_uint8(img, hue_shift, sat_shift, val_shift):
    """Shift hue, saturation and value of an RGB image through lookup tables.

    Args:
        img: RGB image array; its ``dtype`` is reused for the lookup tables and
            the result (assumes ``uint8``, as the name says - TODO confirm callers).
        hue_shift: Offset added to the hue channel, wrapped modulo 180.
        sat_shift: Offset added to the saturation channel, clipped to [0, 255].
        val_shift: Offset added to the value channel, clipped to [0, 255].

    Returns:
        The shifted image converted back to RGB.
    """
    dtype = img.dtype
    # Every table maps the 256 possible input levels; int16 leaves headroom
    # for negative shifts before wrapping / clipping.
    levels = np.arange(0, 256, dtype=np.int16)

    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_RGB2HSV))

    if hue_shift != 0:
        hue = cv2.LUT(hue, np.mod(levels + hue_shift, 180).astype(dtype))

    if sat_shift != 0:
        sat = cv2.LUT(sat, np.clip(levels + sat_shift, 0, 255).astype(dtype))

    if val_shift != 0:
        val = cv2.LUT(val, np.clip(levels + val_shift, 0, 255).astype(dtype))

    shifted = cv2.merge((hue, sat, val)).astype(dtype)
    return cv2.cvtColor(shifted, cv2.COLOR_HSV2RGB)


def blur(img, blur_limit):
    """Box-blur ``img`` with a square ``blur_limit`` x ``blur_limit`` kernel."""
    kernel_size = [blur_limit, blur_limit]
    return cv2.blur(img, ksize=kernel_size)


def gaussian_blur(img, blur_limit=(3, 7), sigma_limit=0):
    """Apply ``cv2.GaussianBlur`` to ``img``.

    Args:
        img: Image array accepted by OpenCV.
        blur_limit: Forwarded unchanged as the kernel size ``ksize``.
            NOTE(review): the default ``(3, 7)`` therefore yields a non-square
            kernel; if a sampling range was intended, confirm and adjust.
        sigma_limit: Forwarded as ``sigmaX``.

    Returns:
        The blurred image.
    """
    blurred = cv2.GaussianBlur(img, ksize=blur_limit, sigmaX=sigma_limit)
    return blurred


def motion_blur(img, blur_limit=7):
    """Blur ``img`` along the horizontal axis with a single-row averaging kernel.

    Args:
        img: Image array accepted by ``cv2.filter2D``.
        blur_limit: Side length of the square kernel; only its middle row is
            non-zero, each entry being ``1 / blur_limit``.

    Returns:
        The filtered image, same depth as the input (``ddepth=-1``).
    """
    kernel = np.zeros((blur_limit, blur_limit))
    middle_row = (blur_limit - 1) // 2
    kernel[middle_row, :] = 1
    kernel /= blur_limit  # normalise so the kernel weights sum to one
    return cv2.filter2D(img, -1, kernel=kernel)


def gaussian_noise(img, mean=0.0, stddev=1.0):
    """Add independent Gaussian noise to every element of ``img``.

    Args:
        img: Image array or tensor of any shape; it is cast to ``float32``.
        mean: Mean of the noise distribution (default ``0.0``).
        stddev: Standard deviation of the noise (default ``1.0``), expressed in
            the same units as the pixel values.

    Returns:
        A ``float32`` tensor with the same shape as ``img``.
    """
    # Cast first so integer images can be combined with the float noise.
    img = tf.cast(img, tf.float32)
    # Derive the noise shape from the image itself. The previous
    # ``tf.compat.v1.placeholder`` raises under eager execution and also
    # hard-coded a 224x224x3 shape.
    noise = tf.random.normal(shape=tf.shape(img), mean=mean, stddev=stddev, dtype=tf.float32)
    return tf.add(img, noise)


def iso_noise(img, color_shift=0.05, intensity=0.5):
    """Add camera-sensor-style noise to an image by perturbing hue and lightness.

    Args:
        img: RGB image; assumed to hold values on the 0-255 scale since it is
            multiplied by 1/255 before the colour conversion - TODO confirm.
        color_shift: Scales the standard deviation of the hue noise.
        intensity: Scales both the hue noise and the lightness noise.

    Returns:
        The noisy image converted back to RGB and multiplied by 255; it is not
        cast back to an integer dtype.
    """
    one_over_255 = float(1.0 / 255.0)
    # Work on a float32 copy scaled by 1/255.
    image = np.multiply(img, one_over_255, dtype=np.float32)
    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    # Only the per-channel standard deviation is used; index 1 is the second
    # HLS channel (lightness).
    _, stddev = cv2.meanStdDev(hls)

    # One noise value per pixel (height x width) for each perturbed channel.
    luminance_noise = np.random.poisson(stddev[1] * intensity * 255, hls.shape[:2])
    color_noise = np.random.normal(0, color_shift * 360 * intensity, hls.shape[:2])

    # ``hue`` is a view into ``hls``, so the in-place updates modify ``hls``.
    hue = hls[..., 0]
    hue += color_noise
    # Wrap hue values that left the [0, 360] interval.
    hue[hue < 0] += 360
    hue[hue > 360] -= 360

    # Also a view: brighten each pixel in proportion to its remaining headroom.
    luminance = hls[..., 1]
    luminance += (luminance_noise / 255) * (1.0 - luminance)

    image = cv2.cvtColor(hls, cv2.COLOR_HLS2RGB) * 255
    return image


def normalize(img, mean, std, max_pixel_value=255.0):
    """Standardize an image channel-wise: ``(img - mean * max) / (std * max)``.

    Args:
        img: Image as a NumPy array or any array-like (nested lists, eager
            TensorFlow tensors, ...) whose last axis broadcasts against
            ``mean`` / ``std``.
        mean: Per-channel mean expressed as a fraction of ``max_pixel_value``.
        std: Per-channel standard deviation as a fraction of ``max_pixel_value``.
        max_pixel_value: Largest pixel value of the input scale.

    Returns:
        A new ``float32`` ``np.ndarray``; the input is never modified.
    """
    mean = np.array(mean, dtype=np.float32)
    mean *= max_pixel_value

    std = np.array(std, dtype=np.float32)
    std *= max_pixel_value

    denominator = np.reciprocal(std, dtype=np.float32)

    # ``np.array`` always copies and accepts any array-like, so inputs without
    # an ``astype`` method (e.g. tensors returned by ``tf.image.resize``) work
    # and the in-place arithmetic below cannot touch the caller's data.
    img = np.array(img, dtype=np.float32)
    img -= mean
    img *= denominator
    return img

In [None]:
def get_transforms_train(image):
    """Apply the randomized training augmentation pipeline to a single image.

    Stages, in order: random 112x112 crop, resize to 224x224, random
    horizontal flip, optional rotation by a multiple of 90 degrees, optional
    HSV shift, brightness/contrast jitter or gamma adjustment, one of three
    blurs, Gaussian or ISO noise, optional cutout, and finally mean/std
    normalization.

    Args:
        image: H x W x 3 RGB image (array or tensor) with at least 112 pixels
            per side, as required by the crop. Values are assumed to be on the
            0-255 scale, matching ``max_pixel_value=255.0`` below - TODO
            confirm against callers.

    Returns:
        A ``float32`` ``np.ndarray`` of shape ``(224, 224, 3)``.
    """
    # --- Geometric stage (TensorFlow ops) ---------------------------------
    temp = tf.image.random_crop(image, size=(112, 112, 3))
    temp = resize(temp, size=(224, 224))
    temp = tf.image.random_flip_left_right(temp)

    if random.choice([True, False]):
        # k=4 would be a full turn, i.e. no rotation at all.
        k = random.randint(1, 3)
        temp = tf.image.rot90(temp, k=k)

    # The OpenCV-based helpers need NumPy arrays, and the HSV lookup tables
    # need uint8 pixels, so leave TensorFlow here.
    temp = np.clip(np.rint(np.asarray(temp)), 0, 255).astype(np.uint8)

    if random.choice([True, False]):
        hue_shift = random.uniform(-20, 20)
        sat_shift = random.uniform(-30, 30)
        val_shift = random.uniform(-20, 20)
        temp = shift_hsv_uint8(temp, hue_shift=hue_shift, sat_shift=sat_shift, val_shift=val_shift)

    # --- Photometric stage ------------------------------------------------
    # Brightness deltas and gamma curves are defined for images in [0, 1];
    # on the 0-255 scale a 0.2 delta is invisible and gamma > 1 overflows.
    temp = temp.astype(np.float32) / 255.0

    if random.choice([True, False]):
        # ``max_delta`` must be non-negative; the op draws the signed delta.
        temp = tf.image.random_brightness(temp, max_delta=0.2)
        temp = tf.image.random_contrast(temp, lower=0.2, upper=2.0)
    else:
        gamma = random.uniform(0.8, 1.2)
        temp = tf.image.adjust_gamma(temp, gamma=gamma)

    # Back to a writable float32 NumPy image on the 0-255 scale.
    temp = np.clip(np.asarray(temp), 0.0, 1.0) * 255.0

    # --- Blur: exactly one of the three variants --------------------------
    one_of_blur = random.choice([1, 2, 3])

    if one_of_blur == 1:
        temp = blur(temp, blur_limit=7)
    elif one_of_blur == 2:
        temp = gaussian_blur(temp)
    elif one_of_blur == 3:
        temp = motion_blur(temp)

    # --- Noise -------------------------------------------------------------
    if random.choice([True, False]):
        temp = gaussian_noise(temp)
    else:
        temp = iso_noise(temp)

    temp = np.asarray(temp).astype(np.float32)

    # --- Cutout ------------------------------------------------------------
    if random.choice([True, False]):
        # ``random_cutout`` requires even mask sizes and a batched 4-D input.
        hole_height = 2 * random.randint(4, 8)
        hole_width = 2 * random.randint(4, 8)
        batch = tf.expand_dims(tf.convert_to_tensor(temp), axis=0)
        temp = np.asarray(tfa.image.random_cutout(batch, (hole_height, hole_width))[0])

    # ``normalize`` works on NumPy arrays and returns a fresh float32 array.
    temp = normalize(temp, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0)

    return temp


def get_transforms_valid(image):
    """Deterministic validation transform: resize to 224x224, then normalize.

    Args:
        image: H x W x 3 RGB image (array or tensor); values are assumed to be
            on the 0-255 scale, matching ``max_pixel_value=255.0`` - TODO confirm.

    Returns:
        The normalized image as returned by ``normalize``.
    """
    temp = resize(image, size=(224, 224))
    # ``resize`` returns a TensorFlow tensor, which has no ``astype``; hand
    # ``normalize`` a NumPy array instead.
    temp = normalize(np.asarray(temp), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0)

    return temp

### Split data

In [None]:
# Load the train / validation metadata tables.
df_train = pd.read_csv('../input/128128-sorghum-cultivar/train_meta.csv')
df_valid = pd.read_csv('../input/128128-sorghum-cultivar/valid_meta.csv')

# Report the size of each split, then the per-cultivar sample counts.
for split_name, split_frame in (('train', df_train), ('valid', df_valid)):
    print(f"{split_name} size: {len(split_frame)}")

for split_frame in (df_train, df_valid):
    print(split_frame.cultivar.value_counts())

In [None]:
# Collect the names of image files that are suspiciously small (under 1000
# bytes), which this notebook treats as damaged.
train_data_dir = '../input/128128-sorghum-cultivar/train/'
damage_images = []

# List the directory once so the progress total always matches the data
# instead of relying on a hard-coded file count.
train_files = os.listdir(train_data_dir)

for i, file in enumerate(train_files):
    print(f'{i + 1}/{len(train_files)}', end='\r')
    if os.path.getsize(os.path.join(train_data_dir, file)) < 1000:
        damage_images.append(file)

In [None]:
# Wrap the collected file names in a one-column frame ('image') and display it.
# Note that this rebinds ``damage_images`` from a list to a DataFrame.
damage_images = pd.DataFrame(damage_images, columns=['image'])
damage_images

In [None]:
# Index labels of the metadata rows whose image file was flagged as damaged.
# ``isin`` does the membership test in one vectorized pass (the previous
# per-row ``in ndarray`` check rescanned every damaged name for every row), and
# collecting index *labels* is what ``DataFrame.drop`` expects later on.
damaged_names = damage_images['image']

damage_indexes_train = df_train.index[df_train['image'].isin(damaged_names)].tolist()
damage_indexes_valid = df_valid.index[df_valid['image'].isin(damaged_names)].tolist()

In [None]:
# Number of damaged entries found in the train and validation metadata.
len(damage_indexes_train), len(damage_indexes_valid)

In [None]:
# Remove the damaged rows. Rebinding instead of ``inplace=True`` together with
# ``errors='ignore'`` keeps this cell safe to re-run: labels that were already
# dropped no longer raise ``KeyError``.
df_train = df_train.drop(index=damage_indexes_train, errors='ignore')
df_valid = df_valid.drop(index=damage_indexes_valid, errors='ignore')

In [None]:
# Display the training metadata as it currently stands.
df_train

In [None]:
# Display the validation metadata as it currently stands.
df_valid

## Training

In [None]:
# ImageNet-pretrained EfficientNetB3 backbone without its classification top,
# configured for 128x128 RGB inputs.
model = EfficientNetB3(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3),
)

In [None]:
# Classification head stacked on the output of the backbone's last layer:
# pooling -> flatten -> batch norm -> dropout -> 100-way softmax.
# NOTE(review): this cell rebinds ``model``; running it twice without
# re-creating the backbone would stack a second head on the first - confirm
# the intended execution order.
# NOTE(review): presumably the backbone emits a 4x4 map for 128x128 inputs, in
# which case a 3x3 pool with default stride ignores the last row/column -
# TODO confirm.
backbone_features = model.layers[-1].output

x_ = AveragePooling2D(pool_size=(3, 3))(backbone_features)
x_ = Flatten()(x_)
x_ = BatchNormalization()(x_)
x_ = Dropout(0.5)(x_)

output_layer = Dense(units=100, activation='softmax')(x_)

model = Model(model.input, output_layer)

In [None]:
# Adam with a 1e-3 learning rate, categorical cross-entropy on the one-hot
# labels, and accuracy as the reported metric.
model.compile(
    loss=categorical_crossentropy,
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture of the assembled model.
model.summary()

In [None]:
# Keras' EfficientNet models embed their own rescaling / normalization layers
# and expect raw pixel values in [0, 255]. Rescaling and standardizing each
# sample here as well would feed the pretrained backbone inputs far outside
# the range it was trained on, so the generators deliver unmodified pixels.
image_dir = '../input/128128-sorghum-cultivar/train/'

# Pin one class list for both splits so label indices (and the one-hot width)
# stay identical even if a cultivar is missing from the validation frame.
# The sorted order matches what the generator would infer from df_train.
class_names = sorted(df_train['cultivar'].dropna().unique())

train_generator = ImageDataGenerator().flow_from_dataframe(dataframe=df_train,
                                                           directory=image_dir,
                                                           x_col='image',
                                                           y_col='cultivar',
                                                           classes=class_names,
                                                           batch_size=32,
                                                           target_size=(128, 128))
# Validation images are read from the same directory as the training images.
valid_generator = ImageDataGenerator().flow_from_dataframe(dataframe=df_valid,
                                                           directory=image_dir,
                                                           x_col='image',
                                                           y_col='cultivar',
                                                           classes=class_names,
                                                           batch_size=32,
                                                           target_size=(128, 128))

In [None]:
# Keep the full model (not only the weights) from the epoch with the highest
# validation accuracy.
model_checkpoint_callback = ModelCheckpoint(
    filepath='best-checkpoint.hdf5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop after 15 epochs without validation-loss improvement and roll back to
# the best weights seen so far.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.1 after 5 stagnant validation-loss epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.1,
    verbose=1,
)

In [None]:
# Train for up to 50 epochs with the callbacks defined above.
# NOTE(review): the step counts are hard-coded; confirm they still match the
# generator lengths after damaged rows were removed.
history = model.fit(
    train_generator,
    epochs=50,
    steps_per_epoch=2350,
    validation_data=valid_generator,
    validation_steps=400,
    callbacks=[early_stop, model_checkpoint_callback, reduce_lr],
)

In [None]:
# Evaluate on the validation generator built earlier in this notebook
# (``validation_batches`` was never defined and raised NameError).
model.evaluate(valid_generator)

In [None]:
# Persist the per-epoch training metrics (one column per metric) for later
# inspection.
temp_1 = pd.DataFrame(history.history)
temp_1.to_pickle('history.pkl')

In [None]:
# Save the model in its current in-memory state to an HDF5 file.
model.save('model.hdf5')

In [None]:
# Mapping from cultivar name to class index used by the training generator
# (``train_batches`` was never defined and raised NameError).
train_generator.class_indices

In [None]:
import json

# Store the label -> index mapping next to the model so predictions can be
# decoded later. The mapping comes from ``train_generator``; the previously
# referenced ``train_batches`` was never defined.
with open('class_indices.json', 'w') as class_indices_file:
    json.dump(train_generator.class_indices, class_indices_file)

In [None]:
# Predict a cultivar label for every test image.
test_dir = '../input/sorghum-cultivar-identification-512512/test/'

# List the directory once instead of on every loop iteration.
test_files = os.listdir(test_dir)

# Invert the generator's label -> index mapping once for O(1) decoding
# (``train_batches`` was never defined; the mapping lives on the generator).
index_to_label = {index: label for label, index in train_generator.class_indices.items()}

test_preds = []

for i, file in enumerate(test_files):
    img = resize(to_array(load_image(os.path.join(test_dir, file))), (128, 128))

    # Writable float32 batch of one image; ``standardize`` works in place.
    img_arr = np.expand_dims(np.array(img, dtype=np.float32), axis=0)
    # Apply exactly the preprocessing configured on the data generator so the
    # test inputs match what the model saw during training and validation.
    img_arr = valid_generator.image_data_generator.standardize(img_arr)

    preds = int(np.argmax(model.predict(img_arr, verbose=0)[0]))
    label = index_to_label[preds]

    test_preds.append([file, label])

    print(f'{i + 1}/{len(test_files)}', end='\r')

In [None]:
# Display the collected [file name, predicted label] pairs.
test_preds