In [None]:
import pandas as pd
import os
import numpy as np
import random
import cv2
import sys
from PIL import Image

In [None]:
import tensorflow as tf
import math
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
from tensorflow import keras

from sklearn.model_selection import StratifiedKFold, train_test_split
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import AveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.utils import array_to_img, load_img, img_to_array

In [None]:
# Shorthand for the Keras backend module; the OneCycleLr callback below uses
# its get_value / set_value helpers to read and write optimizer variables.
K = keras.backend


class OneCycleLr(keras.callbacks.Callback):
    """One-cycle learning-rate (and optional momentum) schedule, updated per batch.

    The learning rate is annealed from ``max_lr / div_factor`` up to ``max_lr``
    during the first ``pct_start`` fraction of the schedule, then down to
    ``(max_lr / div_factor) / final_div_factor``. When ``cycle_momentum`` is
    true, momentum moves the opposite way: from ``max_momentum`` down to
    ``base_momentum`` and back up. The value is written to the optimizer's
    ``momentum`` variable, falling back to ``beta_1`` when that fails.

    Args:
        max_lr: Peak learning rate.
        total_steps: Total number of batches in the schedule. Takes precedence
            over ``epochs`` / ``steps_per_epoch`` when given.
        epochs: Number of epochs (used with ``steps_per_epoch``).
        steps_per_epoch: Batches per epoch (used with ``epochs``).
        pct_start: Fraction of the schedule spent increasing the learning rate;
            must be a float in [0, 1].
        anneal_strategy: ``"cos"`` or ``"linear"``.
        cycle_momentum: Whether to cycle momentum / beta_1.
        base_momentum: Lowest momentum, reached when the learning rate peaks.
        max_momentum: Highest momentum, used at the start and end.
        div_factor: ``initial_lr = max_lr / div_factor``.
        final_div_factor: ``min_lr = initial_lr / final_div_factor``.

    Raises:
        ValueError: If the step arguments are missing or not positive integers,
            if ``pct_start`` is not a float in [0, 1], or if
            ``anneal_strategy`` is unknown.
    """

    def __init__(self,
                 max_lr: float,
                 total_steps: int = None,
                 epochs: int = None,
                 steps_per_epoch: int = None,
                 pct_start: float = 0.2,
                 anneal_strategy: str = "cos",
                 cycle_momentum: bool = True,
                 base_momentum: float = 0.85,
                 max_momentum: float = 0.95,
                 div_factor: float = 1.0e+3,
                 final_div_factor: float = 1e4,
                 ) -> None:

        super(OneCycleLr, self).__init__()

        # Validate total steps. Type checks run before value comparisons so a
        # wrong type yields the intended ValueError instead of a TypeError.
        if total_steps is not None:
            if not isinstance(total_steps, int) or total_steps <= 0:
                raise ValueError(
                    "Expected positive integer total_steps, but got {}".format(
                        total_steps
                    )
                )
            self.total_steps = total_steps
        elif epochs is None or steps_per_epoch is None:
            # Without total_steps, BOTH epochs and steps_per_epoch are needed.
            raise ValueError(
                "You must define either total_steps OR (epochs AND steps_per_epoch)"
            )
        else:
            if not isinstance(epochs, int) or epochs <= 0:
                raise ValueError(
                    "Expected positive integer epochs, but got {}".format(
                        epochs)
                )
            if not isinstance(steps_per_epoch, int) or steps_per_epoch <= 0:
                raise ValueError(
                    "Expected positive integer steps_per_epoch, but got {}".format(
                        steps_per_epoch
                    )
                )
            # Compute total steps
            self.total_steps = epochs * steps_per_epoch

        # Validate pct_start before it is used to derive the phase lengths
        if not isinstance(pct_start, float) or pct_start < 0 or pct_start > 1:
            raise ValueError(
                "Expected float between 0 and 1 pct_start, but got {}".format(
                    pct_start)
            )

        self.step_num = 0
        # Index of the last warm-up step and the length of the annealing phase.
        self.step_size_up = float(pct_start * self.total_steps) - 1
        self.step_size_down = float(self.total_steps - self.step_size_up) - 1

        # Validate anneal_strategy
        if anneal_strategy not in ["cos", "linear"]:
            raise ValueError(
                "anneal_strategy must by one of 'cos' or 'linear', instead got {}".format(
                    anneal_strategy
                )
            )
        elif anneal_strategy == "cos":
            self.anneal_func = self._annealing_cos
        elif anneal_strategy == "linear":
            self.anneal_func = self._annealing_linear

        # Initialize learning rate variables
        self.initial_lr = max_lr / div_factor
        self.max_lr = max_lr
        self.min_lr = self.initial_lr / final_div_factor

        # Initial momentum variables
        self.cycle_momentum = cycle_momentum
        if self.cycle_momentum:
            self.m_momentum = max_momentum
            self.momentum = max_momentum
            self.b_momentum = base_momentum

        # History of the learning rate & momentum seen at each batch
        self.track_lr = []
        self.track_mom = []

    def _annealing_cos(self, start, end, pct) -> float:
        "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."
        cos_out = math.cos(math.pi * pct) + 1
        return end + (start - end) / 2.0 * cos_out

    def _annealing_linear(self, start, end, pct) -> float:
        "Linearly anneal from `start` to `end` as pct goes from 0.0 to 1.0."
        return (end - start) * pct + start

    @staticmethod
    def _phase_progress(step, span) -> float:
        """Return the fraction of a phase completed, capped at 1.0.

        A phase of zero (or negative) length counts as already finished, which
        avoids a division by zero for very short schedules. The cap keeps the
        schedule at its end value if training runs past ``total_steps``.
        """
        if span <= 0:
            return 1.0
        return min(step / span, 1.0)

    def _set_momentum(self, value) -> None:
        """Write `value` to the optimizer's momentum, or to beta_1 as a fallback."""
        optimizer = self.model.optimizer
        try:
            K.set_value(optimizer.momentum, value)
        except Exception:
            # Optimizers without a settable `momentum` (e.g. Adam) use beta_1.
            K.set_value(optimizer.beta_1, value)

    def _get_momentum(self) -> float:
        """Read the optimizer's momentum, or beta_1 as a fallback."""
        optimizer = self.model.optimizer
        try:
            return float(K.get_value(optimizer.momentum))
        except Exception:
            return float(K.get_value(optimizer.beta_1))

    def set_lr_mom(self) -> None:
        """Update the learning rate and momentum for the current step."""
        warming_up = self.step_num <= self.step_size_up
        if warming_up:
            pct = self._phase_progress(self.step_num, self.step_size_up)
            lr_start, lr_end = self.initial_lr, self.max_lr
        else:
            down_step_num = self.step_num - self.step_size_up
            pct = self._phase_progress(down_step_num, self.step_size_down)
            lr_start, lr_end = self.max_lr, self.min_lr

        # update learning rate
        K.set_value(self.model.optimizer.lr,
                    self.anneal_func(lr_start, lr_end, pct))

        # update momentum if cycle_momentum (runs opposite to the learning rate)
        if self.cycle_momentum:
            if warming_up:
                mom_start, mom_end = self.m_momentum, self.b_momentum
            else:
                mom_start, mom_end = self.b_momentum, self.m_momentum
            self._set_momentum(self.anneal_func(mom_start, mom_end, pct))

    def on_train_begin(self, logs=None) -> None:
        # Set initial learning rate & momentum values
        K.set_value(self.model.optimizer.lr, self.initial_lr)
        if self.cycle_momentum:
            self._set_momentum(self.momentum)

    def on_train_batch_end(self, batch, logs=None) -> None:
        # Record the learning rate & momentum that were used for this batch
        self.track_lr.append(float(K.get_value(self.model.optimizer.lr)))
        self.track_mom.append(self._get_momentum())
        # Update learning rate & momentum for the next batch
        self.set_lr_mom()
        # increment step_num
        self.step_num += 1

    def plot_lrs_moms(self, axes=None) -> None:
        """Plot the recorded learning-rate and momentum histories side by side.

        Args:
            axes: Optional pair (or longer indexable collection) of matplotlib
                axes; the first two are used. A new 1x2 figure is created when
                omitted.
        """
        # `is None` rather than `== None`: comparing a NumPy array of axes with
        # `==` is element-wise and cannot be used as an `if` condition.
        if axes is None:
            _, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
        else:
            try:
                ax1, ax2 = axes
            except ValueError:
                # More than two axes were supplied: use the first two.
                ax1, ax2 = axes[0], axes[1]
        ax1.plot(self.track_lr)
        ax1.set_title("Learning Rate vs Steps")
        ax2.plot(self.track_mom)
        ax2.set_title("Momentum (or beta_1) vs Steps")

## Pre-processing Pipeline

In [None]:
def load_image(path, mode='RGB'):
    """Load an image from disk and return it converted to `mode`.

    Args:
        path: Location of the image file.
        mode: PIL mode of the returned image (default ``'RGB'``).

    Returns:
        A fully loaded ``PIL.Image.Image`` in the requested mode.
    """
    # Image.open is lazy and keeps the file handle open; converting inside the
    # context manager loads the pixels and lets the handle be closed promptly.
    with Image.open(path) as source:
        return source.convert(mode)


def to_array(image):
    """Return `image` as a NumPy array (no copy is made if it already is one)."""
    pixel_array = np.asarray(image)
    return pixel_array


def to_image(array, mode='RGB'):
    """Build a PIL image in `mode` from `array` after casting it to uint8."""
    pixel_bytes = np.uint8(array)
    return Image.fromarray(pixel_bytes, mode=mode)


def resize(image, size):
    """Resize `image` to `size` with ``tf.image.resize`` and return the tensor."""
    resized = tf.image.resize(image, size)
    return resized


def blur(img, blur_limit):
    """Box-blur `img` with a square kernel of side `blur_limit`."""
    kernel_size = [blur_limit, blur_limit]
    return cv2.blur(img, ksize=kernel_size)


def gaussian_blur(img, blur_limit=(3, 7), sigma_limit=0):
    """Gaussian-blur `img` with OpenCV.

    `blur_limit` is forwarded unchanged as the kernel size and `sigma_limit`
    as ``sigmaX``; neither is treated as a range to sample from.
    """
    kernel_size = blur_limit
    blurred = cv2.GaussianBlur(img, ksize=kernel_size, sigmaX=sigma_limit)
    return blurred


def motion_blur(img, blur_limit=7):
    """Blur `img` along the horizontal axis.

    The kernel is a `blur_limit` x `blur_limit` matrix that is zero everywhere
    except its middle row, whose entries each equal ``1 / blur_limit``.
    """
    middle_row = (blur_limit - 1) // 2
    kernel = np.zeros((blur_limit, blur_limit))
    kernel[middle_row, :] = np.ones(blur_limit)
    kernel /= blur_limit
    return cv2.filter2D(img, -1, kernel=kernel)


def gaussian_noise(img):
    """Add zero-mean, unit-stddev Gaussian noise to `img`.

    Args:
        img: Image array or tensor of any shape; it is cast to float32.

    Returns:
        A float32 tensor with the same shape as `img`.
    """
    # The noise shape is taken from the input itself: the previous
    # tf.compat.v1.placeholder cannot be created under eager execution and
    # tied the function to 224x224x3 images.
    img = tf.cast(img, tf.float32)
    noise = tf.random.normal(shape=tf.shape(img), mean=0.0, stddev=1, dtype=tf.float32)
    return tf.add(img, noise)


def iso_noise(img, color_shift=0.05, intensity=0.5):
    """Add camera-sensor-style noise to an RGB image and return it as uint8.

    The image is scaled by 1/255 and converted to HLS. Poisson noise, whose
    rate is derived from the standard deviation of the lightness channel, is
    added to lightness in proportion to the remaining headroom
    ``(1 - lightness)``. Normally distributed noise is added to hue, which is
    then wrapped back towards the 0-360 range. The result is converted back to
    RGB, multiplied by 255 and cast to uint8.

    Args:
        img: RGB image; presumably on a 0-255 scale given the 1/255 scaling.
        color_shift: Scales the standard deviation of the hue noise.
        intensity: Scales both the hue and the lightness noise.
    """
    scaled = np.multiply(img, float(1.0 / 255.0), dtype=np.float32)
    hls_image = cv2.cvtColor(scaled, cv2.COLOR_RGB2HLS)
    channel_std = cv2.meanStdDev(hls_image)[1]
    plane_shape = hls_image.shape[:2]

    # Draw lightness noise first, then hue noise (keeps seeded runs unchanged).
    lightness_noise = np.random.poisson(channel_std[1] * intensity * 255, plane_shape)
    hue_noise = np.random.normal(0, color_shift * 360 * intensity, plane_shape)

    # Both planes are views into hls_image, so the in-place ops modify it.
    hue_plane = hls_image[..., 0]
    hue_plane += hue_noise
    hue_plane[hue_plane < 0] += 360
    hue_plane[hue_plane > 360] -= 360

    lightness_plane = hls_image[..., 1]
    lightness_plane += (lightness_noise / 255) * (1.0 - lightness_plane)

    noisy_rgb = cv2.cvtColor(hls_image, cv2.COLOR_HLS2RGB) * 255
    return noisy_rgb.astype(np.uint8)


def random_cut_out(images):
    """Apply ``tfa.image.random_cutout`` with a 72x72 mask filled with zeros."""
    mask_size = (72, 72)
    return tfa.image.random_cutout(images, mask_size, constant_values=0)


def normalize(img, mean, std, max_pixel_value=255.0):
    """Standardise an image: ``(img - mean * max) / (std * max)`` in float32.

    Args:
        img: NumPy image array; a float32 copy is made, the input is untouched.
        mean: Per-channel means expressed as fractions of `max_pixel_value`.
        std: Per-channel standard deviations, expressed the same way.
        max_pixel_value: Value that `mean` and `std` are scaled by.

    Returns:
        The normalised float32 array.
    """
    offset = np.array(mean, dtype=np.float32)
    offset *= max_pixel_value

    scale = np.array(std, dtype=np.float32)
    scale *= max_pixel_value
    # Multiply by the reciprocal rather than dividing by the scaled std.
    inverse_scale = np.reciprocal(scale, dtype=np.float32)

    result = img.astype(np.float32)
    np.subtract(result, offset, out=result)
    np.multiply(result, inverse_scale, out=result)
    return result

In [None]:
def get_transforms_train(image, target_size=224):
    """Randomly augment one training image.

    Steps, in order: random square crop (60-100% of `target_size`) resized
    back to `target_size`, random horizontal flip, then four independent
    50%-probability stages: rotation by a multiple of 90 degrees, a
    photometric change (brightness + contrast, or gamma), a blur (box,
    Gaussian or motion), ISO-style noise, and finally random cut-out.

    Args:
        image: RGB image with pixel values on a 0-255 scale and sides of at
            least `target_size` pixels.
        target_size: Side length of the square output (default 224).

    Returns:
        A float32 NumPy array of shape ``(target_size, target_size, 3)`` with
        values in the 0-255 range.
    """
    crop_side = int(target_size * random.uniform(0.6, 1))
    temp = tf.image.random_crop(image, size=(crop_side, crop_side, 3)).numpy()
    temp = resize(temp, size=(target_size, target_size)).numpy()
    temp = tf.image.random_flip_left_right(temp).numpy()

    if random.choice([True, False]):
        # k=4 would be a full turn (no change), so only draw 1..3.
        k = random.randint(1, 3)
        temp = tf.image.rot90(temp, k=k).numpy()

    if random.choice([True, False]):
        if random.choice([True, False]):
            # NOTE(review): the image is on a 0-255 scale here, so a brightness
            # delta of at most 1.2 is barely visible -- confirm the intent.
            max_delta = random.uniform(0.2, 1.2)
            temp = tf.image.random_brightness(temp, max_delta=max_delta).numpy()
            temp = tf.image.random_contrast(temp, lower=0.2, upper=2.0).numpy()
        else:
            gamma = random.uniform(0.8, 1.2)
            # Gamma is a power law: apply it on a 0-1 scale and scale back,
            # otherwise 255 ** 1.2 pushes pixels far beyond the valid range.
            temp = tf.image.adjust_gamma(temp / 255.0, gamma=gamma).numpy() * 255.0
        # Keep pixels valid so the later uint8 cast in iso_noise cannot wrap.
        temp = np.clip(temp, 0.0, 255.0).astype(np.float32)

    if random.choice([True, False]):
        one_of_blur = random.choice([1, 2, 3])

        if one_of_blur == 1:
            temp = blur(temp, blur_limit=7)
        elif one_of_blur == 2:
            temp = gaussian_blur(temp)
        elif one_of_blur == 3:
            temp = motion_blur(temp)

    if random.choice([True, False]):
        # iso_noise returns uint8; cast back so the output dtype is consistent.
        temp = iso_noise(temp).astype(np.float32)

    if random.choice([True, False]):
        # random_cut_out works on batches: add a batch axis, then drop it again.
        batch = temp.reshape([1, temp.shape[0], temp.shape[1], 3])
        temp = random_cut_out(batch).numpy()[0]

    return temp

In [None]:
# Visual sanity check: augment one training image and display the result.
sample_path = '../input/sorghum-cultivar-identification-512512/train/2017-06-01__10-26-27-479.png'
sample_image = load_img(sample_path, target_size=(224,224))
x_ = get_transforms_train(sample_image)
plt.imshow(array_to_img(x_))

### Split data

In [None]:
# Load the predefined train / validation metadata tables.
df_train = pd.read_csv('../input/128128-sorghum-cultivar/train_meta.csv')
df_valid = pd.read_csv('../input/128128-sorghum-cultivar/valid_meta.csv')

# Hold out 10% of the validation rows as a test set. The fixed random_state
# makes the partition identical on every run, so reported metrics are comparable.
df_valid, df_test = train_test_split(df_valid, test_size=0.1, random_state=42)

print(f"train size: {len(df_train)}")
print(f"valid size: {len(df_valid)}")
print(f"test size: {len(df_test)}")

# Per-class sample counts for each split
print(df_train.cultivar.value_counts())
print(df_valid.cultivar.value_counts())
print(df_test.cultivar.value_counts())

## Training


In [None]:
# EfficientNetB3 backbone with ImageNet weights and no classification head,
# for 224x224 RGB inputs.
model = EfficientNetB3(include_top=False, input_shape=(224, 224, 3), weights='imagenet')

In [None]:
# Classification head stacked on the backbone's last layer:
# 7x7 average pooling -> flatten -> batch norm -> dropout -> 100-way softmax.
head_layers = [
    AveragePooling2D(pool_size=(7,7)),
    Flatten(),
    BatchNormalization(),
    Dropout(0.5),
]

x_ = model.layers[-1].output
for head_layer in head_layers:
    x_ = head_layer(x_)
output_layer = Dense(units=100, activation='softmax')(x_)

# Rebind `model` to the full network (backbone + head).
model = Model(model.input, output_layer)

In [None]:
# Adam with categorical cross-entropy; accuracy is tracked as the metric.
adam_optimizer = Adam(learning_rate=1e-4)
model.compile(
    optimizer=adam_optimizer,
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture of the assembled model.
model.summary()

In [None]:
# Settings shared by all three generators: same image folder, same columns,
# batches of 16 images resized to 224x224.
flow_options = dict(
    directory='../input/128128-sorghum-cultivar/train',
    x_col='image',
    y_col='cultivar',
    batch_size=16,
    target_size=(224, 224),
)

# Only the training stream is augmented (through get_transforms_train).
augmenting_datagen = ImageDataGenerator(preprocessing_function=get_transforms_train)
train_generator = augmenting_datagen.flow_from_dataframe(dataframe=df_train, **flow_options)

valid_generator = ImageDataGenerator().flow_from_dataframe(dataframe=df_valid, **flow_options)

test_generator = ImageDataGenerator().flow_from_dataframe(dataframe=df_test, **flow_options)

In [None]:
# Save the full model whenever validation accuracy reaches a new maximum.
model_checkpoint_callback = ModelCheckpoint(
    filepath='best-checkpoint.hdf5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop after 15 epochs without a better validation loss and restore the
# weights from the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

# One-cycle schedule spanning 40 epochs of 1232 steps, peaking at 1e-3.
one_cycle = OneCycleLr(max_lr=1e-3, epochs=40, steps_per_epoch=1232)

In [None]:
# Display the optimizer's current learning-rate variable.
model.optimizer.lr

In [None]:
# Train for up to 40 epochs of 1232 steps, validating on 370 batches per epoch.
training_callbacks = [early_stop, model_checkpoint_callback, one_cycle]

history = model.fit(
    x=train_generator,
    epochs=40,
    steps_per_epoch=1232,
    validation_data=valid_generator,
    validation_steps=370,
    callbacks=training_callbacks,
)

In [None]:
# Evaluate the best checkpoint written by ModelCheckpoint during training.
# ('model.hdf5' is only saved further down, so it does not exist yet on a
# fresh top-to-bottom run.)
load_model('best-checkpoint.hdf5').evaluate(test_generator)

In [None]:
# Evaluate the in-memory model (its state after `fit` returned) on the test split.
model.evaluate(test_generator)

In [None]:
# Persist the per-epoch training history as a pickled DataFrame.
temp_1 = pd.DataFrame.from_dict(history.history)
temp_1.to_pickle(path='history.pkl')

In [None]:
# Save the in-memory model to an HDF5 file.
model.save('model.hdf5')

In [None]:
# Display the class-name -> class-index mapping used by the training generator.
train_generator.class_indices

In [None]:
import json

# Store the class-name -> index mapping so predictions can be decoded later.
class_indices_text = json.dumps(train_generator.class_indices)
with open('class_indices.json', 'w') as file:
    file.write(class_indices_text)

In [None]:
# Load the sample submission; the cells below read its `filename` column and
# replace its `cultivar` column with the predicted labels.
df_pred = pd.read_csv('../input/sorghum-id-fgvc-9/sample_submission.csv')

In [None]:
# Inference generator. shuffle=False keeps the batches in the same order as
# the rows of df_pred, so preds[i] belongs to df_pred.iloc[i]. With the default
# shuffle=True the predictions come back in a random order and the submission
# labels are scrambled. class_mode=None yields images only, so the placeholder
# `cultivar` column is not interpreted as labels.
preds_generator = ImageDataGenerator().flow_from_dataframe(dataframe=df_pred,
                                                           directory='../input/sorghum-id-fgvc-9/test',
                                                           x_col='filename',
                                                           y_col=None,
                                                           class_mode=None,
                                                           shuffle=False,
                                                           batch_size=16,
                                                           target_size=(224, 224))

preds = model.predict(preds_generator)

# Files that cannot be found are dropped by the generator; fail loudly here
# rather than writing a misaligned submission.
assert len(preds) == len(df_pred), "number of predictions does not match the submission rows"

In [None]:
# Display the raw model outputs returned by model.predict.
preds

In [None]:
# Invert the class-name -> index mapping once, rather than rebuilding two
# lists from it for every single prediction.
index_to_class = {index: name for name, index in train_generator.class_indices.items()}

# Predicted class name for each row: the label whose index has the top score.
key = [index_to_class[int(index)] for index in np.argmax(preds, axis=1)]


In [None]:
# Swap the placeholder `cultivar` column for the predicted labels (the new
# column is appended last), then display the frame.
df_pred = df_pred.drop(columns=['cultivar']).assign(cultivar=key)
df_pred

In [None]:
# Write the submission file without the DataFrame index column.
df_pred.to_csv('submission_12.csv', index=False)