In [1]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import seaborn as sns
import cv2
import albumentations as A

from albumentations.core.composition import Compose
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold, train_test_split

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Add, Activation
from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten

from tensorflow.keras.activations import relu, softmax
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import load_img, img_to_array, array_to_img

### Config

In [2]:
# --- Reproducibility ---------------------------------------------------------
# Seed every RNG the notebook draws from (python `random`, numpy, tensorflow) so
# that augmentation choices and weight initialisation repeat across runs.
seed = 48  # same value the train/valid/test split passes as random_state
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# --- Data locations ----------------------------------------------------------
train_meta_data = '../train.csv'
train_data_dir = '../input/paddy-disease-classification/train_images'

# --- Training hyper-parameters -----------------------------------------------
epochs = 150
lr = 1e-4
valid_split = 0.2
input_size = 224   # images are resized to input_size x input_size
batch_size = 32
classes = 10       # number of target classes
initializer = tf.keras.initializers.HeUniform()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
loss = tf.keras.losses.categorical_crossentropy

In [17]:
# Stop training once validation loss has gone 15 epochs without improving and
# restore the best weights seen so far.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.75 after 5 epochs without validation-loss
# improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.75,
    patience=5,
    verbose=1,
)

### Pre-processing Pipeline

In [None]:
def resize(image, size):
    """Resize `image` to `size` with `tf.image.resize` (default settings)."""
    resized = tf.image.resize(image, size)
    return resized


def blur(img, blur_limit):
    """Box-blur `img` with a square `blur_limit` x `blur_limit` kernel."""
    kernel_size = [blur_limit] * 2
    return cv2.blur(img, ksize=kernel_size)


def gaussian_blur(img, blur_limit=(3, 7), sigma_limit=0):
    """Gaussian-blur `img` with a randomly sized square kernel.

    `blur_limit` used to be handed to OpenCV verbatim as `ksize`, so the
    default `(3, 7)` always produced a fixed, anisotropic 3x7 kernel. It is now
    treated as an inclusive range from which one odd kernel side is sampled.

    Args:
        img: image array accepted by `cv2.GaussianBlur`.
        blur_limit: `(low, high)` inclusive range for the kernel side, or a
            single int for a fixed side. A pair with `low == high` keeps the
            previous behaviour for square kernels.
        sigma_limit: forwarded to OpenCV as `sigmaX`.

    Returns:
        The blurred image as returned by `cv2.GaussianBlur`.

    Raises:
        ValueError: if the range contains no odd kernel size.
    """
    if np.isscalar(blur_limit):
        low = high = int(blur_limit)
    else:
        low, high = (int(v) for v in blur_limit)

    # The kernel side must be odd, so only odd candidates are eligible.
    odd_sizes = [k for k in range(low, high + 1) if k % 2 == 1]
    if not odd_sizes:
        raise ValueError(f'blur_limit={blur_limit!r} contains no odd kernel size')

    side = random.choice(odd_sizes)
    return cv2.GaussianBlur(img, ksize=(side, side), sigmaX=sigma_limit)


def motion_blur(img, blur_limit=7):
    """Blur `img` along the horizontal axis.

    Builds a `blur_limit` x `blur_limit` kernel that is zero everywhere except
    its middle row, whose entries are each 1/blur_limit, and convolves the
    image with it.
    """
    kernel = np.zeros((blur_limit, blur_limit))
    middle_row = (blur_limit - 1) // 2
    kernel[middle_row, :] = np.ones(blur_limit) / blur_limit
    return cv2.filter2D(img, -1, kernel=kernel)


def random_cut_out(images):
    """Apply `tfa.image.random_cutout` with a 32x32 mask filled with zeros."""
    mask_size = (32, 32)
    return tfa.image.random_cutout(images, mask_size, constant_values=0)


def aug_fn(image):
    """Run `get_transform` on `image`, then scale and resize the result.

    The augmented image is divided by 255, cast to float32 and resized to
    224x224 before being returned.
    """
    augmented = get_transform(image=image)["image"]
    scaled = tf.cast(augmented/255.0, tf.float32)
    return tf.image.resize(scaled, size=[224, 224])

# Albumentations pipeline used by `aug_fn`: with probability 0.2, drop 8-16
# rectangular holes whose sides are between 8 and 16 pixels.
get_transform = Compose([A.CoarseDropout(max_holes=16, min_holes=8, max_height=16, max_width=16, min_height=8, min_width=8, p=0.2)])

In [None]:
def get_transforms_train(image):
    """Apply the random training-time augmentation pipeline to one image.

    Steps: a random square crop (33%-100% of 224 px) resized back to 224x224,
    a random horizontal flip, then, each with its own probability,
    brightness/contrast jitter, a hue shift, a saturation change and one of
    three blurs. Finally either a 32x32 cut-out is applied or the image goes
    through `aug_fn` (albumentations coarse dropout).

    Args:
        image: H x W x 3 image with H, W >= 224 (needed by the random crop);
            presumably 0-255 pixel values — TODO confirm against the caller.

    Returns:
        A float32 tensor whose pixels are divided by 255 on every path.
        NOTE(review): if this is plugged into an ImageDataGenerator that also
        sets `rescale`, pixels would be scaled twice — confirm before wiring in.
    """
    # get random crop of random crop window size
    crop_side = int(224*random.uniform(0.33, 1))
    temp = tf.image.random_crop(image, size=(crop_side, crop_side, 3)).numpy()
    temp = resize(temp, size=(224, 224)).numpy()

    # random flip (horizontally: left <-> right)
    temp = tf.image.random_flip_left_right(temp).numpy()

    if np.random.choice([True, False], p=[0.45, 0.55]):
        if random.choice([True, False]):
            delta = random.uniform(-0.3, 0.3)
            cf = random.uniform(-1.0, 1.0)
            temp = tf.image.adjust_brightness(temp, delta=delta).numpy()
            # NOTE(review): contrast_factor is a multiplier, so values in
            # (-1, 1) only reduce (or invert) contrast — confirm this is intended.
            temp = tf.image.adjust_contrast(temp, contrast_factor=cf).numpy()

    if np.random.choice([True, False], p=[0.25, 0.75]):
        delta = random.uniform(-0.1, 0.2)
        temp = tf.image.adjust_hue(temp, delta=delta).numpy()

    if np.random.choice([True, False], p=[0.2, 0.8]):
        # NOTE(review): saturation_factor is a multiplier, so values near 0
        # almost fully desaturate the image — confirm this is intended.
        sf = random.uniform(-0.1, 0.1)
        temp = tf.image.adjust_saturation(temp, saturation_factor=sf).numpy()

    if np.random.choice([True, False], p=[0.4, 0.6]):
        one_of_blur = random.choice([1, 2, 3])

        if one_of_blur == 1:
            temp = blur(temp, blur_limit=7)
        elif one_of_blur == 2:
            temp = gaussian_blur(temp)
        elif one_of_blur == 3:
            temp = motion_blur(temp)

    if np.random.choice([True, False], p=[0.3, 0.7]):
        # random_cut_out works on a batch, so add and then strip a batch axis.
        batch = temp.reshape([1, temp.shape[0], temp.shape[1], 3])
        cut = random_cut_out(batch).numpy()[0]

        # Scale exactly like aug_fn does. Previously this branch returned
        # un-normalised pixels, so the output range depended on a coin flip.
        return tf.convert_to_tensor(cut / 255.0, dtype=tf.float32)

    temp = aug_fn(temp).numpy()

    return tf.convert_to_tensor(temp, dtype=tf.float32)

### Config data loaders

In [5]:
# Build one record per training image: [path, class folder name, file name].
# The class label is the name of the sub-folder the image lives in.
records = []

# Sorted listings make the row order (and therefore the seeded split below)
# independent of the arbitrary order os.listdir returns.
for folder in sorted(os.listdir(train_data_dir)):
    folder_path = os.path.join(train_data_dir, folder)

    # Skip stray files at the top level; only directories are class folders.
    if not os.path.isdir(folder_path):
        continue

    for file in sorted(os.listdir(folder_path)):
        records.append([os.path.join(folder_path, file), folder, file])

train_data = pd.DataFrame(records, columns=['file_path', 'label', 'file'])

In [6]:
# Show the assembled table of image paths, labels and file names as the cell output.
train_data

In [7]:
# 70% train / 27% validation / 3% test.
# Both splits are stratified on the label so every subset keeps the class
# proportions of the full dataset.
# NOTE(review): stratification needs enough images per class in each subset — confirm
# for very small datasets.
train, holdout = train_test_split(train_data,
                                  test_size=0.3,
                                  random_state=48,
                                  stratify=train_data['label'])

# `holdout` keeps the 30% remainder under its own name, so `valid` is no
# longer overwritten by a value derived from itself.
valid, test = train_test_split(holdout,
                               test_size=0.1,
                               random_state=48,
                               stratify=holdout['label'])

train.shape, valid.shape, test.shape

In [8]:
def _label_share(frame, subset):
    """Return one row per class with its relative frequency in `frame`."""
    share = frame['label'].value_counts(normalize=True)
    # Name the columns explicitly: what value_counts().reset_index() calls them
    # differs between pandas versions, which broke the x/y lookup in the plot.
    return pd.DataFrame({'class': share.index,
                         'share': share.to_numpy(),
                         'subset': subset})


tr = _label_share(train, 'train')
va = _label_share(valid, 'valid')

temp = pd.concat([tr, va], ignore_index=True)

# Side-by-side bars per class to compare class balance between train and valid.
plt.figure(figsize=[24,6], dpi=300)
sns.barplot(x='class',
            y='share',
            hue='subset',
            data=temp)
plt.xlabel('class')
plt.ylabel('share of subset')
plt.title('Class balance: train vs. validation')
plt.savefig('class-balance.jpg')
plt.show()

In [18]:
# Fix the label -> index mapping once, from the full label set, so the three
# generators cannot disagree if a subset happens to miss a class.
class_names = sorted(train_data['label'].unique())

# Training generator: rescale to [0, 1] plus light geometric augmentation.
# `get_transforms_train` is not plugged in (no preprocessing_function is set).
generator_tr = ImageDataGenerator(rescale=1 / 255,
                                  rotation_range=10,
                                  shear_range=0.25,
                                  zoom_range=0.1,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  horizontal_flip=True,
                                  vertical_flip=True)

# Evaluation generator: rescale only, no augmentation.
generator = ImageDataGenerator(rescale=1 / 255)

# Arguments shared by all three flows.
flow_kwargs = dict(x_col='file_path',
                   y_col='label',
                   classes=class_names,
                   target_size=(input_size, input_size),
                   batch_size=batch_size)

train_datagen = generator_tr.flow_from_dataframe(dataframe=train, **flow_kwargs)

# Keep evaluation data in dataframe order so predictions can be aligned with
# the rows of `valid` / `test`.
valid_datagen = generator.flow_from_dataframe(dataframe=valid, shuffle=False, **flow_kwargs)

test_data = generator.flow_from_dataframe(dataframe=test, shuffle=False, **flow_kwargs)

In [11]:
# Sanity check: number of images in one training / validation batch.
# The builtin next() is used because the `.next()` method is not available on
# every Keras iterator version. Each call consumes one batch.
len(next(train_datagen)[0]), len(next(valid_datagen)[0])

### Model

In [19]:
# EfficientNetB4 feature extractor: ImageNet weights, classification head left
# out (include_top=False) so a task-specific head can be attached.
back_bone = tf.keras.applications.EfficientNetB4(weights='imagenet', include_top=False)
# Print the layer-by-layer summary of the backbone.
back_bone.summary()

In [13]:
# Render the backbone's layer graph to effnet-b4.png.
tf.keras.utils.plot_model(back_bone, to_file='effnet-b4.png')

In [20]:
# Classifier = EfficientNetB4 backbone -> global average pooling -> softmax head.
input_layer = Input(shape=(input_size, input_size, 3))

# The data generators and the inference cell feed pixels scaled to [0, 1], but
# the Keras EfficientNet models do their own normalisation internally and
# expect raw [0, 255] pixels. Undo the 1/255 scaling before the backbone so the
# ImageNet weights see the input range they were trained on.
x = tf.keras.layers.Rescaling(255.0)(input_layer)
x = back_bone(x)
x = GlobalAveragePooling2D()(x)

# One output unit per class; driven by the `classes` config value instead of a
# hard-coded 10.
output_layer = Dense(classes, activation='softmax')(x)

model = Model(input_layer, output_layer)

model.compile(optimizer=optimizer,
              loss=loss,
              metrics=['accuracy'])

In [21]:
# Print the layer summary of the assembled classifier.
model.summary()

In [None]:
# Train with early stopping and learning-rate reduction on plateau.
# `batch_size` is not passed: the generators already yield batches, and Keras
# does not accept the argument for generator input.
# NOTE(review): the config cell defines `epochs = 150` but 50 is used here —
# confirm which value is intended.
history = model.fit(train_datagen,
                    validation_data=valid_datagen,
                    epochs=50,
                    callbacks=[early_stop, reduce_lr])

In [None]:
# Loss and accuracy on the held-out test generator. It is named `test_data`;
# `test_datagen` was never defined and raised a NameError.
model.evaluate(test_data)

### Evaluate

In [None]:
# The model has a single output compiled with metrics=['accuracy'], so Keras
# records the curves under 'accuracy' / 'val_accuracy'. The 'main_out_*' keys
# do not exist for this model.
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

plt.figure(figsize=[12,6], dpi=300)
sns.lineplot(x=list(range(len(train_acc))),
             y=train_acc,
             label='train')
sns.lineplot(x=list(range(len(val_acc))),
             y=val_acc,
             label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Accuracy per epoch')
plt.show()

In [None]:
# Single-output model: Keras records the loss curves under 'loss' / 'val_loss'.
# The 'main_out_*' keys do not exist for this model.
train_loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=[12,6], dpi=300)
sns.lineplot(x=list(range(len(train_loss))),
             y=train_loss,
             label='train')
sns.lineplot(x=list(range(len(val_loss))),
             y=val_loss,
             label='validation')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Loss per epoch')
plt.show()

In [None]:
# Report [loss, accuracy] on the test and validation generators. The
# validation generator is named `valid_datagen`; `valid_data` was never defined.
print(f'test score : {model.evaluate(test_data)} -- validation : {model.evaluate(valid_datagen)}')

### Saving files

In [None]:
# Persist the per-epoch metrics recorded by model.fit as a CSV file.
temp = pd.DataFrame(history.history)
temp.to_csv('model_inc_history.csv', index=False)

In [None]:
# Save the trained model to model.hdf5.
model.save('model.hdf5')

In [None]:
# Save only the model weights.
# NOTE(review): the file name says "inception" while the backbone loaded in
# this notebook is EfficientNetB4 — confirm the intended name.
model.save_weights('model_inception_weights.hdf5')

### Inference

In [None]:
# Label -> class-index mapping used during training. It lives on the training
# generator; `train_data` is the DataFrame of file paths and has no such attribute.
train_datagen.class_indices

In [None]:
# Predict a label for every image in the competition test folder.
test_images_dir = '../input/paddy-disease-classification/test_images'

# List the folder once (it was re-listed on every iteration just for the
# progress counter) and sort for a deterministic submission order.
test_files = sorted(os.listdir(test_images_dir))

# Reverse the generator's label -> index mapping once, up front. The mapping
# lives on the training generator, not on the `train_data` DataFrame.
index_to_label = {index: name for name, index in train_datagen.class_indices.items()}

test_preds = []

for i, file in enumerate(test_files):
    img = load_img(os.path.join(test_images_dir, file), target_size=(input_size, input_size))

    # Same 1/255 scaling the generators apply, plus a leading batch axis.
    batch = np.expand_dims(img_to_array(img)/255.0, axis=0)
    pred = model.predict(batch, verbose=0)

    # `pred` has shape (1, n_classes): take the argmax over the class axis.
    # `pred[0][0]` was a single scalar, whose argmax is always 0.
    class_index = int(np.argmax(pred[0]))

    test_preds.append([file, index_to_label[class_index]])
    print(f"{i+1}/{len(test_files)}", end='\r')

In [None]:
# Assemble the predictions into a two-column table (image_id, label), write it
# to model_submission.csv and display it as the cell output.
temp = pd.DataFrame(test_preds, columns=['image_id', 'label'])
temp.to_csv('model_submission.csv', index=False)
temp