# Import libraries

In [None]:
import os
import json
import pandas as pd
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
from skimage.io import imread
import matplotlib.pyplot as plt
from skimage.transform import resize
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import ResNet152V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# Define functions

# Execute

In [None]:
# Shape of the images fed to the network, as (height, width, channels).
img_size = (384, 384, 3)
# Number of images per mini-batch produced by the data generators.
batch_size = 16

## Load data

In [None]:
# Competition input directory and the folder holding the training images.
root = "../input/cassava-leaf-disease-classification"
root_train = os.path.join(root, "train_images")

# Mapping from label number (a string key, since it comes from JSON) to the
# human-readable disease name.
label_map_path = os.path.join(root, "label_num_to_disease_map.json")
with open(label_map_path) as f:
    label_num_to_disease_map = json.load(f)

# Training table with one row per image. Labels are cast to str so that they
# can be looked up directly in the JSON mapping loaded above.
train_csv_path = os.path.join(root, "train.csv")
df = pd.read_csv(train_csv_path)
df["label"] = df["label"].astype(str)
df["label_name"] = df["label"].map(label_num_to_disease_map)

# Show the relative frequency of every class, ordered by class name.
class_share = df["label_name"].value_counts(normalize=True).sort_index()
print(class_share)

In [None]:
# Hold out 10% of the labelled images for validation. The split is stratified
# on the label so that the training and validation sets keep the same class
# proportions; a plain random split can leave rare classes under-represented
# in the small validation set. random_state keeps the split reproducible.
df_train, df_val = train_test_split(
    df,
    test_size=0.1,
    random_state=112,
    stratify=df["label"],
)
df_train.head()

## Calculate class_weight

In [None]:
# Balanced class weights: n_samples / (n_classes * n_samples_in_class).
# The number of classes is taken from the labels actually present in df_train
# instead of being hard-coded. Keys are the positions of the labels in sorted
# order.
# NOTE(review): this presumably matches the class indices assigned by
# flow_from_dataframe -- confirm against `datagen_flow_train.class_indices`
# before passing the dict to `model.fit`.
label_counts = df_train["label"].value_counts().sort_index()
n_classes = len(label_counts)
class_weight_dict = {
    class_idx: len(df_train) / (n_classes * count)
    for class_idx, count in enumerate(label_counts.to_numpy())
}
class_weight_dict

## Peek inside the data

In [None]:
# Show up to four random training images per class, one figure per class.
# Group by the scalar column name (not a one-element list): with a list,
# recent pandas versions yield 1-tuples as group keys, which are not valid
# keys of label_num_to_disease_map.
for label, df_train_grp in df_train.groupby("label"):
    plt.figure(figsize=(20, 9))
    # Never request more samples than the group holds (sample() would raise).
    n_show = min(4, len(df_train_grp))
    sampled_ids = df_train_grp.sample(n=n_show)["image_id"]
    for i, filename in enumerate(sampled_ids, start=1):
        plt.subplot(1, 4, i)
        plt.imshow(imread(os.path.join(root_train, filename)))
        plt.title(label_num_to_disease_map[label])

## Define datagenerator

In [None]:
# Random augmentations applied to training images only: rotation, shifts,
# brightness, shear, zoom and flips; pixels created by a transform are filled
# with the "nearest" mode.
augmentation_settings = dict(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=(0.90, 1.10),
    shear_range=0.1,
    zoom_range=0.1,
    fill_mode="nearest",
    horizontal_flip=True,
    vertical_flip=True,
)

# Training generator: ResNetV2 preprocessing plus the augmentations above.
datagen_train = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_settings,
)

# Validation generator: ResNetV2 preprocessing only, no augmentation.
datagen_val = ImageDataGenerator(preprocessing_function=preprocess_input)

### Define flow_from_dataframe (train)

In [None]:
# Stream augmented training batches from disk. File names come from the
# "image_id" column (relative to root_train) and labels from the "label"
# column; "sparse" class mode yields integer class indices, matching the
# sparse categorical cross-entropy loss used for training.
train_flow_options = {
    "dataframe": df_train,
    "directory": root_train,
    "x_col": "image_id",
    "y_col": "label",
    "target_size": img_size[:2],  # (height, width) only, channels dropped
    "class_mode": "sparse",
    "batch_size": batch_size,
    "shuffle": True,
    "seed": 112,
}
datagen_flow_train = datagen_train.flow_from_dataframe(**train_flow_options)

### Define flow_from_dataframe (validation)

In [None]:
# Stream validation batches from disk with the non-augmenting generator.
# Images live in the same directory as the training images; only the rows of
# df_val are read. Options mirror the training flow.
val_target_size = img_size[:2]  # (height, width) only, channels dropped
datagen_flow_val = datagen_val.flow_from_dataframe(
    df_val,
    directory=root_train,
    x_col="image_id",
    y_col="label",
    target_size=val_target_size,
    class_mode="sparse",
    batch_size=batch_size,
    shuffle=True,
    seed=112,
)

## Visualize augmented data

In [None]:
# Build a generator over one randomly chosen training image so that the
# effect of the augmentation pipeline can be inspected visually.
generator = datagen_train.flow_from_dataframe(
    df_train.sample(n=1),
    directory=root_train,
    x_col="image_id",
    y_col="label",
    target_size=img_size[:2],
    batch_size=batch_size,
    class_mode="sparse",
)

# generator[0] is an (images, labels) batch; take the first image of it.
# The batch is requested ten times so that ten independently augmented
# versions of the same picture are collected.
aug_images = [generator[0][0][0] for _ in range(10)]

fig, axes = plt.subplots(2, 5, figsize=(20, 10))
axes = axes.flatten()
for img, ax in zip(aug_images, axes):
    # The generator output has already gone through the ResNetV2
    # preprocess_input, which rescales pixels to [-1, 1]. imshow expects
    # floats in [0, 1], so map the values back before displaying; otherwise
    # everything below 0 is clipped and the picture looks wrong.
    ax.imshow(np.clip((img + 1.0) / 2.0, 0.0, 1.0))
    ax.axis("off")
plt.tight_layout()
plt.show()

## Create model (Transfer learning & fine-tuning)

In [None]:
# Transfer-learning setup following https://keras.io/guides/transfer_learning/
# The ResNet152V2 backbone is created without its classification head and
# without downloading weights; the pretrained weights are loaded from a local
# file instead.
base_model = ResNet152V2(include_top=False, weights=None, input_shape=img_size)
base_model.load_weights('../input/keras-pretrain-model-weights/resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5')

# Freeze the backbone so that only the new head is trained in the first phase.
base_model.trainable = False

inputs = keras.Input(shape=img_size)
# Calling the backbone with training=False keeps it in inference mode, which
# the guide linked above describes as important for the later fine-tuning
# phase.
backbone_features = base_model(inputs, training=False)
pooled_features = keras.layers.GlobalAveragePooling2D()(backbone_features)

# Classification head: one softmax unit per class (5 classes).
outputs = keras.layers.Dense(5, activation='softmax')(pooled_features)
model = keras.Model(inputs, outputs)

model.summary()

In [None]:
# Compile for the head-training phase (backbone frozen). Labels are integer
# class indices, hence the sparse categorical cross-entropy loss.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras versions.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-2),
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Keep only the weights of the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint(
    "weight.h5",
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=2,
)

# Multiply the learning rate by 0.1 after 2 epochs without val_loss
# improvement, never going below 1e-8.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=2,
    min_lr=1e-8,
    verbose=2,
)

# Stop training after 4 epochs without val_loss improvement.
earlystop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=4,
    verbose=2,
)

callbacks_list = [checkpoint, reduce_lr, earlystop]

In [None]:
# Phase 1: train only the classification head on top of the frozen backbone.
# One epoch covers the number of full batches in df_train.
# Class weighting is currently disabled; pass `class_weight=class_weight_dict`
# to model.fit to enable it.
head_steps_per_epoch = len(df_train) // batch_size
model.fit(
    datagen_flow_train,
    epochs=20,
    steps_per_epoch=head_steps_per_epoch,
    validation_data=datagen_flow_val,
    callbacks=callbacks_list,
)

## Fine-tuning

In [None]:
# Phase 2: fine-tune the whole network, starting from the best checkpoint
# written during head training.
model.load_weights('weight.h5')

# Make every backbone layer trainable again.
base_model.trainable = True

# The model must be recompiled after changing `trainable` on an inner layer,
# otherwise the change is not taken into account. A very low learning rate is
# used for this phase.
finetune_optimizer = keras.optimizers.Adam(1e-5)
model.compile(
    optimizer=finetune_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

# Train end-to-end for a few epochs only -- stop before the model overfits.
# The same callbacks as in the head-training phase are reused.
finetune_steps_per_epoch = len(df_train) // batch_size
model.fit(
    datagen_flow_train,
    epochs=5,
    steps_per_epoch=finetune_steps_per_epoch,
    validation_data=datagen_flow_val,
    callbacks=callbacks_list,
)

## Predict test images

In [None]:
# Restore the best checkpoint before predicting.
model.load_weights('weight.h5')

# Classify every file in the test directory one image at a time. Each image
# is resized to the network input size and passed through the same
# preprocess_input as the training data.
test_dir = os.path.join(root, "test_images")
list_results = []
# os.listdir returns entries in arbitrary order; sorting makes the order of
# the submission rows reproducible.
for filename in sorted(os.listdir(test_dir)):
    img = image.load_img(os.path.join(test_dir, filename), target_size=img_size[:2])
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add the batch dimension
    x = preprocess_input(x)
    # verbose=0 avoids printing one progress bar per image.
    pred = model.predict(x, verbose=0)
    # Index of the highest softmax output, stored as a plain Python int.
    predicted_label = int(np.argmax(pred))
    print(filename, predicted_label)
    list_results.append((filename, predicted_label))

## Create submission.csv

In [None]:
# Turn the collected (file name, predicted label) pairs into the submission
# table and write it without the index column.
submission_columns = ["image_id", "label"]
df_submission = pd.DataFrame(data=list_results, columns=submission_columns)
df_submission.to_csv("submission.csv", index=False)

# Delete the checkpoint file now that predictions are done.
# NOTE(review): presumably so that only submission.csv remains in the output
# directory -- confirm.
os.remove("weight.h5")