In [None]:
from fastai.vision.all import *

In [None]:
# Seed the random number generators up front so that repeated runs of the notebook are comparable.
set_seed(2021)

In [None]:
# Root directory of the competition data; the CSV files read later are resolved against it.
path = Path("../input/plant-pathology-2021-fgvc8")
# List the directory contents as a quick sanity check.
path.ls()

In [None]:
# Load the training annotations and preview the first rows.
train_df = pd.read_csv(path/"train.csv")
train_df.head()

In [None]:
# (number of rows, number of columns) of the training table.
train_df.shape

In [None]:
# Frequency of each distinct value in the "labels" column (each distinct string is counted separately).
train_df["labels"].value_counts()

I use this [resized dataset](https://www.kaggle.com/ankursingh12/resized-plant2021) to save time.  
Without it, resizing the images takes too long...😅

In [None]:
# Side length (pixels) shared by the per-item crop and the batch-level augmentations.
IMG_SIZE = 128

# Per-item transform: random square crop (ratio fixed to 1:1, min_scale=0.75) resized to IMG_SIZE.
item_tfms = [RandomResizedCrop(IMG_SIZE, min_scale=0.75, ratio=(1., 1.))]
# Per-batch transforms: fastai's standard augmentations with warping disabled,
# followed by normalization with the ImageNet statistics.
batch_tfms = [*aug_transforms(size=IMG_SIZE, max_warp=0), Normalize.from_stats(*imagenet_stats)]

# `ImageDataLoaders.from_df` has no `splitter` argument: it builds its own
# RandomSplitter from `valid_pct` / `seed`. The hold-out fraction is therefore
# passed as `valid_pct`, so the intended 10% validation split is the one used
# (a `splitter=` keyword is not part of this factory's signature).
dls = ImageDataLoaders.from_df(
    df=train_df,
    folder="../input/resized-plant2021/img_sz_512",
    valid_pct=0.1,
    seed=2021,        # same value as the notebook-wide set_seed call; keeps the split stable if this cell is re-run
    label_delim=" ",  # the label column holds space-separated labels -> multi-label targets
    item_tfms=item_tfms,
    batch_tfms=batch_tfms,
    bs=256,
)

In [None]:
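# If you prefer the DataBlock API, you can build the DataLoaders with the code below instead
# (note: it reads from the img_sz_640 folder, whereas the cell above reads from img_sz_512).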

# db = DataBlock(
#     blocks=(ImageBlock, MultiCategoryBlock),
#     get_x=ColReader("image", pref="../input/resized-plant2021/img_sz_640/"),
#     get_y=ColReader("labels", label_delim=" "),
#     splitter=RandomSplitter(valid_pct=0.1),
#     item_tfms=item_tfms,
#     batch_tfms=batch_tfms
# )
# dls = db.dataloaders(train_df)

In [None]:
# Visual sanity check: display a batch from the DataLoaders.
dls.show_batch()

In [None]:
# Make sure the torch hub checkpoint cache directory exists.
!mkdir -p /root/.cache/torch/hub/checkpoints
# ResNet-18 alternative, left disabled:
# !cp ../input/resnet18/resnet18.pth /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth
# Copy the ResNet-50 weights from the attached input dataset into that cache directory.
# Presumably this lets the pretrained resnet50 be loaded from disk instead of downloaded — TODO confirm.
!cp ../input/resnet50/resnet50.pth /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth

In [None]:
# Build a ResNet-50 learner on the DataLoaders, tracked with the multi-label
# accuracy and F1 metrics, then switch it to mixed-precision (fp16) training.
learn = cnn_learner(dls, resnet50, metrics=[accuracy_multi, F1ScoreMulti()])
learn = learn.to_fp16()

In [None]:
# Run the learning-rate finder to help choose a learning rate.
# NOTE(review): the fine_tune call that follows hard-codes a base learning rate of 1e-1 —
# confirm that this value is consistent with what the finder suggests.
learn.lr_find()

In [None]:
# Fine-tune: 3 epochs with the pretrained body frozen, then 7 epochs unfrozen,
# using a base learning rate of 1e-1. The callbacks (default SaveModelCallback,
# early stopping with a patience of 3) are passed through to the training loop.
learn.fine_tune(
    epochs=7,
    base_lr=1e-1,
    freeze_epochs=3,
    cbs=[SaveModelCallback(), EarlyStoppingCallback(patience=3)],
)

In [None]:
# Plot the losses recorded during training.
learn.recorder.plot_loss()

In [None]:
# Display a sample of the model's results for a quick visual check.
learn.show_results()

In [None]:
# Build an interpretation object from the trained learner; the plotting cells that follow use it.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Show the 9 samples with the highest loss.
interp.plot_top_losses(9)

In [None]:
# Plot the confusion matrix from the interpretation object.
# NOTE(review): the learner is set up for multi-label targets (label_delim=" ", *_multi metrics);
# verify that this confusion matrix is meaningful for multi-label predictions before relying on it.
interp.plot_confusion_matrix()

In [None]:
# Load the sample submission; its "image" column supplies the test file names used below.
submission_df = pd.read_csv(path/"sample_submission.csv")
submission_df.head()

In [None]:
# Turn every test image file name into its path inside the competition's test_images folder.
test_image_path_series = submission_df["image"].map(
    lambda file_name: f"../input/plant-pathology-2021-fgvc8/test_images/{file_name}"
)
test_image_path_series.head()

In [None]:
# Build a test DataLoader over the test image paths from the learner's DataLoaders.
test_dl = learn.dls.test_dl(test_image_path_series)
# Predict with test-time augmentation; the second returned value is discarded.
preds, _ = learn.tta(dl=test_dl)

In [None]:
# Inspect the predictions returned by learn.tta for the test images.
preds

In [None]:
# Class names known to the DataLoaders; pred_to_labels indexes into this by prediction position.
vocab = learn.dls.vocab
vocab

In [None]:
# Default probability cut-off above which a class is included in the predicted labels.
threshold = 0.5

def pred_to_labels(pred, thresh=None, classes=None):
    """Convert one row of per-class probabilities into a space-separated label string.

    Args:
        pred: 1-D sequence of per-class probabilities (list or 1-D tensor), ordered
            like `classes`.
        thresh: probability cut-off; a class is kept when its probability is strictly
            greater than it. Defaults to the notebook-level `threshold`.
        classes: indexable collection of class names. Defaults to the notebook-level
            `vocab`.

    Returns:
        The selected class names joined by single spaces. If no class clears the
        cut-off, the single most probable class is returned instead, so that a
        non-empty prediction row never yields an empty label. An empty `pred`
        yields "".
    """
    cutoff = threshold if thresh is None else thresh
    names = vocab if classes is None else classes

    picked = [names[i] for i, probability in enumerate(pred) if probability > cutoff]

    # Fallback: an empty label string would end up as a blank cell in the submission,
    # so pick the best-scoring class when nothing clears the cut-off.
    if not picked and len(pred) > 0:
        best = max(range(len(pred)), key=lambda i: float(pred[i]))
        picked = [names[best]]

    return " ".join(picked)

In [None]:
# Convert every prediction row into its space-separated label string.
labels_list = list(map(pred_to_labels, preds))
labels_list

In [None]:
# Replace the placeholder "labels" column with the predicted label strings and preview the result.
submission_df = submission_df.assign(labels=labels_list)
submission_df.head()

In [None]:
# Write the submission file to the working directory, without the DataFrame index column.
submission_df.to_csv("submission.csv", index=False)