<a href="https://colab.research.google.com/github/aminojagh/fast-ai/blob/main/NB5-Road-to-the-top.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Initial Setup

Using fastkaggle to set up the competition (requires a Kaggle API token, `kaggle.json`; the cell below uploads it to `/root/.config/kaggle`).

In [None]:
kaggle_config_path = '/root/.config/kaggle'
!mkdir {kaggle_config_path}
from google.colab import files
files.upload(kaggle_config_path)
!chmod 600 /root/.config/kaggle/kaggle.json

In [None]:
!pip install -Uq fastkaggle fastai

In [None]:
from fastkaggle import setup_comp, iskaggle, push_notebook
from fastai.vision.all import (get_image_files, PILImage, set_seed,
                               ImageDataLoaders, Resize, aug_transforms,
                               vision_learner, error_rate, valley, slide,
                               GradientAccumulation, save_pickle, first)

from fastcore.parallel import parallel
import pandas as pd, numpy as np
import pickle

In [None]:
# Kaggle competition slug; also passed to the submission calls later on.
comp = 'paddy-disease-classification'
# path = setup_comp(comp, install='fastai "timm>=0.6.2.dev0"')
# `path` is used below as the root of the competition data
# (train_images, test_images, sample_submission.csv, train.csv).
path = setup_comp(comp, install='fastai timm')
print(path)
display(path.ls())

## Looking at the Data

In [None]:
# Folder containing the training images.
trn_path = path/'train_images'
# NOTE: this rebinds `files`, which the setup cell imported from google.colab.
files = get_image_files(trn_path)
# img = PILImage.create(files[0])
# print(img.size)
# img.to_thumb(128)

In [None]:
def f(o):
    """Open the image at `o` and return its `size` attribute."""
    img = PILImage.create(o)
    return img.size

# Measure every training image with 8 workers, then count how often each size occurs.
sizes = parallel(f, files, n_workers=8)
pd.Series(sizes).value_counts()

In [None]:
# Build DataLoaders from the class sub-folders: hold out 20% for validation
# (fixed seed), squish every item to 480, then augment batches down to 128.
dls = ImageDataLoaders.from_folder(trn_path, valid_pct=0.2, seed=42,
    item_tfms=Resize(480, method='squish'),
    batch_tfms=aug_transforms(size=128, min_scale=0.75))

# dls.show_batch(max_n=6)

## Our first model

In [None]:
# resnet26d learner in half precision, tracking error rate; path='.' is passed as the learner's path.
learn = vision_learner(dls, 'resnet26d', metrics=error_rate, path='.').to_fp16()
# Run the learning-rate finder with two suggestion functions (valley and slide).
learn.lr_find(suggest_funcs=(valley, slide))

In [None]:
# Fine-tune for 3 epochs with a base learning rate of 0.01.
learn.fine_tune(3, 0.01)

## Submitting to Kaggle-I

In [None]:
# Build the first submission: predict a label for every test image and
# write it into the sample-submission template.
ss = pd.read_csv(path/'sample_submission.csv')
# Sorted so the predictions line up with the submission rows
# (assumes the template is ordered by file name -- TODO confirm).
tst_files = get_image_files(path/'test_images').sorted()
tst_dl = dls.test_dl(tst_files)

# with_decoded=True adds the decoded predictions (class indices) as a third result.
probs,_,idxs = learn.get_preds(dl=tst_dl, with_decoded=True)
# print(idxs)
# print(dls.vocab)
# Translate class indices into label strings via the DataLoaders vocab.
mapping = dict(enumerate(dls.vocab))
results = pd.Series(idxs.numpy(), name="idxs").map(mapping)

ss['label'] = results
ss.to_csv('subm.csv', index=False)
# !head subm.csv

# Submit through the Kaggle API only when not running on Kaggle itself.
if not iskaggle:
    from kaggle import api
    api.competition_submit_cli('subm.csv', 'initial rn26d 128px', comp)
    # push_notebook('jhoward', 'first-steps-road-to-the-top-part-1',
    #               title='First Steps: Road to the Top, Part 1',
    #               file='first-steps-road-to-the-top-part-1.ipynb',
    #               competition=comp, private=False, gpu=True)

## Going faster

In [None]:
from pathlib import Path
from fastai.vision.all import resize_images, ResizeMethod, PadMode
import numpy as np

In [None]:
# Write resized copies of the training images (max_size=256) into ./sml and
# point trn_path at them for the faster experiments that follow.
trn_path = Path('sml')
resize_images(path/'train_images', dest=trn_path, max_size=256, recurse=True)

In [None]:
def train(trn_path, arch, item, batch, epochs=5):
    """Fine-tune `arch` on the image folder `trn_path` and return the learner.

    `item` and `batch` are the item-level and batch-level transforms handed
    to `ImageDataLoaders.from_folder`. 20% of the images are held out for
    validation with a fixed seed (42). Training runs in half precision for
    `epochs` epochs at a base learning rate of 0.01, tracking `error_rate`.
    """
    loaders = ImageDataLoaders.from_folder(
        trn_path,
        valid_pct=0.2,
        seed=42,
        item_tfms=item,
        batch_tfms=batch,
    )
    learner = vision_learner(loaders, arch, metrics=error_rate)
    learner = learner.to_fp16()
    learner.fine_tune(epochs, 0.01)
    return learner

In [None]:
# # our initial model
# learn = train(trn_path,
#               'resnet26d',
#               item=Resize(192),
#               batch=aug_transforms(size=128, min_scale=0.75))

## A ConvNeXt model

In [None]:
# Architecture name passed to vision_learner through train().
arch = 'convnext_small_in22k'

# learn = train(trn_path,
#               arch,
#               item=Resize(192, method='squish'), # the default method is 'crop'
#               batch=aug_transforms(size=128, min_scale=0.75))

# Active variant: zero-pad items to (256,192) rather than squishing or
# cropping, then augment batches down to (171,128).
learn = train(trn_path,
              arch,
              item=Resize((256,192),
                          method=ResizeMethod.Pad, pad_mode=PadMode.Zeros),
              batch=aug_transforms(size=(171,128), min_scale=0.75))

## Test time augmentation

In [None]:
# Baseline: error rate of plain predictions on the validation set.
valid = learn.dls.valid
preds,targs = learn.get_preds(dl=valid)
error_rate(preds, targs)

In [None]:
# Same validation set with test-time augmentation; `targs` comes from the
# previous cell, so that cell must have been run first.
tta_preds,_ = learn.tta(dl=valid)
error_rate(tta_preds, targs)

## Scaling up

In [None]:
# Switch back to the original (not pre-resized) training images.
trn_path = path/'train_images'

# Larger inputs and more epochs than the earlier ConvNeXt run.
learn = train(trn_path,
              arch,
              epochs=12,
              item=Resize((480, 360), method=ResizeMethod.Pad, pad_mode=PadMode.Zeros),
              batch=aug_transforms(size=(256,192), min_scale=0.75))

# Validation error with test-time augmentation.
tta_preds,targs = learn.tta(dl=learn.dls.valid)
error_rate(tta_preds, targs)

## Submitting to Kaggle-II

In [None]:
def submit_to_kaggle(sample_sub_file_path:Path,
                     test_images_path:Path,
                     iskaggle:bool, tta:bool,
                     sub_title:str
                     ):
    """Predict labels for the test images, write 'subm.csv' and optionally submit it.

    Reads the module-level `learn` (trained learner) and `comp` (competition
    slug); both must be defined before this is called.

    Args:
        sample_sub_file_path: CSV template whose 'label' column is overwritten.
        test_images_path: folder containing the test images.
        iskaggle: when true, the file is written but not submitted via the API.
        tta: use test-time augmentation (`learn.tta`) instead of plain
            `learn.get_preds`.
        sub_title: message attached to the Kaggle submission.

    Raises:
        ValueError: if the number of predictions differs from the number of
            rows in the template (raised by pandas on assignment).
    """
    ss = pd.read_csv(sample_sub_file_path)
    # Sorted so predictions line up with the template rows
    # (assumes the template is ordered by file name -- TODO confirm).
    tst_files = get_image_files(test_images_path).sorted()
    tst_dl = learn.dls.test_dl(tst_files)

    if tta:
        preds, _ = learn.tta(dl=tst_dl)
        idxs = preds.argmax(dim=1)
    else:
        # with_decoded=True adds the decoded class indices as the third result.
        _, _, idxs = learn.get_preds(dl=tst_dl, with_decoded=True)

    vocab = np.array(learn.dls.vocab)
    # Assign the array directly (no intermediate Series): a length mismatch
    # then raises instead of being index-aligned and silently filled with NaN.
    ss['label'] = vocab[idxs]
    ss.to_csv('subm.csv', index=False)

    if not iskaggle:
        from kaggle import api
        api.competition_submit_cli('subm.csv', sub_title, comp)

In [None]:
# Second submission: predictions from the current `learn`, using test-time augmentation.
submit_to_kaggle(sample_sub_file_path = path/'sample_submission.csv',
                 test_images_path = path/'test_images',
                 iskaggle = iskaggle, tta = True,
                 sub_title = 'convnext small 256x192 12 epochs tta')

## Memory and gradient accumulation

In [None]:
# Module-level test file list; read by the train() defined in this section.
tst_files = get_image_files(path/'test_images').sorted()
df = pd.read_csv(path/'train.csv')
# NOTE: not the last expression in the cell, so a notebook will not display this result.
df.label.value_counts()
# Point trn_path at a single class folder -- presumably to keep the
# memory experiments below small and quick (TODO confirm intent).
trn_path = path/'train_images'/'bacterial_panicle_blight'

In [None]:
def train(
    arch, size, item=Resize(480, method='squish'),
    accum=1, finetune=True, epochs=12
):
    """Train `arch` on the module-level `trn_path` using gradient accumulation.

    Note: this redefines the earlier `train(trn_path, arch, item, batch, epochs)`
    helper with a different signature.

    Args:
        arch: architecture name passed to `vision_learner`.
        size: final size passed to `aug_transforms`.
        item: item transform applied before batching.
        accum: accumulation factor; the batch size is `64 // accum`. A falsy
            value (0 or None) disables accumulation and uses a batch size of 64.
        finetune: if true, fine-tune and return TTA results on the test set;
            otherwise unfreeze and run `fit_one_cycle` (used for memory probing).
        epochs: number of epochs.

    Returns:
        The result of `learn.tta` on the module-level `tst_files` when
        `finetune` is true; otherwise None.
    """
    # Guard the division: a falsy `accum` is treated as "no accumulation" by
    # the callback selection below, so it must not raise here.
    bs = 64 // accum if accum else 64
    dls = ImageDataLoaders.from_folder(
        trn_path, valid_pct=0.2, item_tfms=item,
        batch_tfms=aug_transforms(size=size, min_scale=0.75),
        bs=bs
    )
    # The callback is given 64, the batch size used when accum is 1.
    cbs = GradientAccumulation(64) if accum else []
    learn = vision_learner(
        dls, arch, metrics=error_rate, cbs=cbs
    ).to_fp16()
    if finetune:
        learn.fine_tune(epochs, 0.01)
        return learn.tta(dl=dls.test_dl(tst_files))
    else:
        learn.unfreeze()
        learn.fit_one_cycle(epochs, 0.01)

## Checking memory use

In [None]:
import gc, torch
def report_gpu():
    """Print the GPU process listing, then collect garbage and empty the CUDA cache."""
    process_report = torch.cuda.list_gpu_processes()
    print(process_report)
    # Collect unreachable Python objects before emptying the CUDA cache.
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# One-epoch runs (finetune=False) that vary architecture, size and accumulation
# factor; report_gpu() prints the GPU process listing after each run and then
# clears the cache.
train('convnext_small_in22k', 128, epochs=1, accum=1, finetune=False)
report_gpu()

train('convnext_small_in22k', 128, epochs=1, accum=2, finetune=False)
report_gpu()

train('convnext_small_in22k', 128, epochs=1, accum=4, finetune=False)
report_gpu()

train('convnext_large_in22k', 224, epochs=1, accum=2, finetune=False)
report_gpu()

train('convnext_large_in22k', (320,240), epochs=1,
      accum=2, finetune=False)
report_gpu()

train('vit_large_patch16_224', 224, epochs=1, accum=2, finetune=False)
report_gpu()

In [None]:
# torch.cuda.empty_cache()
# train('swinv2_large_window12_192_22k', 192, epochs=1,
#       accum=4, finetune=False)
# report_gpu()

# --------------------

# train('swin_large_patch4_window7_224', 224, epochs=1, accum=4, finetune=False)
# report_gpu()

# Results in an Error related to timm version

## Running the models

In [None]:
# Item size used by the (currently commented-out) Resize(res) variants.
res = 640,480

# Each architecture maps to a set of (item transform, final size) pairs to train.
# Only the ViT entry is active; the others are commented out.
models = {
    # 'convnext_large_in22k': {
    #     (Resize(res), 224),
    #     # (Resize(res), (320,224)),
    # },
    'vit_large_patch16_224': {
        (Resize(480, method='squish'), 224),
        # (Resize(res), 224),
    },
    # 'swinv2_large_window12_192_22k': {
    #     (Resize(480, method='squish'), 192),
    #     (Resize(res), 192),
    # },
    # 'swin_large_patch4_window7_224': {
    #     (Resize(480, method='squish'), 224),
    #     (Resize(res), 224),
    # }
}


# Train on the full training set again (the memory section pointed trn_path at one class folder).
trn_path = path/'train_images'

# Collects the value returned by train() for every configuration.
tta_res = []

for arch,details in models.items():
    for item,size in details:
        print('---',arch)
        print(size)
        print(item.name)
        tta_res.append(train(arch, size, item=item, accum=4)) #, epochs=1))
        # Collect garbage and empty the CUDA cache before the next model is trained.
        gc.collect()
        torch.cuda.empty_cache()

## Ensembling

In [None]:
# Save the collected TTA results to 'tta_res.pkl'.
save_pickle('tta_res.pkl', tta_res)

In [None]:
# Each entry of tta_res is what train() returned from learn.tta; zip(*...)
# regroups them by position and first() keeps the first group (the predictions).
tta_prs = first(zip(*tta_res))
# tta_prs += tta_prs[2:4]
# Stack the per-model predictions and average across models (dim 0).
avg_pr = torch.stack(tta_prs).mean(0)
print(avg_pr.shape)

In [None]:
# DataLoaders are rebuilt here; in this cell only `dls.vocab` is used,
# to map class indices back to label strings.
dls = ImageDataLoaders.from_folder(
    trn_path, valid_pct=0.2,
    item_tfms=Resize(480, method='squish'),
    batch_tfms=aug_transforms(size=224, min_scale=0.75)
)

# Take the top class from the averaged predictions and write the submission file.
idxs = avg_pr.argmax(dim=1)
vocab = np.array(dls.vocab)
ss = pd.read_csv(path/'sample_submission.csv')
ss['label'] = vocab[idxs]
ss.to_csv('subm.csv', index=False)

## Submitting to Kaggle-III

In [None]:
# Submit the ensemble file through the Kaggle API when not running on Kaggle.
if not iskaggle:
    from kaggle import api
    api.competition_submit_cli('subm.csv', 'convnext_vit_ensemble', comp)

## Part 4 | Multi-output Prediction

In this part we're going to build a model that doesn't just predict what disease the rice paddy has, but also predicts what kind of rice is shown.

This might sound like a bad idea. After all, doesn't that mean that the model has *more* to do? Mightn't it get rather distracted from its main task, which is to identify paddy disease?

Perhaps... But in previous projects I've often found the opposite to be true, especially when training for quite a few epochs. By giving the model more signal about what is present in a picture, it may be able to use this information to find more interesting features that predict our target of interest. For instance, perhaps some of the features of disease change between varieties.

## Multi-output `DataLoader`

In [None]:
from fastai.vision.all import (
    DataBlock, ImageBlock, CategoryBlock,
    parent_label, RandomSplitter, set_seed, F
)
# from fastcore.parallel import *
# Seed the random number generators with 42 for this part.
set_seed(42)
# Train on the full training image folder.
trn_path = path/'train_images'

First we'll repeat the steps we used last time to access the data and ensure all the latest libraries are installed:

Here's the CSV that Kaggle provides, showing the variety of rice contained in each image -- we'll make `image_id` the index of our data frame so that we can look up images directly to grab their variety:

In [None]:
# Index by image_id so get_variety can look a file up by its name.
df = pd.read_csv(path/'train.csv', index_col='image_id')
display(df.head(2))

In [None]:
def get_variety(p):
    """Return the 'variety' value that the module-level `df` holds for image path `p`.

    The lookup key is the file name (`p.name`), which must be present in
    `df`'s index.
    """
    image_id = p.name
    return df.loc[image_id, 'variety']

In [None]:
# how to get labels
image_list = get_image_files(trn_path)
print(f"disease: {parent_label(image_list[0])} | variety:\
 {get_variety(image_list[0])}")

In [None]:
# One image input (n_inp=1) and two categorical targets per file:
# disease from parent_label and variety from get_variety, in that order.
dls = DataBlock(
    blocks=(ImageBlock,CategoryBlock,CategoryBlock),
    n_inp=1,
    get_items=get_image_files,
    get_y = [parent_label,get_variety],
    splitter=RandomSplitter(0.2, seed=42),
    item_tfms=Resize(192, method='squish'),
    batch_tfms=aug_transforms(size=128, min_scale=0.75)
).dataloaders(trn_path)

Here's an explanation of each line:

```python
blocks=(ImageBlock,CategoryBlock,CategoryBlock),
```

The `DataBlock` will create 3 things from each file: an image (the contents of the file), and 2 categorical variables (the disease and the variety).

```python
n_inp=1,
```

There is `1` input (the image) -- and therefore the other two variables (the two categories) are outputs.

```python
get_items=get_image_files,
```

Use `get_image_files` to get a list of inputs.

```python
get_y = [parent_label,get_variety],
```

To create the two outputs for each file, call two functions: `parent_label` (from fastai) and `get_variety` (defined above).

```python
splitter=RandomSplitter(0.2, seed=42),
```

Randomly split the input into 80% train and 20% validation sets.

```python
item_tfms=Resize(192, method='squish'),
batch_tfms=aug_transforms(size=128, min_scale=0.75)
```

These are the same item and batch transforms we've used in previous notebooks.

Let's take a look at part of a batch of this data:

In [None]:
# Show up to three samples from a batch.
dls.show_batch(max_n=3)

We can see that fastai has created both the image input and two categorical outputs that we requested!

## Replicating the disease model

Now we'll replicate the same disease model we've made before, but have it work with this new data.

The key difference is that our metrics and loss will now receive three things instead of two: the model outputs (i.e. the metric and loss function inputs), and the two targets (disease and variety). Therefore, we need to define slight variations of our metric (`error_rate`) and loss function (`cross_entropy`) to pass on just the `disease` target:

In [None]:
# def disease_err(inp,disease,variety): return error_rate(inp,disease)
# def disease_loss(inp,disease,variety): return F.cross_entropy(inp,disease)

We're now ready to create our learner.

There's just one wrinkle to be aware of. Now that our `DataLoaders` is returning multiple targets, fastai doesn't know how many outputs our model will need. Therefore we have to pass `n_out` when we create our `Learner` -- we need `10` outputs, one for each possible disease:

In [None]:
# arch = 'convnext_small_in22k'
# learn = vision_learner(dls, arch, loss_func=disease_loss,
#                        metrics=disease_err, n_out=10).to_fp16()
# lr = 0.01

# # When we train this model we should get similar results
# # to what we've seen with similar models before:
# learn.fine_tune(5, lr)

## Multi-target model

In order to predict both the probability of each disease and of each variety, we'll now need the model to output a tensor of length 20, since there are 10 possible diseases and 10 possible varieties. We can do this by setting `n_out=20`.

We can define `disease_loss` just like we did previously, but with one important change: the input tensor is now length 20, not 10, so it doesn't match the number of possible diseases. We can pick whatever part of the input we want to be used to predict disease. Let's use the first 10 values:

In [None]:
# The model output is split by column: the first 10 columns score the
# disease, and the columns from index 10 onward score the variety.

def disease_loss(inp,disease,variety):
    """Cross-entropy of the first 10 output columns against the disease target."""
    disease_scores = inp[:, :10]
    return F.cross_entropy(disease_scores, disease)


def variety_loss(inp,disease,variety):
    """Cross-entropy of the output columns from index 10 onward against the variety target."""
    variety_scores = inp[:, 10:]
    return F.cross_entropy(variety_scores, variety)


def combine_loss(inp,disease,variety):
    """Overall training loss: disease loss plus variety loss."""
    d_loss = disease_loss(inp, disease, variety)
    v_loss = variety_loss(inp, disease, variety)
    return d_loss + v_loss


def disease_err(inp,disease,variety):
    """Error rate of the disease prediction (first 10 output columns)."""
    disease_scores = inp[:, :10]
    return error_rate(disease_scores, disease)


def variety_err(inp,disease,variety):
    """Error rate of the variety prediction (output columns from index 10 onward)."""
    variety_scores = inp[:, 10:]
    return error_rate(variety_scores, variety)


# Metrics reported during training: both error rates, then both individual losses.
err_metrics = (disease_err, variety_err)
all_metrics = (*err_metrics, disease_loss, variety_loss)

We're now ready to create and train our `Learner`:

In [None]:
arch = 'convnext_small_in22k'
# n_out=20: 10 outputs for the disease plus 10 for the variety; combine_loss
# sums the two cross-entropy terms.
learn = vision_learner(dls, arch, loss_func=combine_loss,
                       metrics=all_metrics, n_out=20).to_fp16()
learn.fine_tune(10, base_lr = 0.01)

## Submitting to Kaggle-IV

In [None]:
test_images_path = path/'test_images'
# Sorted so predictions can later be matched to the submission rows
# (assumes the template is ordered by file name -- TODO confirm).
tst_files = get_image_files(test_images_path).sorted()
tst_dl = learn.dls.test_dl(tst_files)

# tta
# `preds` carries the full 20-wide output (n_out=20); the disease part is sliced out in the next cell.
preds,_ = learn.tta(dl=tst_dl)

In [None]:
# The first 10 output columns hold the disease scores; take the top class per image.
idxs = preds[:, :10].argmax(dim=1)
# `learn.dls.vocab` holds one vocab per target (disease first, matching get_y).
# Select the disease vocab before converting to an array, so the lookup does
# not depend on both vocabs having the same length.
vocab = np.array(learn.dls.vocab[0])
results = pd.Series(vocab[idxs], name="idxs")

In [None]:
# Write the disease predictions (`results`) into the sample-submission template.
sample_sub_file_path = path/'sample_submission.csv'
ss = pd.read_csv(sample_sub_file_path)
ss['label'] = results
ss.to_csv('subm.csv', index=False)

sub_title = 'Multi-output ConvNext | Disease and Variety'
# Submit through the API only when not running on Kaggle, matching the
# other submission cells in this notebook.
if not iskaggle:
    from kaggle import api
    api.competition_submit_cli('subm.csv', sub_title, comp)

## Conclusion

So, is this useful?

Well... if you actually want a model that predicts multiple things, then yes, definitely! But as to whether it's going to help us better predict rice disease, I honestly don't know. I haven't come across any research that tackles this important question: when can a multi-target model improve the accuracy of the individual targets compared to a single target model? (That doesn't mean it doesn't exist of course -- perhaps it does and I haven't found it yet...)

I've certainly found in previous projects that there are cases where improvements to single targets can be made by using a multi-target model. I'd guess that it'll be most useful when you're having problems with overfitting.