In [None]:
# default_exp evaluate

# Evaluate

> This module contains functions to run inference on the test set and generate the final Kaggle submission file.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *
from plant_pathology.config import TEST_DATA_PATH
from plant_pathology.utils import load_data
from plant_pathology.dataset import get_dls
from fastai.vision.all import *
from torch.nn import Linear
import torch

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#export
from typing import Union

import pandas as pd
from fastai.vision.all import *

## Infer on Test Set

In [None]:
#export
def infer_on_test_set(
    learn: Learner,
    path: Path,
    tta: bool = False,
    bs: int = 64,
) -> Tensor:
    """Infers on test CSV at `path` using `learn`, optionally performing TTA."""
    df_test = pd.read_csv(path)
    test_dl = learn.dls.test_dl(df_test, bs=bs)
    preds, _ = (learn.tta if tta else learn.get_preds)(dl=test_dl)
    return preds

In [None]:
#hide
class PredictSingleStep(Callback):
    """Callback that cuts a prediction run short after its first batch.

    The first `after_batch` call is only counted. Every `after_batch` call made
    once a batch has been counted raises `CancelValidException`. The counter is
    never reset by this class.
    """

    def __init__(self):
        # Number of batches counted so far.
        self.step_count = 0

    def after_batch(self):
        first_batch_done = self.step_count >= 1
        if not first_batch_done:
            self.step_count += 1
            return
        raise CancelValidException

In [None]:
#hide
# Build learner to test inference
# The labelled CSV shipped with the test data doubles as the "test" CSV here.
test_csv_path = TEST_DATA_PATH / "train.csv"

# Tiny 32px images and a batch size of 3 keep this smoke test cheap.
path, df = load_data(TEST_DATA_PATH, with_folds=True)
dls = get_dls(path, df, presize=32, resize=32, bs=3)

# Pool -> flatten -> one linear layer with `dls.c` outputs.
# NOTE(review): the 3 input features are presumably the image channels — confirm.
simple_model = sequential(
    AdaptiveAvgPool(),
    Flatten(),
    Linear(3, dls.c),
)

# `cbs=[PredictSingleStep()]` can be passed here to stop prediction early; it is left off.
learn = Learner(dls, simple_model, loss_func=CrossEntropyLossFlat())

In [None]:
# Run inference with the defaults of `infer_on_test_set`: no TTA, batch size 64.
preds = infer_on_test_set(learn, path=test_csv_path)

In [None]:
#hide
# Each row of `preds` should be a probability distribution, i.e. sum to 1.
# NOTE(review): `learn` is a plain `Learner` and inference above ran without TTA;
# presumably `get_preds` applies the loss function's softmax activation — confirm.
test_close(preds.sum(1), 1.)

## Format Submission

In [None]:
#export
def format_submission(preds: Tensor, save_path: Union[Path, str]) -> Path:
    """Formats raw `preds` into submission CSV, saving at `save_path`.

    `preds` holds one row per test image and one column per class, in the order
    `healthy`, `multiple_diseases`, `rust`, `scab`. Rows are labelled `Test_0`,
    `Test_1`, ... in the order given, and that label column is written under the
    header `image_id`. Missing parent directories of `save_path` are created.

    Returns `save_path` as a `Path`.
    """
    column_names = ["healthy", "multiple_diseases", "rust", "scab"]

    # Hand pandas a plain array: how a torch tensor is coerced by the DataFrame
    # constructor is left to pandas otherwise, and `.cpu()` also covers tensors
    # that still live on an accelerator.
    if hasattr(preds, "detach"):
        preds = preds.detach().cpu().numpy()

    # Build submission CSV
    image_filenames = [f"Test_{i}" for i in range(len(preds))]
    submission = pd.DataFrame(preds, index=image_filenames, columns=column_names)

    # Make parent dirs
    save_path = Path(save_path)
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Save submission; name the id column instead of leaving its header empty.
    submission.to_csv(save_path, index_label="image_id")
    return save_path

In [None]:
# Dummy all-zero predictions with 4 class columns.
# NOTE(review): 1821 rows presumably matches the size of the Kaggle test set — confirm.
fake_preds = torch.zeros(1821, 4)
# Writes "TESTING.csv" relative to the current working directory; the trailing
# expression displays the returned path.
save_path = format_submission(fake_preds, "TESTING.csv"); save_path

Path('TESTING.csv')

In [None]:
#hide
# The file must have been written; delete it afterwards so the test leaves nothing behind.
assert save_path.exists()
save_path.unlink()

## All-in-One Learner -> Submission Function

In [None]:
#export
def evaluate(
    learn: Learner,
    path: Path,
    name: str = "submission.csv",
    tta: bool = False,
    bs: int = 64,
) -> Path:
    """Evaluates `learn` on test CSV at `path` and saves as `name`, optionally applying TTA.

    `bs` is the inference batch size and is forwarded to `infer_on_test_set`;
    its default matches that function's default. Returns the path of the
    written submission file, as returned by `format_submission`.
    """
    preds = infer_on_test_set(learn, path=path, tta=tta, bs=bs)
    return format_submission(preds, name)

In [None]:
#hide
# Convert the project's notebooks into library modules (one "Converted ..." line per notebook).
from nbdev.export import notebook2script; notebook2script()

Converted 00_utils.ipynb.
Converted 01_dataset.ipynb.
Converted 02_evaluate.ipynb.
Converted 03_train.ipynb.
Converted 04_generate_pseudo_labels.ipynb.
Converted 05_self_knowledge_distillation.ipynb.
Converted 06_create_folds.ipynb.
Converted 07_pretrained_models.ipynb.
Converted config.ipynb.
Converted index.ipynb.
