In [None]:
# default_exp evaluate

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the package take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Evaluate

> This module contains functions that run inference on the test set and generate the final submission file for Kaggle.

In [None]:
#hide
from nbdev.showdoc import *
from plant_pathology.utils import *
from plant_pathology.dataset import *
from wwf.vision.timm import *
import timm

In [None]:
#export
from plant_pathology.config import DATA_PATH
from fastai.vision.all import *
from typing import *

## Infer on Test Set

In [None]:
#export
def infer_on_test_set(
    learn: Learner, path: Path = DATA_PATH/"test.csv", tta: bool=False, **kwargs
) -> Tensor:
    """Return the predictions of `learn` for the rows of the test CSV at `path`.

    The CSV is read into a dataframe and wrapped in a test dataloader built
    from `learn.dls`. Predictions come from `learn.tta` when `tta` is true and
    from `learn.get_preds` otherwise; any extra `kwargs` are forwarded to
    whichever of the two is called. Only the first element of the returned
    pair is handed back.
    """
    test_dl = learn.dls.test_dl(pd.read_csv(path))
    # Choose the prediction routine up front, then call it exactly once.
    predict = learn.tta if tta else learn.get_preds
    preds, _ = predict(dl=test_dl, **kwargs)
    return preds

In [None]:
#hide
# Load the dataset path and the label dataframe used by the demo cells below,
# asking `load_data` to include fold assignments.
path, df = load_data(DATA_PATH, with_folds=True)

In [None]:
# Preview the first rows of the label dataframe.
df.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab,fold
0,Train_1511,0,0,1,0,0.0
1,Train_1799,1,0,0,0,0.0
2,Train_135,1,0,0,0,0.0
3,Train_408,0,0,1,0,0.0
4,Train_1693,1,0,0,0,0.0


In [None]:
#hide
class PredictSingleStep(Callback):
    """Test helper that cuts a prediction pass short.

    `after_batch` counts one batch in `step_count`; on every later call it
    raises `CancelValidException` instead. The counter is initialised once in
    `__init__` and is never reset by this class.
    """

    def __init__(self):
        # Number of batches counted so far.
        self.step_count = 0

    def after_batch(self):
        # Let the first batch through and remember that it was seen.
        if self.step_count < 1:
            self.step_count += 1
            return
        # A batch has already been counted: abort the rest of the pass.
        raise CancelValidException

In [None]:
# Build dataloaders with 32px images and a batch size of 512 (presumably to keep
# this smoke test cheap), then attach PredictSingleStep so inference stops early.
dls = get_dls(path, df, presize=32, resize=32, bs=512)
learn = cnn_learner(dls, resnet18, cbs=[PredictSingleStep()])

In [None]:
#slow
# Test probabilities all sum to 1.
preds = infer_on_test_set(learn, reorder=False)
# Every row of `preds` is expected to sum to 1.
# NOTE(review): the previous note credited softmax being applied "after tta",
# but this call leaves `tta` at its default of False, so no TTA runs here —
# confirm where the normalisation actually happens.
test_close(preds.sum(1), 1.)

KeyboardInterrupt: 

## Format Submission

In [None]:
#export
def format_submission(preds: Tensor, save_path: Union[Path, str]) -> Path:
    """Write `preds` to `save_path` as a Kaggle submission CSV.

    The file has one row per prediction, labelled `Test_0`, `Test_1`, ... in
    an `image_id` column, followed by the four class-probability columns
    `healthy`, `multiple_diseases`, `rust` and `scab`.

    Args:
        preds: Predictions with one row per test image and four columns, in
            the class order listed above. Rows are assumed to be in the same
            order as the test CSV.
        save_path: Destination of the CSV. Missing parent directories are
            created.

    Returns:
        `save_path` as a `Path`.
    """
    # Convert tensors explicitly: how pandas treats a raw tensor depends on the
    # pandas version, and CUDA / grad-tracking tensors cannot be converted
    # implicitly at all.
    if hasattr(preds, "detach"):
        preds = preds.detach().cpu().numpy()

    # Build submission CSV. Naming the index makes the first header cell
    # `image_id` instead of an empty string.
    image_ids = pd.Index([f"Test_{i}" for i in range(len(preds))], name="image_id")
    column_names = ["healthy", "multiple_diseases", "rust", "scab"]
    submission = pd.DataFrame(preds, index=image_ids, columns=column_names)

    # Make parent dirs
    save_path = Path(save_path)
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Save submission
    submission.to_csv(save_path)
    return save_path

In [None]:
# Zero-filled stand-in predictions: 1821 rows by 4 class columns
# (1821 presumably matches the test-set size — verify against test.csv).
fake_preds = torch.zeros(1821, 4)
save_path = format_submission(fake_preds, "TESTING.csv"); save_path

Path('TESTING.csv')

In [None]:
# The CSV must exist on disk once format_submission has returned.
assert save_path.exists()

In [None]:
# Delete the temporary CSV so the test leaves no file behind.
save_path.unlink()

## All-in-One Learner -> Submission Function

In [None]:
#export
def evaluate(learn: Learner, save_path: Union[Path, str]=Path("./submission.csv"), tta: bool=False) -> Path:
    """Predict on the test set with `learn` and write the submission CSV.

    Predictions come from `infer_on_test_set` (using TTA when `tta` is true)
    and are passed to `format_submission`, whose returned path is handed back.
    """
    return format_submission(infer_on_test_set(learn, tta=tta), save_path)

In [None]:
#hide
# Run nbdev's notebook-to-script export for the project's notebooks.
from nbdev.export import notebook2script; notebook2script()

Converted 00_utils.ipynb.
Converted 01_dataset.ipynb.
Converted 02_evaluate.ipynb.
Converted 03_train.ipynb.
Converted 04_generate_pseudo_labels.ipynb.
Converted 05_self_knowledge_distillation.ipynb.
Converted 06_create_folds.ipynb.
Converted config.ipynb.
Converted index.ipynb.
