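This notebook evaluates the four models trained in notebooks 3-5 on the test set: the species-level ResNet34, XResNet50 and XResNet18 classifiers, and the two-stage genus model (a genus classifier followed by per-genus species classifiers).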

In [1]:
# (Re)load the autoreload extension; mode 2 re-imports edited modules before each cell runs.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from fastai.vision.all import *
from fastai.data.all import *

In [3]:
# Project folders, all resolved against the current working directory.
cwd = Path.cwd()
models = cwd / 'models'                      # exported / saved learners
data = cwd / 'data'                          # datasets and result CSVs
test = data / 'test_data'                    # test images, scanned recursively for *.jpg
mushroom_images = data / 'mushroom-images'

In [5]:
# Load the three exported species-level learners for evaluation on the test set.
# NOTE(review): these exports are pickle files, so only load exports from a trusted source.
r34 = load_learner(models / 'species-resnet34_EXPORT.pkl')
xr50 = load_learner(models / 'species-xresnet50_EXPORT.pkl')
xr18 = load_learner(models / 'species-xresnet18_EXPORT.pkl')

In [6]:
%%capture
resnet34_results = {'cat':[], 'actual': [], 'result':[]}
xresnet50_results = {'cat':[], 'actual': [], 'result':[]}
xresnet18_results = {'cat':[], 'actual': [], 'result':[]}
for i, im in enumerate(test.rglob("*.jpg")):
    cat, tensor, probs = r34.predict(im)
    resnet34_results['cat'].append(cat)
    resnet34_results['actual'].append(im.parent.stem)
    if im.parent.stem != cat:
        resnet34_results['result'].append(0)
    else:
        resnet34_results['result'].append(1)
        
    cat, tensor, probs = xr50.predict(im)
    xresnet50_results['cat'].append(cat)
    xresnet50_results['actual'].append(im.parent.stem)
    if im.parent.stem != cat:
        xresnet50_results['result'].append(0)
    else:
        xresnet50_results['result'].append(1)
        
    cat, tensor, probs = xr18.predict(im)
    xresnet18_results['cat'].append(cat)
    xresnet18_results['actual'].append(im.parent.stem)
    if im.parent.stem != cat:
        xresnet18_results['result'].append(0)
    else:
        xresnet18_results['result'].append(1)
    if i % 100 == 0:
        print(i)

In [7]:
import pandas as pd

# Turn each per-model results dict into a table (columns: cat, actual, result).
# NOTE(review): these names shadow the learners loaded earlier, so the species
# evaluation cell cannot be re-run after this one without reloading the learners.
r34 = pd.DataFrame(resnet34_results)
xr50 = pd.DataFrame(xresnet50_results)
xr18 = pd.DataFrame(xresnet18_results)

# Persist one CSV per model, without the index column.
r34.to_csv('data/resnet34_results.csv', index=False)
xr50.to_csv('data/xresnet50_results.csv', index=False)
xr18.to_csv('data/xresnet18_results.csv', index=False)

In [16]:
# The genus model can't be loaded from a pickle export, so its DataLoaders
# have to be rebuilt from the image folder before the weights can be loaded.
valid_pct = 0.1111                          # validation fraction passed to the splitter
genus_level = data / 'mushrooms_by_genus'   # image folder the DataLoaders are built from

In [17]:
def y_func(x):
    """Return the label for image path `x`, taken from its folder names.

    If the image's grandparent folder contains more than one sub-directory,
    the label is the grandparent folder's name; otherwise it is the name of
    the image's own parent folder. (Presumably the layout is
    genus/species/image, so genera with a single species are labelled by
    species -- confirm against the dataset.)
    """
    leaf_dir = x.parent
    upper_dir = leaf_dir.parent
    n_subdirs = sum(1 for entry in upper_dir.iterdir() if entry.is_dir())
    return upper_dir.stem if n_subdirs > 1 else leaf_dir.stem

In [18]:
# Describe how the images are turned into (image, label) batches.
mushroom_db = DataBlock(
    blocks=(ImageBlock, CategoryBlock),                     # inputs are images, outputs are categories
    get_items=get_image_files,                              # collect the image files under the source folder
    get_y=y_func,                                           # label each image from its folder names
    splitter=RandomSplitter(valid_pct=valid_pct, seed=0),   # seeded random train/validation split
    item_tfms=Resize(224),                                  # resize all images to 224 x 224
    batch_tfms=aug_transforms(),                            # augment images to improve generalisation
)
# Build the DataLoaders from the genus-level image folder.
mushroom_dl = mushroom_db.dataloaders(genus_level)

In [21]:
%%capture
# Evaluating the genus model
g_mod = ('genus-resnet34')
g = cnn_learner(mushroom_dl, resnet34)
g.load(g_mod)
genus_results = {'cat': [], 'actual': [], 'result': []}
# create a dictionary to group all predictions by genus so that each species model is only loaded once
by_genus = dict()

# make predictions at genus level
for im in test.rglob('*.jpg'):
    cat, tensor, probs = g.predict(im)
    if len(cat.split('-')) > 1:
        genus_results['cat'].append(cat)
        genus_results['actual'].append(im.parent.stem)
        if im.parent.stem != cat:
            genus_results['result'].append(0)
        else:
            genus_results['result'].append(1)
    else:
        if cat not in by_genus:
            by_genus[cat] = [im]
        else:
            by_genus[cat].append(im)

FileNotFoundError: [Errno 2] No such file or directory: '/notebooks/storage/mushroom_obs/models/genus/Echinodontium-tinctorium_EXPORT.pkl'

In [39]:
# NOTE: this binds a second name to the SAME dict (no copy is made), so anything
# appended through `genus_results2` is also visible through `genus_results`.
genus_results2 = genus_results

In [40]:
# make predictions at species level
# For every genus predicted above, load that genus' exported species model once
# and score all images that were routed to it. Results are appended to
# `genus_results2`, which is the same dict object as `genus_results`.
species_model_paths = {
    genus: models / 'genus' / (str(genus) + '_EXPORT.pkl')
    for genus in by_genus
}

# Fail before any prediction is recorded if an export is missing; otherwise the
# loop would abort part-way through and leave the shared results half-updated.
missing_models = [str(path) for path in species_model_paths.values() if not path.exists()]
if missing_models:
    raise FileNotFoundError('No exported species model found at: ' + ', '.join(missing_models))

# The loop variable is deliberately not called `g`, so the genus learner `g` is not overwritten.
for genus, species_model in species_model_paths.items():
    # NOTE(review): exports are pickle files -- only load exports from a trusted source.
    mod = load_learner(species_model)
    for fname in by_genus[genus]:
        cat = mod.predict(fname)[0]   # first element is the decoded label
        actual = fname.parent.stem
        genus_results2['cat'].append(cat)
        genus_results2['actual'].append(actual)
        genus_results2['result'].append(0 if actual != cat else 1)

In [41]:
# Tabulate the genus-model results (columns: cat, actual, result) and save them without the index.
# NOTE(review): `g` previously held the genus learner; it is a DataFrame from here on.
g = pd.DataFrame(genus_results)
g.to_csv('data/genus_model_results.csv', index=False)