In [None]:

#hide
# Install/upgrade fastbook only when /content exists
# (presumably a Colab runtime — TODO confirm).
! [ -e /content ] && pip install -Uqq fastbook
import fastbook
# Run fastbook's notebook setup helper.
fastbook.setup_book()

In [None]:
#hide
from fastai.vision.all import *
from fastbook import *

from fastai.vision.widgets import *
# Use the 'Greys' colormap as the default for image plots.
matplotlib.rc('image', cmap='Greys')

In [None]:
# Folder holding the categorized images used for training.
path = Path('dbs/images/categorized/')
# Collect the image files found under that folder.
fns = get_image_files(path)

In [None]:
# Run verify_images over every collected file and display what it returns
# (presumably the files that could not be opened — confirm against the fastai docs).
# NOTE(review): the result is only displayed; nothing is removed from disk here.
failed = verify_images(fns)
failed

In [None]:
# Image -> category pipeline: items come from get_image_files, labels from
# parent_label, with a seeded random split that holds out 20% for validation.
# Every item is resized to 224 using the Pad method with zero padding.
mush = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    item_tfms=Resize(224, method=ResizeMethod.Pad, pad_mode='zeros'),
)
dls = mush.dataloaders(path)

In [None]:
# Preview up to four validation samples in a single row.
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# Build a resnet101-based learner that reports error_rate, then call fine_tune with 4 epochs.
learn = vision_learner(dls, resnet101, metrics=error_rate)
learn.fine_tune(4)

In [None]:
# Learner.export saves relative to learn.path and does not create missing
# folders, so make sure the target directory exists before exporting.
(learn.path/'models').mkdir(parents=True, exist_ok=True)
learn.export('models/clean-res101.pkl')

In [None]:
# Build an interpretation object from the trained learner and plot its confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

In [None]:
# Plot the 26 top-loss samples, laid out over 4 rows.
interp.plot_top_losses(26, nrows=4)

# Batch Inference

In [None]:
# Read the image listing and derive one file path per row, shaped as
# dbs/images/224/<gbifid>-<imgid>.png.
image_file = "dbs/images/training-images.csv"
source_images = pd.read_csv(image_file)
images = (
    'dbs/images/224/'
    + source_images['gbifid'].astype(str)
    + '-'
    + source_images['imgid'].astype(str)
    + '.png'
)
images

In [None]:
# Reload the exported learner for inference.
# NOTE: load_learner unpickles the file, so only load exports you trust.
learn_inf = load_learner('models/clean-res101.pkl')
# Move the model to the first GPU only when CUDA is present; the previous
# unconditional move raised on CPU-only machines.
if torch.cuda.is_available():
    learn_inf.model = learn_inf.model.to(device="cuda:0")

In [None]:
# Build the inference DataLoader from the reloaded learner (not the training
# `learn`), so this section also runs when only the exported model is available.
test_dl = learn_inf.dls.test_dl(images, bs=1024)


In [None]:
# Predict with the reloaded inference learner, matching the vocab lookup used
# when the results are written out. reorder=False is passed through unchanged.
preds, _, decoded = learn_inf.get_preds(dl=test_dl, with_decoded=True, reorder=False)

In [None]:
import csv

categorized_file = "dbs/images/categorized.csv"

# One row per image: file path, predicted label, and the prediction value for
# that label. csv.writer quotes fields that contain commas or quotes, which the
# previous hand-built lines did not; "\n" keeps the original line ending.
with open(categorized_file, "w", newline="") as f:
    writer = csv.writer(f, lineterminator="\n")
    # zip walks the three sequences in order, so the Series index of `images`
    # no longer has to be 0..n-1 for the lookup to be correct.
    for image_path, pred, idx in zip(images, preds, decoded):
        class_idx = int(idx)
        label = learn_inf.dls.vocab[class_idx]
        writer.writerow([image_path, label, float(pred[class_idx])])


In [None]:
import ntpath

def splitFileName(inputfile):
    """Split an image path into its identifier, the identifier's parts, and the cleaned path.

    Args:
        inputfile: Path string; surrounding whitespace (e.g. a trailing
            newline) is stripped first.

    Returns:
        A tuple ``(stem, parts, file)`` where ``file`` is the stripped path,
        ``stem`` is the final path component without a trailing ".png", and
        ``parts`` is ``stem`` split on "-".
    """
    file = inputfile.strip()
    # ntpath.split accepts both "/" and "\\" as separators.
    _, tail = ntpath.split(file)
    suffix = ".png"
    # Drop the extension only when it is a real suffix; str.replace would also
    # remove ".png" occurring in the middle of the name.
    stem = tail[:-len(suffix)] if tail.endswith(suffix) else tail
    return stem, stem.split("-"), file

def predictBatch(tst_files):
    """Predict a label for each file and print one CSV-style line per file.

    Args:
        tst_files: Sequence of image path strings. Each file name is expected
            to split into at least two "-"-separated parts; a name with fewer
            parts raises IndexError.

    Prints:
        ``<part0>,<part1>,<label>,<value>`` for every file, where ``value`` is
        the prediction entry for the decoded label.
    """
    test_dl = learn_inf.dls.test_dl(tst_files)
    preds, _, decoded = learn_inf.get_preds(dl=test_dl, with_decoded=True, reorder=False)

    # Pair files with predictions by position rather than `tst_files[i]`, so a
    # pandas Series with a non-default index is handled correctly as well.
    for tst_file, pred, idx in zip(tst_files, preds, decoded):
        _, parts, _ = splitFileName(tst_file)
        class_idx = int(idx)
        percent = float(pred[class_idx])
        label = learn_inf.dls.vocab[class_idx]
        print(parts[0] + "," + parts[1] + ',' + label + ',' + str(percent))
    
            
# `items` is not defined anywhere in this notebook, so the original call raised
# NameError on a fresh kernel. Run the demo on the first few `images` paths instead.
# NOTE(review): confirm this is the intended input for the demo.
predictBatch(images.head(10).tolist())

In [None]:
# Display the vocab that the decoded prediction indices are looked up in.
learn_inf.dls.vocab