In [1]:
#hide
! [ -e /content ] && pip install -Uqq fastbook
import fastbook
fastbook.setup_book()
from fastai.vision.all import *
from fastbook import *

from fastai.vision.widgets import *

import pandas as pd
import numpy as np
from torch.nn.functional import nll_loss,log_softmax

from sklearn import preprocessing
from functools import partial
import copy

In [2]:
# Load the training manifest, keep only the rows that carry an image id, and
# derive the on-disk path of each 500px image from its gbifid / imgid pair.
image_file = "dbs/training/training-data-v0-4.csv"
source_images = pd.read_csv(image_file)

has_image = source_images.imgid.notna()
source_images = source_images.loc[has_image]

gbif_part = source_images.gbifid.astype(str)
# imgid goes through int first so it is rendered without a decimal part.
img_part = source_images.imgid.astype(int).astype(str)
source_images['img'] = 'dbs/images/500/' + gbif_part + '-' + img_part + '.png'

# Two-column (path, label) view that is fed to the DataBlock further down.
species_images = source_images[['img', 'species']]
species_images

Unnamed: 0,img,species
8,dbs/images/500/2983765339-1.png,Leptoporus mollis
60,dbs/images/500/1428858500-1.png,Lactarius necator
61,dbs/images/500/1428858500-2.png,Lactarius necator
62,dbs/images/500/1428858500-3.png,Lactarius necator
81,dbs/images/500/1433233463-1.png,Gomphidius glutinosus
...,...,...
12025628,dbs/images/500/3424155912-1.png,Paralepista flaccida
12025629,dbs/images/500/3424155884-1.png,Cladonia uncialis
12025630,dbs/images/500/3424155890-1.png,Pleurotus pulmonarius
12025632,dbs/images/500/3424155893-1.png,Lichenomphalia umbellifera


In [3]:
# Build species -> family and species -> genus lookups, plus integer-encoded
# target tensors where entry i is the encoded family/genus of the i-th species
# (in the iteration order of the groupby keys).
# NOTE(review): the taxonomy losses/metrics index these tensors with the species
# class id, so this order presumably has to match dls.vocab - TODO confirm.
# If a species appears under more than one (genus, family) pair, the dict
# comprehension keeps only the last pair seen.

# Distinct (species, genus, family) combinations, computed once for both maps.
taxonomy_keys = list(source_images.groupby(by=["species","genus","_family"]).indices.keys())

# Use the GPU when there is one, otherwise fall back to CPU instead of crashing.
taxonomy_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

family_map = {species:family for (species,_,family) in taxonomy_keys}
family_list = list(family_map.values())
fle = preprocessing.LabelEncoder()
family_targets = torch.tensor(fle.fit_transform(family_list), device=taxonomy_device)
# Number of distinct family classes (encoded ids run from 0 to max).
family_dims = family_targets.max().int().item() + 1

genus_map = {species:genus for (species,genus,_) in taxonomy_keys}
genus_list = list(genus_map.values())
gle = preprocessing.LabelEncoder()
genus_targets = torch.tensor(gle.fit_transform(genus_list), device=taxonomy_device)
# Number of distinct genus classes.
genus_dims = genus_targets.max().int().item() + 1

In [4]:
# -- %%fsql
# -- images = SELECT 'dbs/images/224/' + gbifid + '-' + imgid + '.png' img, species 
# -- FROM (LOAD '{{image_file}}' (header=true))
# -- YIELD DATAFRAME AS images
# -- PRINT

In [5]:
def accuracy_species(inp, targ, axis=-1):
    """Fraction of samples whose highest-scoring class along `axis` equals `targ`.

    The predictions and targets are passed through `flatten_check` before being
    compared element-wise; the mean of the matches is returned as a float tensor.
    """
    predicted = inp.argmax(dim=axis)
    predicted, targ = flatten_check(predicted, targ)
    is_correct = predicted == targ
    return is_correct.float().mean()

def top_5(inp, targ, axis=-1):
    """Top-5 accuracy metric: delegates to `top_n` with n fixed to 5."""
    return top_n(n=5, inp=inp, targ=targ, axis=axis)

def top_10(inp, targ, axis=-1):
    """Top-10 accuracy metric: delegates to `top_n` with n fixed to 10."""
    return top_n(n=10, inp=inp, targ=targ, axis=axis)

def top_n(n, inp, targ, axis=-1):
    """Top-n accuracy: fraction of samples whose target is among the n best scores.

    Args:
        n: how many of the highest-scoring classes count as a hit.
        inp: score tensor whose class dimension is `axis`.
        targ: integer class ids; same shape as `inp` without the `axis` dimension.
        axis: dimension of `inp` that holds the per-class scores.

    Returns:
        Scalar float tensor holding the mean hit rate.
    """
    # Never ask topk for more entries than there are classes (it would raise).
    k = min(n, inp.shape[axis])
    # Rank along the class dimension the caller asked for, not always the last one.
    _, idx = torch.topk(inp, k, dim=axis)
    return (idx == targ.unsqueeze(axis)).any(axis).float().mean()

def accuracy_tax(tax_targets, inp, targ, axis=-1):
    """Accuracy measured at a coarser taxonomy level (e.g. genus or family).

    Both the predicted species and the target species are mapped through
    `tax_targets` and the resulting taxonomy ids are compared.

    Args:
        tax_targets: 1-D integer tensor; entry i is the taxonomy id of species i.
        inp: species scores with the class dimension on `axis`.
        targ: integer species ids.
        axis: class dimension of `inp`.

    Returns:
        Scalar float tensor with the fraction of samples whose predicted species
        belongs to the same taxonomy group as the target species.
    """
    # One vectorised argmax + gather instead of a Python loop over the batch.
    # Indices are stripped to plain tensors and moved next to the lookup table
    # so the indexing and comparison do not depend on tensor subclass or device.
    pred_species = inp.argmax(dim=axis).as_subclass(torch.Tensor).to(tax_targets.device)
    true_species = targ.as_subclass(torch.Tensor).to(tax_targets.device)
    return (tax_targets[pred_species] == tax_targets[true_species]).float().mean()

def accuracy_family(inp, targ, axis=-1):
    """Family-level accuracy: `accuracy_tax` evaluated with the global `family_targets`."""
    return accuracy_tax(tax_targets=family_targets, inp=inp, targ=targ, axis=axis)

def accuracy_genus(inp, targ, axis=-1):
    """Genus-level accuracy: `accuracy_tax` evaluated with the global `genus_targets`."""
    return accuracy_tax(tax_targets=genus_targets, inp=inp, targ=targ, axis=axis)

def cross_entropy_species(input, target, weight=None, size_average=None, ignore_index=-100,
                          reduce=None, reduction='mean'):
    """Species-level cross-entropy computed from raw scores.

    Args:
        input: unnormalised class scores; the last dimension is the class dimension.
        target: integer class ids.
        weight, size_average, reduce: accepted only for signature compatibility
            with `torch.nn.functional.cross_entropy`; they are not used.
        ignore_index: target value that is excluded from the loss.
        reduction: 'mean', 'sum' or 'none', forwarded to `nll_loss`.

    Returns:
        The negative log-likelihood of `target` under softmax(input).
    """
    # log_softmax is computed in one numerically stable step; taking
    # log(softmax(x)) separately yields -inf once a probability underflows to 0.
    log_probs = log_softmax(input, dim=-1)
    return nll_loss(log_probs, target, ignore_index=ignore_index, reduction=reduction)

def cross_entropy_tax(tax_targets, target_dims, input, target, weight=None, size_average=None, ignore_index=-100,
                  reduce=None, reduction='mean'):
    """Cross-entropy measured at a coarser taxonomy level (e.g. genus or family).

    Species probabilities are summed per taxonomy group and the negative
    log-likelihood of the target's group is returned.

    Args:
        tax_targets: 1-D integer tensor; entry i is the taxonomy id of species i.
        target_dims: number of taxonomy groups (width of the summed distribution).
        input: species scores, softmaxed over dim 1.
        target: integer species ids.
        weight, size_average, reduce: accepted for signature compatibility; unused.
        ignore_index: forwarded to `nll_loss`.
        reduction: forwarded to `nll_loss`.
    """
    # softmax to convert scores to probabilities
    input_p = torch.softmax(input,dim=1)

    # Keep the lookup table on the same device as the scores rather than
    # assuming everything lives on 'cuda:0'.
    tax_targets = tax_targets.to(input_p.device)

    # Sum the probabilities for each taxonomy classification
    # Could not compile: new_input = scatter_add(input_p, tax_targets)
    # One copy of the species -> group index per batch row.
    tax_index = tax_targets.repeat(len(input_p), 1)
    new_input = torch.zeros(len(input_p), target_dims, dtype=input_p.dtype, device=input_p.device)
    new_input.scatter_add_(1, tax_index, input_p)

    # Create the new target
    new_target = TensorCategory(tax_targets[target].long())

    # Guard the log: a group whose summed probability underflowed to 0 would
    # otherwise give -inf and poison the loss with inf/NaN.
    smallest = torch.finfo(new_input.dtype).tiny
    log_p = torch.log(new_input.clamp_min(smallest))
    return nll_loss(log_p, new_target, None, None, ignore_index, None, reduction)

def cross_entropy_family(input, target, weight=None, size_average=None, ignore_index=-100,
                  reduce=None, reduction='mean'):
    """Family-level loss: `cross_entropy_tax` bound to the global family lookup and size."""
    return cross_entropy_tax(
        tax_targets=family_targets,
        target_dims=family_dims,
        input=input,
        target=target,
        weight=weight,
        size_average=size_average,
        ignore_index=ignore_index,
        reduce=reduce,
        reduction=reduction,
    )

def cross_entropy_genus(input, target, weight=None, size_average=None, ignore_index=-100,
                  reduce=None, reduction='mean'):
    """Genus-level loss: `cross_entropy_tax` bound to the global genus lookup and size."""
    return cross_entropy_tax(
        tax_targets=genus_targets,
        target_dims=genus_dims,
        input=input,
        target=target,
        weight=weight,
        size_average=size_average,
        ignore_index=ignore_index,
        reduce=reduce,
        reduction=reduction,
    )


def joint_loss(input, target, w=1, weight=None, size_average=None, ignore_index=-100,
                  reduce=None, reduction='mean'):
    """Blend of the species-level and genus-level cross-entropy.

    Args:
        input: species scores.
        target: integer species ids.
        w: mixing weight; the result is w * species loss + (1 - w) * genus loss,
            so the default w=1 reduces to the plain species loss.
        weight, size_average, ignore_index, reduce, reduction: forwarded
            unchanged to both component losses.
    """
    # Forward the caller's options instead of silently replacing them with the
    # defaults, so e.g. reduction='none' or a custom ignore_index is honoured.
    loss_kwargs = dict(weight=weight, size_average=size_average, ignore_index=ignore_index,
                       reduce=reduce, reduction=reduction)

    ce_species = cross_entropy_species(input, target, **loss_kwargs)
    ce_genus = cross_entropy_genus(input, target, **loss_kwargs)

    # Linear combination of the cross-entropy scores at the 2 levels in hierarchy.
    return w*ce_species+(1-w)*ce_genus

In [6]:
import re
# Captures the leading run of digits of an image path (the part before the
# first '-'); the DataBlock splitter below uses it to choose the validation set.
p = re.compile('dbs/images/500/([0-9]+)-.*')

def get_x(a):
    """Return the first field of a row (the model input, i.e. the image path)."""
    first_field = a[0]
    return first_field
def get_y(a):
    """Return the second field of a row (the label, i.e. the species name)."""
    second_field = a[1]
    return second_field

# DataBlock turning (image path, species) rows into image-classification samples.
mush = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    # The split depends only on the numeric id captured by `p`: ids whose last
    # digit is 0 or 1 satisfy the predicate, so every image sharing an id lands
    # on the same side of the split.
    # NOTE(review): FuncSplitter presumably sends predicate-True rows to validation - confirm.
    splitter=FuncSplitter(lambda o: (int(p.match(o[0]).group(1)) % 10) < 2),
    get_x=get_x,
    get_y=get_y, 
    # Per-item resize to 460 first; the batch augmentations then produce 224px inputs.
    item_tfms=Resize(460, pad_mode=PadMode.Zeros),
    batch_tfms=[*aug_transforms(size=224, pad_mode=PadMode.Zeros, min_scale=.75)])

In [7]:
# Batch size used by the DataLoaders.
batch_size = 256
# Passed as n_acc to GradientAccumulation when the learner is built below.
opt_size = 4096

# Build the DataLoaders from the (img, species) rows as a NumPy array.
dls = mush.dataloaders(species_images.to_numpy(), bs=batch_size)

In [17]:
# weight = 1.0
# species_loss = partial(joint_loss,w=weight)
# final_loss = partial(joint_loss,w=weight)
# Metrics reported for every epoch: species error/accuracy, the coarser
# family/genus accuracies, and top-5/top-10 species accuracy.
metrics = [error_rate, accuracy_family, accuracy_genus, accuracy_species, top_5, top_10]
# Base name for the saved checkpoints and for the exported learner.
name = 'resnet101-f15-fp16-bs4096-augs'
# resnet101 learner trained with the species-level loss, in mixed precision.
learn = vision_learner(dls,
                       resnet101,
                       loss_func=cross_entropy_species, 
                       metrics=metrics,
                       path=Path.cwd()/'models/v0.4',
                       cbs=[
                           # Saves a checkpoint (with optimizer state) every epoch;
                           # a later cell loads one back as name + '_<epoch>'.
                           SaveModelCallback(fname=name, every_epoch=True, with_opt=True), 
                           # Accumulate gradients across batches; n_acc is set to opt_size.
                           GradientAccumulation(n_acc=opt_size)
                       ]
                      ).to_fp16()



In [18]:
# for i in [4,5]:
#     learn = learn.load('resnet152-f10-fp32-augs_' + str(i))
#     print(learn.validate())

In [23]:
# Fine-tune the learner built above for 15 epochs with base_lr 0.004.
learn.fine_tune(15, base_lr=.004)

epoch,train_loss,valid_loss,error_rate,accuracy_family,accuracy_genus,accuracy_species,top_5,top_10,time


KeyboardInterrupt: 

In [15]:
# Restore the checkpoint with suffix '_13' and export it as '<name>.pkl'.
# NOTE(review): presumably the epoch-13 file written by SaveModelCallback - confirm.
learn.load(name + '_13').export(name + '.pkl')

  elif with_opt: warn("Saved filed doesn't contain an optimizer state.")


# Loading & Validation

In [None]:
# Re-load a previously exported learner and validate it on the current data.
# Relies on `metrics` and `dls` defined in earlier cells.
# NOTE(review): load_learner unpickles the file - only load exports you trust.
learn_val = load_learner('models/v0.2-resnet18-h1.0-f4-fp16.pkl')
learn_val.model = learn_val.model.to(device="cuda:0")
# Attach the metrics and DataLoaders defined above to the loaded learner.
learn_val.metrics = metrics
learn_val.dls = dls
learn_val.validate()

# Family/Genus/Species Loss

In [None]:
# Stage 1 of the family -> genus -> species schedule: a resnet18 learner using
# the family-level loss. Note this rebinds `learn`, replacing the resnet101 learner.
learn = vision_learner(dls, resnet18, loss_func=cross_entropy_family, metrics=[error_rate, accuracy_family, accuracy_genus, accuracy_species]).to_fp16()
# Learning-rate sweep for the family-level loss.
learn.lr_find()

In [None]:
# Train with the family loss; base_lr presumably comes from the lr_find() run above - TODO confirm.
learn.fine_tune(4, base_lr=0.0030199517495930195, )
# NOTE(review): the file name says "resnet16" although the learner is built on resnet18.
learn.export("models/family/v0.2-resnet16-f4-fp16.pkl")

In [None]:
# Stage 2: keep the same learner and switch its loss to the genus level.
learn.loss_func = cross_entropy_genus
learn.lr_find()

In [None]:
# Train with the genus loss; base_lr presumably comes from the lr_find() run above - TODO confirm.
learn.fine_tune(4, base_lr=0.0002754228771664202, )
# NOTE(review): the file name says "resnet16" although the learner is built on resnet18.
learn.export("models/genus/v0.2-resnet16-f4-fp16.pkl")

In [None]:
# Stage 3: switch the same learner to the species-level loss.
learn.loss_func = cross_entropy_species
learn.lr_find()

In [None]:
# Train with the species loss; base_lr presumably comes from the lr_find() run above - TODO confirm.
learn.fine_tune(4, base_lr=0.002290867705596611)
# NOTE(review): the file name says "resnet16" although the learner is built on resnet18.
learn.export("models/species/v0.2-resnet16-f4-fp16.pkl")

# Utils

In [None]:
# Drop the references to the large objects, then release cached CUDA memory.
# After this cell `learn`, `dls` and `genus_targets` are None, so any later cell
# that uses them needs the defining cells to be re-run first.
learn = None
dls = None
genus_targets = None
# NOTE(review): `target_dims` is not a global anywhere in the visible cells (it is
# only a parameter name), and `family_targets` is not cleared here - confirm intent.
target_dims = None
torch.cuda.empty_cache()


In [None]:
# Memory snapshot for CUDA device 0.
t = torch.cuda.get_device_properties(0).total_memory  # total device memory
r = torch.cuda.memory_reserved(0)  # value reported by torch.cuda.memory_reserved
a = torch.cuda.memory_allocated(0)  # value reported by torch.cuda.memory_allocated
f = r-a  # free inside reserved

# Display (total, reserved, allocated, free-inside-reserved).
t,r,a,f

In [None]:
# Toy check of the row-wise "sum the values that share an index" operation that
# cross_entropy_tax relies on: each row of `src` is added into 3 buckets
# according to `index`.
src = torch.arange(1, 11).reshape((2, 5))
index = torch.tensor([[0, 1, 2, 0, 2], [0, 1, 2, 0, 2]])
zeros = torch.zeros(2, 3, dtype=src.dtype)
# scatter_add_ replaces scatter_(..., reduce='add'), which is deprecated for a
# Tensor src; the resulting sums are the same.
zeros.scatter_add_(1, index, src)
print(src.shape, index.shape, zeros.shape)
src, index, zeros

In [None]:
# Toy check of the comparison used by top_n: does each row's target id appear
# among that row's top-2 indices?
src = torch.arange(1, 21).reshape((4, 5))
# Targets as a column so they broadcast against the (4, 2) index tensor.
targs = torch.tensor([3,2,4,5]).unsqueeze(-1)
_, idx = torch.topk(src, 2)
# This bare expression is not the last line of the cell, so it displays nothing.
idx, targs
src, idx, targs, (idx == targs).any(-1)

In [None]:
# Shape check for repeating the genus lookup once per batch item.
# NOTE(review): the cleanup cell above sets genus_targets to None, so this fails
# if the cells are run top to bottom. It also calls repeat(batch_size), whereas
# cross_entropy_tax calls repeat(len(input_p), 1).
genus_targets.repeat(batch_size).shape

## Old hierarchy attempt

In [None]:
# NOTE(review): `genus_loader` is not defined in any visible cell, and `learn` is
# None after the cleanup cell above - this cell presumably depends on state from
# an older version of the notebook.
genus_dls = genus_loader(batch_size)
learn.dls = genus_dls
learn.lr_find()

In [None]:
# Fine-tune on the genus DataLoaders and export the result.
# NOTE(review): this writes to the same path as the genus export in the
# "Family/Genus/Species Loss" section and would overwrite that file.
learn.fine_tune(4, base_lr=2.511886486900039e-05)
learn.export("models/genus/v0.2-resnet16-f4-fp16.pkl")

In [None]:
# NOTE(review): `species_loader` is not defined in any visible cell.
species_dls = species_loader(batch_size, 1)
# NOTE(review): species_dls is created above but genus_dls is what gets assigned
# here - possibly a copy/paste slip; confirm which loaders were intended.
learn.dls = genus_dls
learn.lr_find()

In [None]:
# Fine-tune again with the same base_lr as the genus stage and export the result.
learn.fine_tune(4, base_lr=2.511886486900039e-05)
learn.export("models/species/v0.2-resnet16-f4-fp16-fgt.pkl")

In [None]:
# Here species_loader's result is unpacked into (DataLoaders, loss function).
# NOTE(review): the second argument (.8) presumably is the hierarchy weight
# reflected as "h0.8" in the export name - confirm against species_loader.
dls, loss = species_loader(batch_size, .8)

# resnet50 learner trained with the returned loss, in mixed precision.
learn = vision_learner(dls, resnet50, metrics=error_rate, loss_func=loss).to_fp16()

# learn.lr_find()
learn.fine_tune(10)
learn.export("models/v0.2-resnet50-h0.8-f4-fp16.pkl")

In [None]:
# learn = vision_learner(dls, resnet18, metrics=error_rate).to_fp16()
# learn.fine_tune(4)

In [None]:
# learn = vision_learner(dls, resnet18, metrics=error_rate)
# learn.fine_tune(4)

In [None]:
# learn = vision_learner(dls, resnet50, metrics=error_rate)
# learn.fine_tune(4)

In [None]:
# learn = vision_learner(dls, resnet50, metrics=error_rate).to_fp16()
# learn.fine_tune(4)
# learn.save("v0.1-resnet50fp16.mdl")

In [None]:
# learn = vision_learner(dls, resnet101, metrics=error_rate).to_fp16()
# learn.fine_tune(4)
# learn.save("v0.1-resnet101fp16.mdl")

In [None]:
# learn = vision_learner(dls, resnet101, metrics=error_rate)
# learn.fine_tune(4)
# learn.save("v0.1-resnet101fp32.mdl")