## Autofocus multi-label prediction

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai.vision import *

## Prepare the CSV

In [None]:
import pandas as pd

In [None]:
# Relative path to the processed LPZ 2016-2017 data directory.
DATA_DIR = Path('..', '..', 'data', 'lpz_2016_2017', 'processed')
# Input label table that the cells below load and reshape.
CSV_PATH_IN = DATA_DIR.joinpath('labels.csv')

In [None]:
autofocus_df = pd.read_csv(CSV_PATH_IN)

In [None]:
autofocus_df.head()

In [None]:
def _location_of(filename):
    """Return the third dash-separated field of an image filename."""
    return filename.split("-")[2]


# Derive the "location" column from each row's filename.
autofocus_df.loc[:, "location"] = autofocus_df["filename"].map(_location_of)

In [None]:
# Keep only rows that have a label, and only the three columns used below.
labelled_rows = autofocus_df['label'].notna()
autofocus_df = autofocus_df.loc[labelled_rows, ['filename', 'label', 'location']]

In [None]:
autofocus_df.head()

In [None]:
# Start from an identity mapping over every distinct raw label.
raw_labels = autofocus_df['label'].unique()
labelmap = dict(zip(raw_labels, raw_labels))

In [None]:
# Fold fine-grained or alternate raw labels into the coarser classes used
# for training.
labelmap.update({
    'fox squirrel': 'squirrel',
    'e. cottontail': 'rabbit',
    "squirrel (can'\nsquirrel (can't ID)": 'squirrel',
    "gray fox": 'fox',
    "w. t. deer": 'deer',
    'melanistic grey squirrel': 'squirrel',
    'flying squirrel': 'squirrel',
    'gray squirrel': 'squirrel',
    'v. opossum': 'opossum',
    'striped skunk': 'skunk',
    'lawn mower': 'human',
    'Mower': 'human',
    'red fox': 'fox',
})

In [None]:
# Replace each raw label with its merged class from labelmap.
merged_labels = autofocus_df['label'].map(labelmap)
autofocus_df.loc[:, 'label'] = merged_labels

In [None]:
autofocus_df.loc[:, 'label'].unique()

In [None]:
def _join_unique(labels):
    """Join labels with ';', dropping repeats while keeping first-seen order."""
    return ';'.join(dict.fromkeys(labels))


# Collapse to one row per image. Several raw labels are merged into the same
# class above (e.g. 'fox squirrel' and 'gray squirrel' -> 'squirrel'), so the
# labels are de-duplicated before joining to avoid targets such as
# 'squirrel;squirrel'. 'location' is derived from the filename, so every row
# in a group carries the same value; the string 'min' replaces the builtin
# `min`, which recent pandas versions warn about when passed to agg.
autofocus_df = autofocus_df.groupby('filename').agg(
    {'label': _join_unique, 'location': 'min'}
)

In [None]:
# The groupby above leaves 'filename' as the index; move it back to a column.
autofocus_df = autofocus_df.reset_index()

In [None]:
import random  # previously only in scope through the fastai star import

# Number of distinct locations held out for validation, and the seed that
# makes the draw repeatable across notebook runs.
N_VALID_LOCATIONS = 10
SPLIT_SEED = 42

# Sort first so the draw does not depend on row order in the CSV.
all_locations = sorted(autofocus_df['location'].unique().tolist())
# Cap the sample size so a dataset with fewer locations does not raise.
valid_locations = random.Random(SPLIT_SEED).sample(
    all_locations, min(N_VALID_LOCATIONS, len(all_locations))
)

In [None]:
# Mark every image whose location was drawn into valid_locations.
in_valid_location = autofocus_df['location'].isin(valid_locations)
autofocus_df.loc[:, 'valid'] = in_valid_location

In [None]:
# Write next to the input CSV. Building the path from DATA_DIR keeps it in
# step with the directory ImageList.from_csv reads 'multilabel.csv' from.
autofocus_df.to_csv(DATA_DIR / 'multilabel.csv', index=False)

## Create the DataBunch

In [None]:
# Build the labelled item list from multilabel.csv (images live in
# DATA_DIR/images; labels are ';'-delimited).
# Split on the precomputed boolean 'valid' column so that whole locations are
# held out. A random per-image split would place images from the same
# location in both the training and validation sets.
src = (ImageList.from_csv(DATA_DIR, 'multilabel.csv', folder='images', suffix='')
       .split_from_df(col='valid')
       .label_from_df(label_delim=';')
      )

In [None]:
tfms = get_transforms()

In [None]:
# 224px DataBunch, normalised with the ImageNet statistics.
resized_224 = src.transform(tfms, size=224)
data = resized_224.databunch().normalize(imagenet_stats)

In [None]:
data.show_batch(rows=3)

## Multi-label classification

In [None]:
arch = models.resnet50

In [None]:
# Both metrics treat a class as predicted when its probability exceeds 0.2.
acc_02 = partial(accuracy_thresh, thresh=0.2)
f_score = partial(fbeta, thresh=0.2)
# cnn_learner is the current name of the deprecated create_cnn alias.
learn = cnn_learner(data, arch, metrics=[acc_02, f_score])

We use the LR Finder to pick a good learning rate.

In [None]:
# learn.lr_find()

In [None]:
# learn.recorder.plot()

Then we can fit the head of our network.

In [None]:
lr = 3e-2

In [None]:
# learn.fit_one_cycle(2, slice(lr))

In [None]:
# learn.save('stage-1-rn50')

...And fine-tune the whole model:

In [None]:
# learn.unfreeze()

In [None]:
# learn.load('stage-2-rn34')

In [None]:
# learn.lr_find()
# learn.recorder.plot()

In [None]:
# learn.fit_one_cycle(1, slice(1e-5, lr/5))

In [None]:
# learn.save('stage-2-rn50')

In [None]:
# learn.get_preds()

In [None]:
# preds = _

In [None]:
# human_probs = preds[0][:, 8]

In [None]:
# human_pred = human_probs > .5

In [None]:
# is_human = np.array([int(item[0] == 8) for item in learn.data.valid_ds.y.items])

In [None]:
# from sklearn.metrics import accuracy_score, confusion_matrix, auc, roc_curve

In [None]:
# accuracy_score(human_pred, is_human)

In [None]:
# mat = confusion_matrix(is_human, human_pred)

In [None]:
# mat

In [None]:
# fpr, tpr, thresholds = roc_curve(is_human, human_probs, pos_label=1)

In [None]:
# tpr

In [None]:
# fig, ax = plt.subplots()
# plt.plot(fpr, tpr, color='darkorange')

In [None]:
# from sklearn.metrics import fbeta_score

In [None]:
# import numpy as np

# for thresh in np.linspace(0.1, .99, 99):
#     human_pred = human_probs > thresh
#     tp = sum(is_human & np.array(human_pred))
#     fp = sum(~is_human & np.array(human_pred))
#     fn = sum(is_human & ~np.array(human_pred))
#     precision = tp/(tp + fp)
#     recall = tp/(tp + fn)
#     fbeta = fbeta_score(y_true=is_human, y_pred=human_pred, beta=.1)
#     print(thresh, precision, recall, fbeta)

In [None]:
!ls /home/ec2-user/image_drive/autofocus/data/lpz_2016_2017/processed/models

In [None]:
# Rebuild the DataBunch at 512px with batch size 32 and attach it to the
# existing learner before loading saved weights.
data = (src.transform(tfms, size=512)
        .databunch(bs=32).normalize(imagenet_stats))

learn.data = data
# NOTE(review): the checkpoint name says "256" but the data above is 512px,
# and the same name is written after the unfrozen 512px run further down.
# Confirm which resolution this checkpoint was actually trained at.
learn.load('stage-2-256-rn50')

In [None]:
preds = learn.get_preds()

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, auc, roc_curve

In [None]:
thresh = .5


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# Ground-truth class indices for each validation item (one sequence of
# indices per image) and the predicted probabilities as a NumPy array.
valid_items = learn.data.valid_ds.y.items
probs = np.asarray(preds[0])

# Visit classes from most to least frequent in the validation labels.
label_counts = pd.Series(
    [idx for item in valid_items for idx in item]
).value_counts()

for labelnum in label_counts.index:
    label = data.classes[labelnum]
    label_probs = probs[:, labelnum]
    # Multi-label targets: an image is positive when the class appears
    # anywhere in its label list, not only in first position.
    has_label = np.array([labelnum in item for item in valid_items], dtype=bool)
    label_pred = label_probs > thresh

    # Boolean masks keep `~` a logical NOT; on integer arrays it is a
    # bitwise NOT and corrupts the counts.
    tp = int(np.sum(has_label & label_pred))
    tn = int(np.sum(~has_label & ~label_pred))
    fp = int(np.sum(~has_label & label_pred))
    fn = int(np.sum(has_label & ~label_pred))

    precision = _safe_ratio(tp, tp + fp)
    recall = _safe_ratio(tp, tp + fn)
    accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
    print(f'{label}: accuracy at threshold {thresh}: {accuracy}')
    print(f'{label}: precision at threshold {thresh}: {precision}')
    print(f'{label}: recall at threshold {thresh}: {recall}')

    # One ROC curve per class, titled with the class name.
    fpr, tpr, _ = roc_curve(has_label, label_probs, pos_label=1)
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, color='darkorange')
    ax.set_title(label)
    plt.show()

In [None]:
# 512px / batch-size-32 DataBunch, attached to the learner; the final
# expression displays the shape of the first training image.
resized_512 = src.transform(tfms, size=512)
data = resized_512.databunch(bs=32).normalize(imagenet_stats)

learn.data = data
data.train_ds[0][0].shape

In [None]:
learn.freeze()

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
# Learning rate for the frozen-body training run at 512px.
lr = 5e-3

In [None]:
learn.fit_one_cycle(5, slice(lr))

In [None]:
learn.save('stage-1-512-rn50')

In [None]:
learn.unfreeze()

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(5, slice(1e-5, lr/5))

In [None]:
# NOTE(review): saved under a "256" name although learn.data was rebuilt at
# size=512 above and the neighbouring checkpoints are named '...-512-rn50'.
# Other cells load this exact name, so rename them together if this is a typo.
learn.save('stage-2-256-rn50')

In [None]:
learn.recorder.plot_losses()

In [None]:
learn.export()

In [None]:
learn.load('stage-2-256-rn50')

In [None]:
lr = 3e-2

In [None]:
learn.fit_one_cycle(5, slice(1e-5, lr/5))

In [None]:
learn.save('stage-3-512-rn50')

In [None]:
learn.recorder.plot_losses()