In [1]:
## notebook setup
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import shutil
import glob
import numpy as np
from fastai import *
from fastai.vision import *
from challenge_setup import *

In [3]:
#import sys
#sys.path.append('../tests/')
#from challenge_setup import *

## Prepare Learner

In [59]:
src = (ImageFileList.from_folder(path)            
       .label_from_csv('train.csv', sep=' ', folder='train_combined', suffix='.png')  
       .random_split_by_pct(0.2))
tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)
bs = 64
sz = 512

In [60]:
# start with smaller size
data = (src.datasets()
        .transform(tfms, size=sz)
        .databunch(bs=bs).normalize(imagenet_stats))

In [61]:
# use pretrained model
arch = models.resnet34
f1_score = partial(fbeta, thresh=0.2, beta=1.)
learn = create_cnn(data, arch, metrics=f1_score)

In [62]:
t = learn.load('stage-1-512-rn34')

## Predictions part

In [63]:
# add test databunch
learn.data = (src.add_test_folder('test_combined')
        .datasets(ImageMultiDataset)
        .transform(tfms, size=sz)
        .databunch().normalize(imagenet_stats))



## Get ids of test images

In [64]:
def get_image_names(data):
    return [fn.stem for fn in data.test_dl.x]
fnames = get_image_names(learn.data); fnames[:2]
#len(fnames)

['cdda98c0-bad6-11e8-b2b9-ac1f6b6435d0',
 '0305dfb6-bad0-11e8-b2b8-ac1f6b6435d0']

# Run predictions

## Figure out threshold

In [65]:
p_v, t_v = learn.get_preds(DatasetType.Valid)



In [66]:
p_v = p_v.numpy()
t_v = t_v.numpy()

In [67]:
def sigmoid(a):
    return 1/(1+np.exp(-a))

In [68]:
sp_v = sigmoid(p_v) #compute the sigmoid of the network output

In [69]:
def f1_np(y_pred, y_true, threshold=0.5):
    '''numpy f1 metric'''
    y_pred = (y_pred>threshold).astype(int)
    TP = (y_pred*y_true).sum(1)
    prec = TP/(y_pred.sum(1)+1e-7)
    rec = TP/(y_true.sum(1)+1e-7)
    res = 2*prec*rec/(prec+rec+1e-7)
    return res.mean()


def f1_n(y_pred, y_true, thresh, n, default=0.5):
    '''partial f1 function for index n'''
    threshold = default * np.ones(y_pred.shape[1])
    threshold[n]=thresh
    return f1_np(y_pred, y_true, threshold)

def find_thresh(y_pred, y_true):
    '''brute force thresh finder'''
    ths = []
    for i in range(y_pred.shape[1]):
        aux = []
        for th in np.linspace(0,1,100):
            aux += [f1_n(y_pred, y_true, th, i)]
        ths += [np.array(aux).argmax()/100]
    return np.array(ths)

In [70]:
ths = find_thresh(sp_v, t_v); ths

array([0.51, 0.51, 0.  , 0.51, 0.  , 0.  , 0.  , 0.51, 0.51, 0.54, 0.72, 0.7 , 0.73, 0.73, 0.51, 0.  , 0.73, 0.73,
       0.73, 0.72, 0.51, 0.51, 0.63, 0.51, 0.73, 0.73, 0.51, 0.5 ])

In [71]:
f1_np(sp_v, t_v, 0.5), f1_np(sp_v, t_v, ths)

(0.11168262921627775, 0.2662175967592205)

## Now the predictions

In [72]:
preds_t = learn.TTA(scale=1.0, ds_type=DatasetType.Test, with_loss=False)[0]

Total time: 11:46



In [77]:
preds = sigmoid(preds_t.numpy())
threshold = ths
print(preds.shape)

(11702, 28)


In [78]:
classes = np.array([str(n) for n in range(28)])

In [79]:
res = np.array([" ".join(classes[(np.where(pp>threshold))])for pp in preds])

In [82]:
submit_df = pd.DataFrame(data={'Id':fnames, 'Predicted': res})

In [83]:
submit_df.sort_values(by='Id', inplace=True)
os.makedirs('data/submits', exist_ok=True)

In [84]:
submit_df.to_csv('data/submits/your_submission.csv', header=True, index=False)

In [85]:
from IPython.display import FileLink
FileLink('data/submits/your_submission.csv')