In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

from fastai.conv_learner import *
from fastai.dataset import *

from pathlib import Path
import json
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects

In [5]:
def bb_hw(a):
    """Convert a corner-style box into ``[x, y, width, height]`` for plotting.

    ``a`` is indexed as ``[top, left, bottom, right]``.  The annotation loader
    in this notebook builds such boxes as
    ``[b[1], b[0], b[3]+b[1]-1, b[2]+b[0]-1]``, i.e. bottom/right are stored
    with a ``-1``.  The ``+1`` below undoes that so the returned size equals
    the size the box was built from.

    Returns a NumPy array ``[a[1], a[0], a[3]-a[1]+1, a[2]-a[0]+1]``.
    """
    return np.array([a[1], a[0], a[3]-a[1]+1, a[2]-a[0]+1])

def show_img(im, figsize=None, ax=None):
    """Show ``im`` on a matplotlib axes with both axis rulers hidden.

    im:      image data passed straight to ``ax.imshow``.
    figsize: forwarded to ``plt.subplots``; only used when a new axes is created.
    ax:      axes to draw on; a new figure/axes pair is created when ``None``.

    Returns the axes that was drawn on.
    """
    # Compare against None explicitly: a caller-supplied axes must never be
    # replaced just because the object happens to evaluate as falsy.
    if ax is None: _,ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    return ax

def draw_outline(o, lw):
    """Give artist ``o`` a black stroke of line width ``lw`` underneath its normal rendering."""
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([stroke, patheffects.Normal()])

def draw_rect(ax, b):
    """Add box ``b`` to ``ax`` as an unfilled white rectangle with a black outline.

    The first two entries of ``b`` are used as the rectangle's anchor point and
    the last two are unpacked as its remaining positional size arguments.
    """
    rect = patches.Rectangle(b[:2], *b[-2:], fill=False, edgecolor='white', lw=2)
    draw_outline(ax.add_patch(rect), 4)

def draw_text(ax, xy, txt, sz=14):
    """Write ``txt`` at position ``xy`` on ``ax`` in bold white, size ``sz``, with a thin black outline."""
    style = dict(verticalalignment='top', color='white', fontsize=sz, weight='bold')
    label = ax.text(*xy, txt, **style)
    draw_outline(label, 1)
    
def draw_im(im, ann):
    """Show image ``im`` and overlay every annotation in ``ann``.

    ``ann`` is an iterable of ``(box, category_id)`` pairs; each box is
    converted with ``bb_hw`` and labelled with ``cats[category_id]``.
    """
    axes = show_img(im, figsize=(16,8))
    for box, cat_id in ann:
        rect = bb_hw(box)
        draw_rect(axes, rect)
        # The label is anchored at the first two entries of the converted box.
        draw_text(axes, rect[:2], cats[cat_id], sz=16)

def draw_idx(i):
    """Load the image with id ``i``, print its shape and draw it with its annotations."""
    annotations = trn_anno[i]
    image = open_image(IMG_PATH/trn_fns[i])
    print(image.shape)
    draw_im(image, annotations)
    
def get_lrg(b):
    """Return the annotation whose bounding box is largest.

    b: non-empty sequence of ``(bbox, category)`` pairs, where ``bbox`` is a
       NumPy array whose first two and last two entries are opposite corners.

    The size of a box is the product of (last two entries - first two entries).
    When several boxes tie, the earliest one in ``b`` is returned.

    Raises ValueError if ``b`` is empty.
    """
    if not b: raise ValueError('get_lrg() requires at least one annotation')
    # max() returns the first maximal element, which is what a stable
    # descending sort followed by [0] would pick, without sorting the list.
    return max(b, key=lambda x: np.prod(x[0][-2:]-x[0][:2]))

In [6]:
PATH = Path('data/pascal')

# read_text() opens, reads and closes the file, so no handle is left open
# after the JSON has been parsed.
trn_j = json.loads((PATH / 'pascal_train2007.json').read_text())
IMAGES, ANNOTATIONS, CATEGORIES = ['images', 'annotations', 'categories']
FILE_NAME,ID,IMG_ID,CAT_ID,BBOX = 'file_name','id','image_id','category_id','bbox'
# Lookup tables keyed by id: category id -> name, image id -> file name.
cats = {o[ID]: o['name'] for o in trn_j[CATEGORIES]}
trn_fns = dict((o[ID], o[FILE_NAME]) for o in trn_j[IMAGES])
trn_ids = [o[ID] for o in trn_j[IMAGES]]

JPEGS = 'VOCdevkit/VOC2007/JPEGImages'
IMG_PATH = PATH/JPEGS

# image id -> list of (bbox, category id) for every annotation not flagged 'ignore'.
trn_anno = collections.defaultdict(list)
for o in trn_j[ANNOTATIONS]:
    if not o['ignore']:
        bb = o[BBOX]
        # Reorder to [bb[1], bb[0], ...] and turn the last two entries into
        # end coordinates (start + size - 1).
        # NOTE(review): presumably the JSON bbox is [x, y, width, height] - confirm.
        bb = np.array([bb[1], bb[0], bb[3]+bb[1]-1, bb[2]+bb[0]-1])
        trn_anno[o[IMG_ID]].append((bb, o[CAT_ID]))

# image id -> the single annotation selected by get_lrg for that image.
trn_lrg_anno = {a: get_lrg(b) for a,b in trn_anno.items()}

In [8]:
# Write one row per training image (file name, category name of the annotation
# picked by get_lrg), then read the file back and show its first rows.
CSV = PATH/'tmp'/'lrg.csv'
CSV.parent.mkdir(exist_ok=True)
df = pd.DataFrame(
    {
        'fn': [trn_fns[img_id] for img_id in trn_ids],
        'cat': [cats[trn_lrg_anno[img_id][1]] for img_id in trn_ids],
    },
    columns=['fn', 'cat'],
)
df.to_csv(CSV, index=False)
pd.read_csv(CSV).head()

Unnamed: 0,fn,cat
0,000012.jpg,car
1,000017.jpg,horse
2,000023.jpg,person
3,000026.jpg,car
4,000032.jpg,aeroplane


In [9]:
# Backbone architecture, input image size and batch size used below.
f_model = resnet34
sz=224
bs=64

# Transforms are derived from the backbone, with side-on augmentations and
# cropping disabled (CropType.NO).
tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_side_on, crop_type=CropType.NO)
# Pass bs explicitly so that editing the constant above actually changes the
# batch size instead of silently falling back to the library default.
md = ImageClassifierData.from_csv(PATH, JPEGS, CSV, tfms=tfms, bs=bs)

In [11]:
# Pull the first mini-batch (inputs x, targets y) from the validation loader.
x,y=next(iter(md.val_dl))
#show_img(md.val_ds.denorm(to_np(x))[0]);

In [12]:
# Build a learner on top of the pretrained f_model, reporting accuracy during fitting.
learn = ConvLearner.pretrained(f_model, md, metrics=[accuracy])
# Use Adam as the optimizer constructor.
learn.opt_fn = optim.Adam
lr = 2e-2
# The recorded output for this call shows a single epoch.
learn.fit(lr, 1, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                 
    0      1.302087   0.668427   0.809495  



[0.6684272289276123, 0.8094951957464218]

In [75]:
# Three learning rates spanning lr/1000 .. lr.
# NOTE(review): presumably one rate per layer group of the learner - confirm.
lrs = np.array([lr/1000,lr/100,lr])
# NOTE(review): presumably leaves only the last two layer groups trainable - confirm.
learn.freeze_to(-2)
# Fit again with all three rates divided by 5.
learn.fit(lrs/5, 1, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                  
    0      0.363757   0.61983    0.821064  



[0.6198300942778587, 0.8210637047886848]

In [14]:
# Unfreeze the learner and fit with the reduced rates; the recorded output
# for this call shows two epochs.
learn.unfreeze()
learn.fit(lrs/5, 1, cycle_len=2)

epoch      trn_loss   val_loss   accuracy                  
    0      0.652215   0.562579   0.822566  
    1      0.448812   0.576637   0.824519                  



[0.5766368880867958, 0.8245192319154739]

In [73]:
# Restore weights previously stored under the name 'class_one'.
# NOTE(review): no learn.save('class_one') is visible in the cells above, so on
# a fresh kernel this presumably relies on a checkpoint written by an earlier
# session - confirm, or add the matching save after training.
learn.load('class_one')

In [15]:
# Predict classes for the first validation batch.
x,y = next(iter(md.val_dl))
# NOTE(review): the raw outputs displayed further down are all negative with
# near-zero maxima, which looks like log-probabilities; if so, softmax here
# does not yield the model's probabilities (argmax is unaffected) - confirm.
probs = F.softmax(predict_batch(learn.model, x), -1)
# x is rebound to its NumPy form here; later cells that reuse x see this value.
x,preds = to_np(x),to_np(probs)
# Index of the highest-scoring class per row.
preds = np.argmax(preds, -1)
preds[:10]

array([14, 11,  2, 14, 14,  6, 13,  8,  2, 16])

In [77]:
# Display the raw model outputs for batch x (recorded output: 64x20).
predict_batch(learn.model, x)

Variable containing:
-1.4912e+01 -5.8179e+00 -1.2194e+01  ...  -1.3608e+01 -1.3405e+01 -1.3641e+01
-1.1735e+01 -9.0272e+00 -1.3055e+01  ...  -1.6911e+00 -1.3717e+01 -9.5481e+00
-8.4287e+00 -1.2785e+01 -1.7634e-02  ...  -1.4863e+01 -1.3429e+01 -1.3263e+01
                ...                   ⋱                   ...                
-1.2520e+01 -9.3879e+00 -1.3070e+01  ...  -4.2500e+00 -1.2885e+01 -1.1605e+01
-1.3474e+01 -1.1209e+01 -1.2944e+01  ...  -7.4971e+00 -1.2352e+01 -1.1298e+01
-7.4638e+00 -1.0668e+01 -1.0699e+01  ...  -1.2035e+01 -7.9204e+00 -8.9310e+00
[torch.cuda.FloatTensor of size 64x20 (GPU 0)]

In [76]:
# Fetch the first validation batch again and call the model directly on it;
# the recorded output is identical to the predict_batch output shown above.
x,y = next(iter(md.val_dl))
learn.model(VV(x))

Variable containing:
-1.4912e+01 -5.8179e+00 -1.2194e+01  ...  -1.3608e+01 -1.3405e+01 -1.3641e+01
-1.1735e+01 -9.0272e+00 -1.3055e+01  ...  -1.6911e+00 -1.3717e+01 -9.5481e+00
-8.4287e+00 -1.2785e+01 -1.7634e-02  ...  -1.4863e+01 -1.3429e+01 -1.3263e+01
                ...                   ⋱                   ...                
-1.2520e+01 -9.3879e+00 -1.3070e+01  ...  -4.2500e+00 -1.2885e+01 -1.1605e+01
-1.3474e+01 -1.1209e+01 -1.2944e+01  ...  -7.4971e+00 -1.2352e+01 -1.1298e+01
-7.4638e+00 -1.0668e+01 -1.0699e+01  ...  -1.2035e+01 -7.9204e+00 -8.9310e+00
[torch.cuda.FloatTensor of size 64x20 (GPU 0)]

In [43]:
# Predicted class index for every row of the batch.  dim=-1 is passed
# explicitly: relying on softmax's implicit dimension is deprecated, and this
# matches the earlier prediction cell.
np.argmax(to_np(F.softmax(predict_batch(learn.model, x), -1)), -1)

array([14, 11,  2, 14, 14,  6, 13,  8,  2, 16, 15,  6, 17,  7, 12,  0, 14, 14,  7, 19,  1,  8, 14, 13, 14,
       14, 14,  5,  9, 18, 13,  0,  2,  6, 18, 11, 12,  6,  0, 10,  6, 13, 12, 14,  3, 13, 14,  7, 12, 13,
       12, 14, 13,  2, 14, 11,  6,  0,  2,  2,  2,  7, 14,  9])