In [None]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload  # so when we change underlying code, it reloads automaticallyb
%autoreload 2
%matplotlib inline

In [None]:
# fastai star-imports; later cells rely on the names these bring in (np, plt, os, PIL, torch, ...).
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [None]:
PATH="data/dogscats/"  # dataset root; later cells build file paths by appending to it
sz=224  # images will be resized to this (to speed training?)

# Sanity-check the GPU setup. The values are printed explicitly because a notebook
# cell only displays the value of its last expression.
print(torch.cuda.is_available())       # check that GPU is set up correctly
print(torch.backends.cudnn.enabled)    # check for accelerated functions (cuDNN)

# ! wget http://files.fast.ai/data/dogscats.zip  # download data

In [None]:
# Basic dataset exploration: list the directory layout, then look at one validation image.
print(os.listdir(PATH))
print(os.listdir(f'{PATH}valid'))
files = os.listdir(f'{PATH}valid/cats')[:5]  # first five entries of the cats folder
print(f"files: {files}")  # f-string so the list is interpolated, not printed literally
img = plt.imread(f'{PATH}valid/cats/{files[0]}')
plt.imshow(img)
print(img.shape)
print(img[:4,:4])  # top-left 4x4 corner of the raw pixel data


In [None]:
# Begin from a pretrained ResNet-34.
# Background: https://github.com/KaimingHe/deep-residual-networks
#   - pretrained on ImageNet (1.2 million images, 1000 classes)
#   - the ResNet architecture won the 2015 ImageNet competition
arch = resnet34
model_tfms = tfms_from_model(arch, sz)  # transforms derived from the architecture and image size
data = ImageClassifierData.from_paths(PATH, tfms=model_tfms)
learn = ConvLearner.pretrained(arch, data, precompute=True)
learn.fit(0.01, 2)

In [None]:
# Look at some results:
# 1. A few correct labels, picked at random
# 2. A few incorrect labels, picked at random
# 3. The most correct labels of each class
# 4. The most incorrect labels of each class
# 5. The most uncertain labels

In [None]:
# Printed explicitly: a bare expression in the middle of a cell displays nothing.
print(data.val_y)    # labels for the validation data
print(data.classes)  # ['cats', 'dogs'] means cats = 0, dogs = 1
log_preds = learn.predict()  # predictions for the validation set
print(log_preds.shape)  # size of the prediction matrix
print(log_preds[:10])

preds = np.argmax(log_preds, axis=1)  # from log probs to 0 or 1
probs = np.exp(log_preds[:,1])  # P(dog)

In [2]:
# make some helper functions 
def rand_by_mask(mask, n=4):
    """Pick up to ``n`` distinct random indices at which ``mask`` is true.

    Args:
        mask: Boolean array-like; candidates are the positions where it is true.
        n: Maximum number of indices to draw (defaults to 4).

    Returns:
        A 1-D array of distinct indices. It holds fewer than ``n`` entries
        (possibly none) when the mask selects fewer than ``n`` positions.
    """
    candidates = np.where(mask)[0]
    # Clamp the sample size: sampling without replacement raises ValueError
    # when more items are requested than are available.
    n_pick = min(n, len(candidates))
    if n_pick == 0:
        return candidates[:0]
    return np.random.choice(candidates, n_pick, replace=False)

def rand_by_correct(is_correct):
    """Randomly sample validation indices by prediction correctness.

    Compares the notebook-level ``preds`` with ``data.val_y``; ``is_correct``
    chooses between matching (True) and non-matching (False) predictions.
    """
    prediction_matches = preds == data.val_y
    wanted = prediction_matches == is_correct
    return rand_by_mask(wanted)

def plot_val_with_title(idxs, title):
    """Print ``title`` and plot the denormalized validation images at ``idxs``.

    Each image is titled with its entry in the notebook-level ``probs`` array.

    NOTE: a later definition of ``plot_val_with_title`` in this notebook
    replaces this one when the cells are run top to bottom.
    """
    # Stack the images themselves: the comprehension must sit inside the list
    # brackets, otherwise np.stack receives a generator of one-element lists.
    imgs = np.stack([data.val_ds[x][0] for x in idxs])
    title_probs = [probs[x] for x in idxs]
    print(title)
    return plots(data.val_ds.denorm(imgs), rows=1, titles=title_probs)

def plots(ims, figsize=(12,6), rows=1, titles=None):
    """Draw a sequence of images as a grid of subplots on a single figure.

    Args:
        ims: Indexable sequence of images; each one is passed to ``plt.imshow``.
        figsize: Figure size handed to ``plt.figure``.
        rows: Number of subplot rows.
        titles: Optional sequence with one title per image; ``None`` draws no titles.
    """
    fig = plt.figure(figsize=figsize)
    # Ceiling division so a partly filled last row still has a slot for every image.
    cols = -(-len(ims) // rows)
    for ii in range(len(ims)):
        sp = fig.add_subplot(rows, cols, ii+1)
        sp.axis('Off')
        if titles is not None:
            sp.set_title(titles[ii], fontsize=16)
        plt.imshow(ims[ii])
        
def load_img_id(ds, idx):
    """Load the image file for entry ``idx`` of ``ds.fnames`` as a NumPy array.

    The file path is the notebook-level ``PATH`` followed by ``ds.fnames[idx]``.
    """
    # The context manager closes the file as soon as the pixel data has been
    # copied into the array, rather than leaving that to garbage collection.
    with PIL.Image.open(PATH+ds.fnames[idx]) as img:
        return np.array(img)

def plot_val_with_title(idxs, title):
    """Print ``title`` and plot the validation image files at ``idxs``.

    Images are loaded with ``load_img_id`` and each is titled with its entry
    in the notebook-level ``probs`` array.
    """
    pictures = []
    for position in idxs:
        pictures.append(load_img_id(data.val_ds, position))

    captions = []
    for position in idxs:
        captions.append(probs[position])

    print(title)
    return plots(pictures, rows=1, titles=captions, figsize=(16,8))

def most_by_mask(mask, mult):
    """Return the 4 masked indices with the smallest ``mult * probs`` values.

    With ``mult=1`` these are the lowest entries of the notebook-level
    ``probs`` among the masked positions; with ``mult=-1`` the highest.
    """
    candidates = np.where(mask)[0]
    ranking = np.argsort(mult * probs[candidates])
    top_four = ranking[:4]
    return candidates[top_four]

def most_by_correct(y, is_correct):
    """Return the 4 most extreme validation indices for true class ``y``.

    Only items whose true label is ``y`` and whose prediction correctness
    equals ``is_correct`` are considered. The sort direction is chosen so that
    the highest ``probs`` come first when ``(y == 1) == is_correct`` and the
    lowest otherwise.
    """
    if (y == 1) == is_correct:
        mult = -1
    else:
        mult = 1
    correctness_matches = (preds == data.val_y) == is_correct
    has_label = data.val_y == y
    return most_by_mask(correctness_matches & has_label, mult)

In [None]:
# 1. A random handful of correctly classified validation images
correct_idxs = rand_by_correct(True)
plot_val_with_title(correct_idxs, "Correct")


In [None]:
# 2. A random handful of incorrectly classified validation images
plot_val_with_title(rand_by_correct(False), "Incorrect")


In [None]:
# 3. Most correct: the most confident right answers for each class (0 = cats, 1 = dogs)
for class_idx, heading in ((0, "Most correct cats"), (1, "Most correct dogs")):
    plot_val_with_title(most_by_correct(class_idx, True), heading)


In [None]:
# 4. Most incorrect: the most confident wrong answers for each class (0 = cats, 1 = dogs)
for class_idx, heading in ((0, "Most incorrect cats"), (1, "Most incorrect dogs")):
    plot_val_with_title(most_by_correct(class_idx, False), heading)

In [None]:
# 5. Most uncertain: the predictions whose probability lies closest to 0.5
distance_from_even = np.abs(probs - 0.5)
most_uncertain = np.argsort(distance_from_even)[:4]
plot_val_with_title(most_uncertain, "Most uncertain")