In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [None]:
# Dataset root. The trailing slash matters: later cells build paths by plain string
# concatenation (f'{PATH}labels.csv', PATH+fn).
PATH = "data/dogbreed/"

In [None]:
# Settings shared by the cells below.
# sz: image size handed to tfms_from_model / get_data.
sz = 224
# arch: architecture used both to derive the transforms and to build the learner.
arch = inception_4
# bs: batch size passed to ImageClassifierData.from_csv.
bs = 58

In [None]:
label_csv = f'{PATH}labels.csv'
# Number of labelled images = line count minus the header row. The context manager
# closes the file deterministically instead of leaving an open handle behind.
with open(label_csv) as label_file:
    n = sum(1 for line in label_file) - 1
# Indices held out for validation; get_data() reads this module-level name on every call.
val_idxs = get_cv_idxs(n)

In [None]:
# Load the label file into a DataFrame for inspection.
label_df = pd.read_csv(label_csv)

In [None]:
# Images per breed, most frequent first: aggfunc=len counts the rows of each breed
# group, and the result is sorted on the 'id' column, which carries that count.
label_df.pivot_table(index='breed', aggfunc=len).sort_values('id', ascending=False)

In [None]:
def get_data(sz, bs):
    """Build the train/validation/test data object for image size `sz` and batch size `bs`.

    Relies on the notebook-level names `arch`, `PATH` and `val_idxs`. Transforms
    come from `tfms_from_model` with side-on augmentations and `max_zoom=1.1`.

    Returns the object built by `ImageClassifierData.from_csv` as-is when `sz` is
    above 300; otherwise returns the result of calling `.resize(340, 'tmp')` on it.
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    dataset = ImageClassifierData.from_csv(
        PATH, 'train', f'{PATH}labels.csv',
        test_name='test',
        val_idxs=val_idxs,
        suffix='.jpg',
        tfms=transforms,
        bs=bs,
    )
    if sz > 300:
        return dataset
    return dataset.resize(340, 'tmp')

In [None]:
# Data object at the base image size and batch size configured above.
data = get_data(sz, bs)

In [None]:
# Learner built on the pretrained `arch`, created with precompute=True.
# NOTE(review): in fastai 0.7 precompute=True presumably caches the frozen backbone's
# activations, so augmentation has no effect until it is switched off — confirm.
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [None]:
# Run the learning-rate finder; the next cell plots what it recorded.
learn.lr_find()

In [None]:
# Plot the scheduler's record from lr_find(). The 1e-2 rate used in the fits below
# was presumably read off this plot.
learn.sched.plot()

In [None]:
# First training run: learning rate 1e-2, second argument (cycle count) 2.
learn.fit(1e-2, 2)

In [None]:
# Continue training the same learner for 3 more cycles at the same rate.
learn.fit(1e-2, 3)

## Train with data augmentation

In [None]:
from sklearn import metrics

In [None]:
# Rebuild the data object for this section (same size and batch size as before).
data = get_data(sz, bs)

In [None]:
# New learner, replacing the one trained above; this time with ps=0.5
# (presumably the dropout probability of the added head — confirm).
learn = ConvLearner.pretrained(arch, data, precompute=True, ps=0.5)

In [None]:
# Two cycles at 1e-2 while precompute is still enabled.
learn.fit(1e-2, 2)

In [None]:
# Switch precompute off for the fits that follow.
learn.precompute=False
# NOTE(review): bn_freeze presumably keeps batch-norm statistics fixed during
# training — confirm against the fastai version in use.
learn.bn_freeze=True

In [None]:
# Five cycles at 1e-2 with cycle_len=1.
learn.fit(1e-2, 5, cycle_len=1)

In [None]:
# Checkpoint the learner after the 224px stage.
learn.save('224_i4_cr_pre')

In [None]:
# Reload the checkpoint saved in the previous cell (lets the notebook resume from here).
learn.load('224_i4_cr_pre')

## Increase size

In [None]:
# Swap in 350px data. Because 350 > 300, get_data() returns the data object
# without the resize step.
learn.set_data(get_data(350, bs))
# Called after every data swap in this notebook; presumably re-freezes the
# pretrained layers — confirm.
learn.freeze()

In [None]:
# Three cycles at 1e-2 with cycle_len=1 on the 350px data.
learn.fit(1e-2, 3, cycle_len=1)

In [None]:
# Three more cycles, now with cycle_mult=2 on top of cycle_len=1.
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

In [None]:
# Checkpoint the learner after the 350px stage.
learn.save('350_i4_cr_pre')

In [None]:
# Reload the 350px checkpoint saved in the previous cell.
learn.load('350_i4_cr_pre')

In [None]:
# Swap in 400px data (above 300, so get_data() skips the resize step).
learn.set_data(get_data(400, bs))
# Same post-swap freeze() call as in the 350px stage.
learn.freeze()

In [None]:
# Three cycles at 1e-2 with cycle_len=1 on the 400px data.
learn.fit(1e-2, 3, cycle_len=1)

In [None]:
# Three more cycles with cycle_len=1 and cycle_mult=2.
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

In [None]:
# Checkpoint the learner after the 400px stage.
learn.save('400_i4_cr_pre')

In [None]:
# Reload the 400px checkpoint saved in the previous cell.
learn.load('400_i4_cr_pre')

In [None]:
# Test-time augmentation with default arguments (no is_test flag, unlike the
# test-set cells further down).
# NOTE(review): some fastai 0.7 releases return TTA predictions stacked per
# augmentation and need a mean over axis 0 first — confirm for the installed version.
log_preds, y = learn.TTA()
# Exponentiate the log-predictions to get probabilities for log_loss.
probs = np.exp(log_preds)
# Displays (accuracy, log loss); `metrics` is sklearn.metrics, imported in an earlier cell.
accuracy(log_preds, y), metrics.log_loss(y, probs)

In [None]:
# NOTE(review): same checkpoint name as the save a few cells up, and no fit() call
# in between — this looks redundant.
learn.save('400_i4_cr_pre')

In [None]:
# Test-time augmentation on the test set (is_test=True). The probabilities are kept
# under their own name because the submission cell reads probs_i4_400.
log_preds, y = learn.TTA(is_test=True)
probs_i4_400 = np.exp(log_preds)
# Metrics left disabled for the test run — presumably `y` holds no real labels here.
#accuracy(log_preds, y), metrics.log_loss(y, probs)

In [None]:
def save_array(fname, arr):
    """Write `arr` to disk as a bcolz carray rooted at `fname`, overwriting (mode='w')."""
    carr = bcolz.carray(arr, rootdir=fname, mode='w')
    # Flush so the data is written out before the handle goes away.
    carr.flush()

In [None]:
# Persist the 400px test predictions so they can be reloaded without re-running TTA.
save_array('probs_i4_400.bc', probs_i4_400)

In [None]:
# One additional cycle with cycle_len=2 on the data currently attached to the learner.
learn.fit(1e-2, 1, cycle_len=2)

In [None]:
# NOTE(review): the checkpoint name says 350, but the last set_data() call above
# attached the 400px data — the name may be stale.
learn.save('350_i4_pre')

In [None]:
# Test-set TTA again after the extra training cycle; overwrites log_preds and y.
log_preds, y = learn.TTA(is_test=True)
probs_i4_new = np.exp(log_preds)

In [None]:
# Persist the newer test predictions alongside the earlier probs_i4_400 array.
save_array('probs_i4_new.bc', probs_i4_new)

## Use whole dataset now

In [None]:
# NOTE(review): no cell in this notebook saves a checkpoint named '299_pre', so this
# depends on a file from an earlier session. `learn` is also rebuilt from scratch a
# few cells below, so the loaded weights are not carried forward.
learn.load('299_pre')

In [None]:
def get_data_whole(sz, bs):
    """Build the data object with (almost) every labelled image in the training split.

    Same construction as `get_data`, except the validation split is fixed to the
    single index `[0]` rather than the notebook-level `val_idxs`.

    Returns the object from `ImageClassifierData.from_csv` directly when `sz` is
    above 300, and the result of `.resize(340, 'tmp')` on it otherwise.
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    full_data = ImageClassifierData.from_csv(
        PATH, 'train', f'{PATH}labels.csv',
        test_name='test',
        val_idxs=[0],  # one-element validation set: everything else is trained on
        suffix='.jpg',
        tfms=transforms,
        bs=bs,
    )
    if sz > 300:
        return full_data
    return full_data.resize(340, 'tmp')

In [None]:
def load_array(fname):
    """Open the bcolz array stored at `fname` and return its full contents (`[:]`)."""
    stored = bcolz.open(fname)
    return stored[:]

In [None]:
# Rebind `data` to the whole-dataset variant; from here on data.val_ds holds only index 0.
data = get_data_whole(sz, bs)

In [None]:
# Fresh learner on the whole-dataset data object; replaces the learner trained above.
learn = ConvLearner.pretrained(arch, data, precompute=True)
learn.freeze()

In [None]:
# Five cycles at 1e-2 on the whole-dataset learner.
learn.fit(1e-2, 5)

In [None]:
# NOTE(review): this rebinds `learn` to a brand-new learner (now with ps=0.5), so
# the five cycles trained in the previous cell are discarded.
learn = ConvLearner.pretrained(arch, data, precompute=True, ps=0.5)

In [None]:
# Two cycles at 1e-2 while precompute is still enabled.
learn.fit(1e-2, 2)

In [None]:
# Same switch as in the augmentation section: precompute off, bn_freeze on.
learn.precompute = False
learn.bn_freeze = True

In [None]:
# Three cycles with cycle_len=1 and cycle_mult=2 at the base image size.
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

In [None]:
# Swap in 299px whole-dataset data. 299 is not above 300, so get_data_whole()
# still goes through its resize(340, 'tmp') branch.
learn.set_data(get_data_whole(299, bs))
learn.freeze()

In [None]:
# Three cycles with cycle_len=1 and cycle_mult=2 on the 299px data.
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

In [None]:
# Test-set TTA for the whole-dataset model; overwrites log_preds and y again.
log_preds, y = learn.TTA(is_test=True)
# NOTE(review): the variable name mentions resnx101_64, but `arch` is set to
# inception_4 at the top of the notebook — the name looks carried over from another run.
probs_resnx101_64_full = np.exp(log_preds)
# Metrics left disabled for the test run — presumably `y` holds no real labels here.
#accuracy(log_preds, y), metrics.log_loss(y, probs)

In [None]:
# Persist the whole-dataset model's test predictions.
save_array('probs_resnx101_64_full.bc', probs_resnx101_64_full)

In [None]:
# Checkpoint the whole-dataset learner after the 299px stage.
learn.save('299_full')

## Try ensembling
### Use another architecture first

## Save to file for submission

In [None]:
# Submission table: one probability column per class, taken from the 400px
# test-set predictions, with the image id as the first column.
df = pd.DataFrame(probs_i4_400)
df.columns = data.classes
# Derive each id from the file name itself (directory and extension stripped).
# The previous fixed-width slice o[5:-4] silently assumed a 5-character directory
# prefix and a 4-character extension.
df.insert(0, 'id', [os.path.splitext(os.path.basename(o))[0] for o in data.test_ds.fnames])

In [None]:
# Output directory for submissions, created if it does not exist yet.
SUBM = f'{PATH}results_2/'
os.makedirs(SUBM, exist_ok=True)
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv(f'{SUBM}submission100.csv', index=False)
# Alternative gzip-compressed output, currently disabled.
#df.to_csv(f'{SUBM}subm.gz', compression='gzip', index=False)

## Individual prediction

In [None]:
# First file of the validation set. `data` was last built by get_data_whole(), so
# this is the single image at label index 0.
fn = data.val_ds.fnames[0]

In [None]:
# Show the selected file name (relative to PATH).
fn

In [None]:
# Preview the image, scaled to 150x150 for display.
Image.open(PATH+fn).resize((150, 150))

In [None]:
# Without aug_tfms, tfms_from_model is unpacked here into a (training, validation)
# pair of transforms.
trn_tfms, val_tfms = tfms_from_model(arch, sz)

In [None]:
# Predict for the single file through the dataset/dataloader path, using the
# validation transforms. np.array([0]) is presumably a placeholder label — confirm.
ds = FilesIndexArrayDataset([fn], np.array([0]), val_tfms, PATH)
dl = DataLoader(ds)
preds = learn.predict_dl(dl)
# Index of the highest-scoring class.
np.argmax(preds)

In [None]:
# Manual forward pass on one image: transform it, add a batch axis with im[None],
# move the tensor to the GPU and call the model directly.
# NOTE(review): this uses the *training* transforms and hands them a PIL image; the
# dataloader cell above uses val_tfms — confirm which is intended for inference.
im = trn_tfms(Image.open(PATH+fn))
# Fixed typo: the tensor method is `.cuda()`; `.cude()` raised AttributeError.
preds = to_np(learn.model(V(T(im[None]).cuda())))
# Index of the highest-scoring class.
np.argmax(preds)

In [None]:
# Same call as a few cells up; repeated so the cell below can be run on its own.
trn_tfms, val_tfms = tfms_from_model(arch, sz)

In [None]:
# Third route to a single prediction: learn.predict_array on a one-image batch.
# NOTE(review): trn_tfms (the training transforms) is applied here, while the
# dataloader cell above uses val_tfms — confirm which is intended for inference.
im = trn_tfms(Image.open(PATH+fn))
preds = learn.predict_array(im[None])
# Index of the highest-scoring class.
np.argmax(preds)

## Confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Predicted class per sample: the column with the highest log-prediction.
preds = np.argmax(log_preds, axis=1)
# Fixed: keep the full per-class probability matrix, as the validation TTA cell does.
# The previous `log_preds[:,1]` kept only the column for class index 1, a leftover
# from a two-class notebook that means nothing for a many-breed classifier.
probs = np.exp(log_preds)

# NOTE(review): `log_preds` and `y` are whatever the most recent TTA cell left
# behind. The last one above ran with is_test=True, so re-run a validation TTA
# (`learn.TTA()`) before this cell if a validation confusion matrix is wanted.
cm = confusion_matrix(y, preds)

In [None]:
# Render the confusion matrix with the class names as labels.
plot_confusion_matrix(cm, data.classes)