In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [4]:
PATH = "data/dogbreed/"

In [5]:
# Training configuration shared by the cells below.
sz = 224  # initial input image size handed to get_data / tfms_from_model
arch = resnext101_64  # architecture passed to tfms_from_model and ConvLearner.pretrained
bs = 58  # batch size handed to ImageClassifierData.from_csv
# Jeremy tried (sz, bs) = (224, 58) and (299, 58)

In [6]:
label_csv = f'{PATH}labels.csv'
# Number of labelled images = number of lines in the CSV minus the header row.
# The file is read inside a context manager so the handle is closed
# deterministically, and lines are counted lazily instead of being
# materialised into a list first.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
# Row indices held out for validation; get_data reads this global.
val_idxs = get_cv_idxs(n)

In [6]:
label_df = pd.read_csv(label_csv)

In [None]:
# Rows per breed, largest first: after aggregating with len, the 'id' column
# holds the number of labelled images for each breed.
label_df.pivot_table(index='breed', aggfunc=len).sort_values('id', ascending=False)

In [7]:
def get_data(sz, bs):
    """Build the dog-breed ImageClassifierData for image size `sz` and batch size `bs`.

    Transforms come from `tfms_from_model` for the global `arch`, using the
    `transforms_side_on` augmentations and `max_zoom=1.1`. Images are read from
    the 'train' and 'test' folders under `PATH`, labels from `labels.csv`, and
    the rows listed in the global `val_idxs` form the validation set.

    Returns the data object as-is when `sz` is above 300; otherwise returns
    the result of `resize(340, 'tmp')` (presumably a cached, down-scaled copy
    of the images that speeds up loading -- confirm against fastai).
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    model_data = ImageClassifierData.from_csv(
        PATH, 'train', f'{PATH}labels.csv', test_name='test',
        val_idxs=val_idxs, suffix='.jpg', tfms=transforms, bs=bs)
    if sz > 300:
        return model_data
    return model_data.resize(340, 'tmp')

## Precompute

In [8]:
data = get_data(sz, bs)

A Jupyter Widget




In [9]:
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [10]:
learn.fit(1e-2, 5)

A Jupyter Widget

[ 0.       0.96754  0.39239  0.9023 ]                        
[ 1.       0.47041  0.29658  0.91427]                         
[ 2.       0.32048  0.26299  0.9205 ]                         
[ 3.       0.24852  0.24861  0.92385]                         
[ 4.       0.19576  0.2404   0.92529]                         



## Augment

In [11]:
from sklearn import metrics

In [12]:
data = get_data(sz, bs)

A Jupyter Widget




In [13]:
learn = ConvLearner.pretrained(arch, data, precompute=True, ps=0.5)

In [14]:
learn.fit(1e-2, 2)

A Jupyter Widget

[ 0.       1.19333  0.42347  0.90757]                       
[ 1.       0.54787  0.30274  0.92241]                        



In [15]:
learn.precompute=False
learn.bn_freeze=True

In [16]:
learn.fit(1e-2, 5, cycle_len=1)

A Jupyter Widget

[ 0.       0.45747  0.26905  0.92624]                        
[ 1.       0.42457  0.26033  0.92385]                        
[ 2.       0.37737  0.25164  0.92529]                        
[ 3.       0.35556  0.24186  0.92577]                        
[ 4.       0.34313  0.23638  0.92624]                        



In [17]:
learn.save('224_pre')

In [18]:
learn.load('224_pre')

## Increase size

In [19]:
# Keep the same learner but point it at 299px images, then call freeze()
# before training further (presumably so only the final layers are trained --
# confirm against fastai).
learn.set_data(get_data(299, bs))
learn.freeze()

A Jupyter Widget




In [20]:
learn.fit(1e-2, 3, cycle_len=1)

A Jupyter Widget

[ 0.       0.34264  0.22406  0.93151]                        
[ 1.       0.30557  0.21987  0.93439]                        
[ 2.       0.296    0.21562  0.9296 ]                        



In [21]:
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

A Jupyter Widget

[ 0.       0.26962  0.218    0.93103]                        
[ 1.       0.27629  0.21015  0.93534]                        
[ 2.       0.24661  0.20852  0.93391]                        
[ 3.       0.25752  0.20398  0.9387 ]                        
[ 4.       0.23323  0.20119  0.93534]                        
[ 5.       0.22318  0.19704  0.93918]                        
[ 6.       0.20048  0.196    0.9387 ]                        



In [22]:
# Test-time augmentation; without is_test this presumably scores the
# validation set -- confirm against fastai.
log_preds, y = learn.TTA()
probs = np.exp(log_preds)  # predictions are treated as log-probabilities; exponentiate to get probabilities
# Cell result: (accuracy, log loss).
accuracy(log_preds, y), metrics.log_loss(y, probs)

                                              

(0.94422700587084152, 0.19929996627362331)

In [23]:
learn.save('299_pre')

In [24]:
learn.load('299_pre')

In [25]:
learn.fit(1e-2, 1, cycle_len=2)

A Jupyter Widget

[ 0.       0.21182  0.20144  0.9363 ]                        
[ 1.       0.20089  0.19496  0.93726]                        



In [26]:
learn.save('299_pre')

In [27]:
# Predict on the test set (is_test=True) with TTA and store the probabilities
# under an architecture-specific name so they can be saved and ensembled later.
# Note this overwrites the log_preds / y produced by the earlier TTA() cell.
log_preds, y = learn.TTA(is_test=True)
probs_resnx101_64 = np.exp(log_preds)
# Metrics left disabled -- presumably because the test set has no labels.
#accuracy(log_preds, y), metrics.log_loss(y, probs)

                                              

In [7]:
def save_array(fname, arr):
    """Persist `arr` on disk as a bcolz carray rooted at directory `fname`.

    The carray is created with mode='w' and flushed before returning.
    Returns None.
    """
    carr = bcolz.carray(arr, rootdir=fname, mode='w')
    carr.flush()

In [29]:
save_array('probs_resnx101_64.bc', probs_resnx101_64)

In [30]:
learn.save('299_pre')

In [56]:
learn.load('299_pre')

In [57]:
learn.set_data(get_data(400, bs))
learn.freeze()

## Use whole dataset now

In [31]:
learn.load('299_pre')

In [32]:
def get_data_whole(sz, bs):
    """Build the ImageClassifierData used to train on (almost) the whole labelled set.

    Same pipeline as `get_data`, except `val_idxs=[0]`: only the row at index 0
    is held out, so validation metrics computed on this data cover a single
    image and are not informative.

    Returns the data object as-is when `sz` is above 300; otherwise returns
    the result of `resize(340, 'tmp')`.
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    model_data = ImageClassifierData.from_csv(
        PATH, 'train', f'{PATH}labels.csv', test_name='test',
        val_idxs=[0], suffix='.jpg', tfms=transforms, bs=bs)
    if sz > 300:
        return model_data
    return model_data.resize(340, 'tmp')

In [8]:
def load_array(fname):
    """Open the bcolz array stored at `fname` and return its full contents.

    The `[:]` slice reads every element out of the opened container
    (presumably yielding an in-memory array -- confirm against bcolz).
    """
    stored = bcolz.open(fname)
    return stored[:]

In [34]:
data = get_data_whole(sz, bs)

A Jupyter Widget




In [35]:
learn = ConvLearner.pretrained(arch, data, precompute=True)
learn.freeze()

100%|██████████| 177/177 [04:45<00:00,  1.61s/it]
100%|██████████| 1/1 [00:00<00:00,  2.82it/s]


Process Process-931:
Process Process-929:
Process Process-934:
Traceback (most recent call last):
Process Process-930:
Traceback (most recent call last):
Process Process-932:
Process Process-936:
Process Process-933:
Traceback (most recent call last):
Process Process-935:
Traceback (most recent call last):
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home

In [36]:
learn.fit(1e-2, 5)

A Jupyter Widget

[ 0.       0.75786  0.01884  1.     ]                         
[ 1.       0.38308  0.00202  1.     ]                         
[ 2.       0.27529  0.00073  1.     ]                         
[ 3.       0.22052  0.00052  1.     ]                         
[ 4.       0.19112  0.00022  1.     ]                         



In [37]:
learn = ConvLearner.pretrained(arch, data, precompute=True, ps=0.5)

In [38]:
learn.fit(1e-2, 2)

A Jupyter Widget

[ 0.       0.94042  0.00524  1.     ]                         
[ 1.       0.48962  0.00265  1.     ]                         



Process Process-1150:
Process Process-1149:
Process Process-1147:
Process Process-1146:
Process Process-1165:
Process Process-1163:
Process Process-1164:
Process Process-1161:
Process Process-1148:
Process Process-1145:
Process Process-1152:
Process Process-1151:
Process Process-1162:
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-1166:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/src/

  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()

In [39]:
learn.precompute = False
learn.bn_freeze = True

In [40]:
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

A Jupyter Widget

[ 0.       0.44712  0.00127  1.     ]                        
[ 1.       0.39743  0.00121  1.     ]                        
[ 2.       0.34848  0.00075  1.     ]                        
[ 3.       0.35622  0.00137  1.     ]                        
[ 4.       0.31755  0.00057  1.     ]                        
[ 5.       0.28701  0.00074  1.     ]                        
[ 6.       0.2883   0.00068  1.     ]                        



Process Process-1371:
Process Process-1372:
Process Process-1375:
Process Process-1369:
Process Process-1385:
Process Process-1374:
Process Process-1391:
Process Process-1370:
Process Process-1376:
Process Process-1389:
Process Process-1392:
Process Process-1387:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-1390:
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (m

  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/synchronize.py", line 96, in __

In [41]:
learn.set_data(get_data_whole(299, bs))
learn.freeze()

A Jupyter Widget




In [42]:
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

A Jupyter Widget

[ 0.       0.29342  0.00362  1.     ]                        
[ 1.       0.27589  0.00238  1.     ]                        
[ 2.       0.25299  0.00213  1.     ]                        
[ 3.       0.24732  0.00253  1.     ]                        
[ 4.       0.23149  0.00122  1.     ]                        
[ 5.       0.22563  0.00168  1.     ]                        
[ 6.       0.20624  0.00182  1.     ]                        



In [43]:
# Test-set predictions (is_test=True) from the model trained with
# get_data_whole; kept under a separate name from probs_resnx101_64.
# Note this overwrites log_preds / y again.
log_preds, y = learn.TTA(is_test=True)
probs_resnx101_64_full = np.exp(log_preds)
# Metrics left disabled -- presumably because the test set has no labels.
#accuracy(log_preds, y), metrics.log_loss(y, probs)

                                                 

In [44]:
save_array('probs_resnx101_64_full.bc', probs_resnx101_64_full)

In [45]:
learn.save('299_full')

## Try ensembling
### Use another arch first

In [9]:
probs_1 = load_array('probs_i4_400.bc')

In [12]:
probs_2 = load_array('probs_resnx101_64.bc')

OSError: data directory does not exist

In [11]:
probs_3 = load_array('probs_ir2.bc')

OSError: data directory does not exist

In [14]:
probs_4 = load_array('probs_i4_new.bc')

OSError: data directory does not exist

In [68]:
# Equal-weight ensemble: element-wise mean of four saved probability arrays.
# NOTE(review): the recorded outputs of the cells that load probs_2, probs_3
# and probs_4 show "OSError: data directory does not exist", so this line only
# works if those arrays are still in the kernel from an earlier run -- confirm
# the .bc directories exist before re-running from a fresh kernel.
avg_probs = (probs_1 + probs_2 + probs_3 + probs_4)/4

In [69]:
# Submission table: one probability column per class, preceded by the image id.
df = pd.DataFrame(avg_probs)
df.columns = data.classes
# Derive each id from the test file name by dropping the directory and the
# extension. This replaces the fixed-offset slice o[5:-4], which was only
# correct for a 5-character 'test/' prefix and a 4-character '.jpg' suffix.
df.insert(0, 'id', [os.path.splitext(os.path.basename(o))[0] for o in data.test_ds.fnames])

In [70]:
# Write the current submission frame to the results folder, creating the
# folder first if it does not exist yet.
SUBM = f'{PATH}results_2/'
os.makedirs(SUBM, exist_ok=True)
df.to_csv(f'{SUBM}submission15.csv', index=False)  # index=False: leave the row index out of the CSV
# Alternative gzip-compressed output, currently disabled:
#df.to_csv(f'{SUBM}subm.gz', compression='gzip', index=False)

## Save to file for submission

### Using probs

In [46]:
# Submission table from the single model trained with get_data_whole: one
# probability column per class, preceded by the image id.
df = pd.DataFrame(probs_resnx101_64_full)
df.columns = data.classes
# Derive each id from the test file name by dropping the directory and the
# extension. This replaces the fixed-offset slice o[5:-4], which was only
# correct for a 5-character 'test/' prefix and a 4-character '.jpg' suffix.
df.insert(0, 'id', [os.path.splitext(os.path.basename(o))[0] for o in data.test_ds.fnames])

In [47]:
df.head()

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,fd1a7be32f10493735555e62913c0841,2.281988e-05,1.718267e-05,3.157325e-07,1.290186e-06,3.807626e-07,1.097992e-07,3.106692e-07,1.333597e-07,2.658056e-06,...,6.670927e-05,3.704084e-07,1.138617e-06,1.428896e-06,2.462374e-07,5.6297e-07,8.057501e-07,4.163923e-07,1.452599e-05,1.316659e-06
1,5273d42f02b4715cb93845205995ef64,1.464182e-06,1.320414e-06,1.609152e-05,0.0002417042,1.277238e-07,1.977732e-07,3.595007e-07,1.61844e-07,5.517606e-07,...,1.494643e-08,2.771197e-08,4.071355e-05,9.38777e-07,2.94972e-05,1.503945e-06,1.557243e-06,1.569086e-05,6.576714e-06,1.38373e-06
2,442057a3142f4d75a1023db363e2fb54,1.782943e-07,1.525702e-06,6.398744e-08,1.194721e-06,2.605385e-08,2.10123e-06,1.088276e-08,3.558117e-08,4.072102e-06,...,4.531715e-08,2.873568e-07,1.172894e-06,4.996165e-05,2.204623e-06,4.896736e-09,8.014931e-08,8.945806e-08,2.587053e-08,5.651354e-08
3,fb03302b030afd122e0f4936c158f6c1,0.0004688526,4.132627e-07,3.817055e-08,9.030007e-09,1.582217e-07,1.788543e-08,1.213442e-06,5.016984e-08,3.073838e-07,...,1.555514e-05,3.788595e-07,1.820666e-07,3.108795e-08,4.983715e-07,2.551992e-08,9.197489e-07,2.113611e-07,4.362183e-08,0.0002867217
4,511ca6e3539192aa5c8747bb697e0525,3.314514e-07,8.850526e-08,3.825067e-07,2.684937e-08,7.531769e-07,2.584413e-07,1.060611e-05,1.114979e-07,4.748778e-07,...,4.98888e-05,2.056782e-06,3.697005e-07,1.225711e-08,1.84038e-07,8.348931e-07,9.16466e-06,5.706365e-07,1.356091e-06,2.122605e-05


In [48]:
# Write the current submission frame to the results folder, creating the
# folder first if it does not exist yet.
SUBM = f'{PATH}results_2/'
os.makedirs(SUBM, exist_ok=True)
df.to_csv(f'{SUBM}submission10.csv', index=False)  # index=False: leave the row index out of the CSV
# Alternative gzip-compressed output, currently disabled:
#df.to_csv(f'{SUBM}subm.gz', compression='gzip', index=False)

### Using probs_full

In [None]:
# Submission table built from `probs`.
# NOTE(review): `probs` is whatever an earlier cell last assigned (the
# learn.TTA() evaluation cell or the confusion-matrix cell), while the ids
# below come from the test set -- confirm that `probs` really holds test-set
# predictions with one row per test file before using this cell.
df = pd.DataFrame(probs)
df.columns = data.classes
# Derive each id from the test file name by dropping the directory and the
# extension. This replaces the fixed-offset slice o[5:-4], which was only
# correct for a 5-character 'test/' prefix and a 4-character '.jpg' suffix.
df.insert(0, 'id', [os.path.splitext(os.path.basename(o))[0] for o in data.test_ds.fnames])

In [None]:
df.head()

In [None]:
# Write the current submission frame to the results folder, creating the
# folder first if it does not exist yet.
SUBM = f'{PATH}results_2/'
os.makedirs(SUBM, exist_ok=True)
df.to_csv(f'{SUBM}submission7.csv', index=False)  # index=False: leave the row index out of the CSV
# Alternative gzip-compressed output, currently disabled:
#df.to_csv(f'{SUBM}subm.gz', compression='gzip', index=False)

## Individual prediction

In [None]:
fn = data.val_ds.fnames[0]

In [None]:
fn

In [None]:
Image.open(PATH+fn).resize((150, 150))

In [None]:
trn_tfms, val_tfms = tfms_from_model(arch, sz)

In [None]:
# Predict a single image by wrapping it in a one-item dataset and data loader,
# using the validation transforms.
ds = FilesIndexArrayDataset([fn], np.array([0]), val_tfms, PATH)  # label 0 looks like a placeholder -- presumably unused for prediction
dl = DataLoader(ds)
preds = learn.predict_dl(dl)
np.argmax(preds)  # cell result: index of the highest-scoring class

In [None]:
# Run one image through the model by hand.
# NOTE(review): this applies the training transforms (trn_tfms), whereas the
# predict_dl cell uses val_tfms -- confirm which is intended for inference.
im = trn_tfms(Image.open(PATH+fn))
# im[None] adds a leading batch axis; T / V presumably wrap the array as a
# tensor / Variable, which is then moved to the GPU with .cuda(). The original
# called the non-existent method .cude(), which raises AttributeError.
preds = to_np(learn.model(V(T(im[None]).cuda())))
np.argmax(preds)  # cell result: index of the highest-scoring class

In [None]:
trn_tfms, val_tfms = tfms_from_model(arch, sz)

In [None]:
# Single-image prediction through the learner's predict_array helper.
# NOTE(review): uses trn_tfms rather than val_tfms -- confirm this is intended.
im = trn_tfms(Image.open(PATH+fn))
preds = learn.predict_array(im[None])  # im[None] adds a leading batch axis (assuming im is an array)
np.argmax(preds)  # cell result: index of the highest-scoring class

## Confusion matrix

In [None]:
# Predicted class index per sample: argmax along axis 1 of log_preds.
preds = np.argmax(log_preds, axis=1)
# NOTE(review): this keeps only the column for class index 1, which looks like
# a leftover from a two-class notebook; probs is not used again in this cell,
# and assigning it overwrites any earlier `probs` -- confirm it is needed.
probs = np.exp(log_preds[:,1])

from sklearn.metrics import confusion_matrix
# NOTE(review): log_preds and y are whatever the most recent learn.TTA(...)
# call produced. If that was an is_test=True call, y presumably does not hold
# real labels -- re-run learn.TTA() on the validation set first.
cm = confusion_matrix(y, preds)

In [None]:
plot_confusion_matrix(cm, data.classes)