# Dog Breed

In [1]:
# Put these at the top of every notebook.
# autoreload (mode 2) re-imports edited modules automatically before code runs,
# and `%matplotlib inline` renders plots directly below the cell that draws them.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os

# This file contains all the main external libs we'll use
from fastai.imports import *

from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [3]:
# Notebook-wide configuration, read by the data-loading and model cells.
# Dataset root; the trailing slash matters because paths are built by plain string concatenation.
PATH = "data/dogbreed/"
# Image size handed to tfms_from_model.
sz = 299
# Architecture passed to tfms_from_model and ConvLearner.pretrained.
arch = resnext101_64
# Batch size handed to ImageClassifierData.from_csv.
bs = 58

In [4]:
# Location of the labels file inside the dataset root.
label_csv = f'{PATH}labels.csv'
# Count the data rows (all lines minus the header line). The `with` block closes the
# file handle deterministically instead of leaving it open for the garbage collector.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
# Row indices held out for validation; reused by every data object built in this notebook.
val_idxs = get_cv_idxs(n)

## Initial Exploration

In [None]:
# List the top-level contents of the dataset directory.
!ls {PATH}

In [None]:
# Capture the first few entries of the test folder (`head` keeps the first 10 lines by default)
# as a Python list, then display them.
files = !ls {PATH}test/ | head
files

In [None]:
# Load the labels file into a DataFrame for inspection.
label_df = pd.read_csv(label_csv)

In [None]:
# Preview the first rows of the labels table.
label_df.head()

In [None]:
# Number of labelled rows per breed, most frequent first.
# aggfunc=len counts the rows in each 'breed' group; the counts end up in the 'id' column,
# which is why the result is sorted by 'id'.
label_df.pivot_table(index='breed', aggfunc=len).sort_values('id', ascending=False)

In [None]:
# Data object used only for the exploration cells in this section:
# side-on augmentations with max_zoom=1.1, validation rows taken from val_idxs,
# and the 'test' folder attached as the test set.
tfms = tfms_from_model(
    arch,
    sz,
    aug_tfms=transforms_side_on,
    max_zoom=1.1,
)
data = ImageClassifierData.from_csv(
    PATH,
    'train',
    f'{PATH}labels.csv',
    tfms=tfms,
    val_idxs=val_idxs,
    test_name='test',
    suffix='.jpg',
    bs=bs,
)

In [None]:
# Full path of the first training image; the bare name on the last line displays it.
fn = PATH + data.trn_ds.fnames[0]
fn

In [None]:
# Open the sample image; leaving `img` as the last expression renders it inline.
img = PIL.Image.open(fn)
img

In [None]:
# Dimensions of the sample image (Pillow reports size as (width, height)).
img.size

In [None]:
def _image_size(rel_path):
    """Return the Pillow `.size` tuple of the image at PATH + rel_path.

    The image is opened in a `with` block so its file handle is closed as soon as
    the size has been read, rather than leaving one open file per training image.
    """
    with PIL.Image.open(PATH + rel_path) as image:
        return image.size

# Map every training file name to its image size.
size_d = {k: _image_size(k) for k in data.trn_ds.fnames}

In [None]:
# Split the per-image size tuples into two parallel sequences (first and second component).
# NOTE(review): Pillow's Image.size is (width, height), so `row_sz` actually holds widths
# and `col_sz` heights — the names read the other way round.
row_sz, col_sz = zip(*size_d.values())

In [None]:
# Convert both size sequences to NumPy arrays so they can be boolean-masked and histogrammed.
row_sz, col_sz = np.array(row_sz), np.array(col_sz)

In [None]:
# Peek at the first five values.
row_sz[:5]

In [None]:
# Distribution of `row_sz` over all training images.
# Axis labels make the figure readable on its own; the trailing `;` hides the return value.
plt.hist(row_sz)
plt.xlabel('row_sz (pixels)')
plt.ylabel('number of training images');

In [None]:
# Same distribution restricted to values below 1000, so the bulk is not squashed by outliers.
# Axis labels make the figure readable on its own; the trailing `;` hides the return value.
plt.hist(row_sz[row_sz < 1000])
plt.xlabel('row_sz (pixels, values below 1000 only)')
plt.ylabel('number of training images');

In [None]:
# Distribution of `col_sz` over all training images.
# Axis labels make the figure readable on its own; the trailing `;` hides the return value.
plt.hist(col_sz)
plt.xlabel('col_sz (pixels)')
plt.ylabel('number of training images');

In [None]:
# Same distribution restricted to values below 1000, so the bulk is not squashed by outliers.
# Axis labels make the figure readable on its own; the trailing `;` hides the return value.
plt.hist(col_sz[col_sz < 1000])
plt.xlabel('col_sz (pixels, values below 1000 only)')
plt.ylabel('number of training images');

In [None]:
# Sizes of the training and test datasets.
len(data.trn_ds), len(data.test_ds)

In [None]:
# Number of classes and the first five class names.
len(data.classes), data.classes[:5]

## Initial Model

In [5]:
def get_data(sz, bs):
    """Build the ImageClassifierData for image size `sz` and batch size `bs`.

    Relies on the notebook-level `arch`, `PATH` and `val_idxs`. Labelled images come
    from the 'train' folder and labels.csv; the 'test' folder is attached as test set.

    Returns the data object as-is when `sz` is above 300, otherwise the result of
    `data.resize(340, 'tmp')`.
    NOTE(review): presumably resize() stores down-scaled copies under 'tmp' to speed
    up loading at small sizes — confirm against the fastai version in use.
    """
    img_tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    model_data = ImageClassifierData.from_csv(
        PATH, 'train', f'{PATH}labels.csv',
        tfms=img_tfms, val_idxs=val_idxs, test_name='test',
        suffix='.jpg', bs=bs, num_workers=4,
    )
    if sz > 300:
        return model_data
    return model_data.resize(340, 'tmp')

In [6]:
# Build the training data at the configured size and batch size.
# With sz = 299 (not above 300) get_data returns the resize(340, 'tmp') variant.
data = get_data(sz, bs)

A Jupyter Widget

                                                      


In [7]:
# Create a learner on the pretrained `arch` with precompute=True, then run a first fit.
# NOTE(review): presumably fit(1e-1, 1) means learning rate 0.1 for a single cycle —
# confirm against the fastai `fit` signature.
learn = ConvLearner.pretrained(arch, data, precompute=True)
learn.fit(1e-1, 1)

100%|██████████| 141/141 [08:10<00:00,  3.48s/it]
100%|██████████| 36/36 [02:03<00:00,  3.43s/it]
100%|██████████| 179/179 [10:21<00:00,  3.47s/it]


A Jupyter Widget

[ 0.       0.68984  0.3556   0.89265]                         



## Augment

In [8]:
from sklearn import metrics

In [9]:
# Switch off precomputed activations before the next fit.
# NOTE(review): presumably required so the augmentations configured in get_data take effect — confirm.
learn.precompute = False

In [10]:
# Fit again with precompute disabled; the log below this cell shows three epochs (0-2).
learn.fit(1e-1, 3, cycle_len=1)

A Jupyter Widget

[ 0.       0.37548  0.25663  0.91708]                        
[ 1.       0.29412  0.24158  0.92234]                        
[ 2.       0.26259  0.23588  0.92234]                        



In [11]:
# Checkpoint the learner state under the name '299_pre'.
learn.save('299_pre')

In [12]:
# Restore the '299_pre' checkpoint, so later cells can be re-run from this known state.
learn.load('299_pre')

## Increase Size

In [13]:
# Continue training at a lower rate (1e-2) with cycle_mult=2; the log below shows 7 epochs (0-6).
# NOTE(review): despite the section title, nothing here changes the image size —
# `learn` still uses the `data` built by get_data(sz, bs) with sz = 299.
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

A Jupyter Widget

[ 0.       0.19203  0.2324   0.91995]                        
[ 1.       0.18704  0.2315   0.92426]                        
[ 2.       0.19822  0.2291   0.92426]                        
[ 3.       0.17883  0.22619  0.92666]                        
[ 4.       0.16566  0.22521  0.92905]                        
[ 5.       0.15209  0.22437  0.92761]                        
[ 6.       0.15575  0.22509  0.92666]                        



In [14]:
# Run TTA on the test set; the first returned value is exponentiated to get `probs`,
# which the submission cells turn into the output table.
log_preds, y = learn.TTA(is_test=True)
probs = np.exp(log_preds)
# Metrics are left disabled: with is_test=True, `y` presumably carries no real labels —
# confirm before re-enabling the line below.
#accuracy(log_preds, y), metrics.log_loss(y, probs)

In [None]:
# Checkpoint the learner after the longer fit.
# NOTE(review): the name says 350, but sz is 299 throughout this notebook — confirm the intended name.
learn.save('350_mul_pre')

In [None]:
# Restore the '350_mul_pre' checkpoint.
# NOTE(review): the name says 350, but sz is 299 throughout this notebook — confirm the intended name.
learn.load('350_mul_pre')

In [None]:
# Run TTA on the test set again and keep the full return value.
# NOTE(review): this repeats the TTA call whose result is already held in `probs`;
# the submission cells use `probs`, not `test_preds`.
test_preds = learn.TTA(is_test=True)

In [None]:
# Take the first element of the TTA result and exponentiate it.
# NOTE(review): this rebinds `test_preds`, so the cell is not idempotent — running it a
# second time applies exp() to the first row of the previous result.
test_preds = np.exp(test_preds[0])

In [None]:
# Check the shape of the prediction array.
test_preds.shape

In [None]:
# Preview the test file names; only the first five are shown so the output does not
# list every test image.
data.test_dl.dataset.fnames[:5]

In [None]:
# Display the prediction array.
test_preds

In [16]:
# Test-set file names, used to derive the submission ids.
filenames = data.test_dl.dataset.fnames

In [17]:
# Submission ids are the test file names without directory or extension.
# os.path replaces slicing off a hard-coded 5-character prefix and cutting at the first '.',
# which broke for other folder names, dotted file names, or names with no extension
# (find() returns -1 there and silently dropped the last character).
ids = [os.path.splitext(os.path.basename(f))[0] for f in filenames]

In [18]:
# Start the submission table from the test-set probabilities computed in the TTA cell.
sub = pd.DataFrame(probs)

In [19]:
# The class names become the column headers of the submission table.
col_names = data.classes

In [20]:
# Label each probability column with its class name.
# NOTE(review): must run before the 'id' column is inserted; afterwards the column count
# no longer matches len(col_names).
sub.columns = col_names

In [21]:
# Preview the first five rows of the submission table (five is the default for head()).
sub.head()

Unnamed: 0,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,beagle,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,1.076856e-06,1.770532e-05,1.57704e-07,1.527007e-07,5.188913e-08,7.051919e-09,2.899292e-07,1.526249e-08,5.084071e-07,1.820827e-08,...,5.435342e-06,5.876904e-08,2.211522e-06,1.903064e-07,2.657068e-06,2.208191e-07,3.391328e-08,1.582019e-08,9.78766e-07,4.953033e-08
1,3.398505e-07,4.159633e-05,3.518447e-05,4.332278e-05,8.798211e-07,3.091706e-06,3.55437e-06,3.181699e-06,1.269998e-06,1.116907e-07,...,7.978977e-07,1.539339e-07,0.0001368935,1.065915e-06,2.484633e-05,8.071233e-06,7.182746e-07,8.119663e-06,1.522531e-06,4.520743e-06
2,6.183953e-09,2.204296e-07,2.466229e-09,1.298403e-08,1.460211e-09,4.016386e-08,1.433092e-08,2.789505e-10,5.498121e-08,5.700088e-09,...,3.937001e-10,7.607945e-09,5.112918e-07,5.672781e-06,2.902568e-07,3.01862e-10,7.369441e-10,1.057383e-09,9.008361e-10,5.073855e-09
3,3.380506e-05,2.100677e-08,9.31873e-09,3.573362e-10,6.293524e-08,1.707708e-08,1.603564e-08,1.577443e-11,5.565847e-09,9.391567e-09,...,6.600093e-06,7.950264e-09,3.314536e-09,3.442136e-10,2.303937e-08,9.960132e-10,3.836324e-08,5.376064e-09,8.138982e-10,0.0001635034
4,2.9999e-08,5.193824e-08,1.293711e-07,2.427845e-09,5.202502e-07,1.309095e-08,4.561456e-06,6.84735e-09,7.176663e-08,8.921669e-06,...,0.0001156562,6.306162e-07,1.363942e-07,2.827567e-09,4.520208e-08,2.673057e-08,8.350018e-06,1.010148e-08,1.833374e-06,5.900491e-06


In [22]:
# Put the image ids in the first column of the submission table.
# insert() raises ValueError if the column already exists, so re-running this cell used to
# fail; when 'id' is already present its values are refreshed in place instead.
if 'id' in sub.columns:
    sub['id'] = ids
else:
    sub.insert(0, 'id', ids)

In [23]:
# Preview the last five rows of the submission table (five is the default for tail()).
sub.tail()

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
10352,d7cbc532e819d760ab9e418654cef03f,6.268459e-07,9.758544e-07,8.270808e-07,3.403844e-07,5.588909e-06,3.975547e-07,0.0004376219,0.0005747105,1.752363e-06,...,1.705889e-07,1.105771e-06,4.209434e-08,8.324439e-08,9.245452e-09,3.936618e-06,3.413984e-05,4.316437e-07,2.027513e-06,1.264117e-06
10353,4250d92ca08a7c19bc0f6cb375a2b4ed,8.502207e-07,5.263835e-08,2.364226e-06,1.240829e-09,0.3155688,1.603755e-06,2.547312e-07,1.057684e-05,4.917315e-06,...,1.818661e-08,3.220006e-07,1.763911e-07,5.680766e-06,9.1485e-07,1.503754e-07,1.316762e-05,5.313472e-05,6.961033e-07,4.21822e-07
10354,a191db53dbd439ca46590a60906e4e55,0.0007006122,0.0002893562,4.290245e-05,0.0003522918,9.32808e-06,2.136298e-05,0.006412952,3.892231e-05,4.770539e-05,...,0.0001413018,0.000817626,5.837555e-05,6.332245e-05,3.918529e-05,7.274003e-05,0.003714273,0.00017367,0.03717621,0.003077052
10355,864e7e1f739a6f4a905c9d512aae5e43,0.02589602,5.931244e-10,3.094869e-11,2.937382e-11,8.481818e-10,1.012685e-09,3.083289e-09,3.752991e-12,6.697141e-10,...,1.128052e-09,5.138723e-11,5.712272e-11,6.332572e-12,3.849122e-11,6.683126e-11,2.157109e-10,1.455125e-11,2.958104e-12,1.682598e-09
10356,7d456cad378a38055723949d8cbbb811,1.515554e-06,6.993865e-05,2.262993e-06,7.23726e-05,2.175841e-06,7.475277e-06,0.0001451245,1.307404e-06,3.146297e-05,...,0.0002680049,3.749651e-06,5.491414e-06,5.714373e-06,9.780388e-06,1.697693e-05,0.0003022556,7.936455e-07,0.0006897535,3.271978e-05


In [24]:
# Write the submission file without the DataFrame index.
# to_csv does not create missing directories, so make sure the results folder exists
# first rather than failing only after training and TTA have already run.
os.makedirs(f'{PATH}results', exist_ok=True)
sub.to_csv(f'{PATH}results/submission4.csv', index=False)