In [35]:
from fastai.conv_learner import *
from fastai.dataset import *

import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split

In [2]:
MODEL_PATH = 'Dn121_v1'
TRAIN = 'train/'
TEST = 'test/'
LABELS = 'train_labels.csv'
SAMPLE_SUB = 'sample_submission.csv'
ORG_SIZE=96
BATCH_SIZE = 128

In [3]:
arch = dn121 
nw = 4

In [7]:
train_df = pd.read_csv(LABELS).set_index('id')
train_names = train_df.index.values
train_labels = train_df['label'].values
print("Number of positive samples = {:.4f}%".format(np.count_nonzero(train_labels)*100/len(train_labels)))
test_names = [f.replace(".tif","") for f in os.listdir(TEST)]
tr_n, val_n = train_test_split(train_names, test_size=0.15, random_state=42069)
print(len(tr_n), len(val_n))

Number of positive samples = 40.5031%
187021 33004


In [9]:
class HCDDataset(FilesDataset):
    def __init__(self, fnames, path, transform):
        self.train_df = train_df
        super().__init__(fnames, transform, path)
        
    def get_x(self, i):
        img = open_image(os.path.join(self.path,self.fnames[i]+".tif"))
        # We crop the center of the original image for faster training time
        img = img[(ORG_SIZE-self.sz)//2:(ORG_SIZE+self.sz)//2,
                  (ORG_SIZE-self.sz)//2:(ORG_SIZE+self.sz)//2.:]
        return img

    def get_y(self, i):
        if (self.path == TEST): return 0
        return self.train_df.loc[self.fnames[i]]['label']
    
    def get_c(self):
        return 2

In [27]:
def get_data(sz, bs):
    aug_tfms = [RandomRotate(20,tfm_y=TfmType.NO),
                RandomDihedral(tfm_y=TfmType.NO)]
 
    tfms = tfms_from_model(arch, sz, crop_type=CropType.NO, tfm_y=TfmType.NO,
                          aug_tfms=aug_tfms)

    # Here we define the transforms that are performed for images when loaded. Only statistical regularization.
    #tfms = tfms_from_stats(stats, sz, crop_type=CropType.NO, tfm_y=TfmType.NO,
    #                       aug_tfms=aug_tfms)
    
    ds = ImageData.get_ds(HCDDataset, (tr_n[:-(len(tr_n)%bs)], TRAIN),
                         (val_n, TRAIN), tfms, test=(test_names, TEST))
    md = ImageData("./", ds, bs, num_workers=nw, classes=None)
    return md

### Compute image statistics
Doing these once is enough. **Do not use color/brightness/contrast augmentation here!** This statistics function is copied and altered from iafoss's kernel

This will give channel averages of [0.70185, 0.54483, 0.69568], and std's of [0.22262, 0.26757, 0.1995 ].

In [34]:
np.__version__

'1.15.4'

In [28]:
md = get_data(96, BATCH_SIZE)
x_tot = np.zeros(3)
x2_tot = np.zeros(3)
for x,y in iter(md.trn_dl):
    tmp =  md.trn_ds.denorm(x).reshape(BATCH_SIZE,-1)
    x = md.trn_ds.denorm(x).reshape(-1,3)
    x_tot += x.mean(axis=0)
    x2_tot += (x**2).mean(axis=0)

channel_avr = x_tot/len(md.trn_dl)
channel_std = np.sqrt(x2_tot/len(md.trn_dl) - channel_avr**2)
channel_avr,channel_std

TypeError: slice indices must be integers or None or have an __index__ method

In [15]:
len(tr_n[:-(len(tr_n)%BATCH_SIZE)])

187008

In [25]:
sz = 96
aug_tfms = [RandomRotate(20,tfm_y=TfmType.NO),
                RandomDihedral(tfm_y=TfmType.NO)]
tfms = tfms_from_model(arch, sz, crop_type=CropType.NO, tfm_y=TfmType.NO,
                          aug_tfms=aug_tfms)
tfms


([<fastai.transforms.Scale object at 0x7fed0aa6c390>, <fastai.transforms.RandomRotate object at 0x7fed0a9f4da0>, <fastai.transforms.RandomDihedral object at 0x7fed0aa6cbe0>, <fastai.transforms.NoCrop object at 0x7fed0aa6c470>, <fastai.transforms.Normalize object at 0x7fed0a9f4c50>, <fastai.transforms.ChannelOrder object at 0x7fed0aa6cb70>],
 [<fastai.transforms.Scale object at 0x7fed0aa6c5f8>, <fastai.transforms.NoCrop object at 0x7fed0aa6c080>, <fastai.transforms.Normalize object at 0x7fed0a9f4c50>, <fastai.transforms.ChannelOrder object at 0x7fed0aa6c1d0>])

In [None]:
learn = ConvLearner.pretrained(arch, md) 
learn.opt_fn = optim.Adam

In [None]:
learn.lr_find()
learn.sched.plot()
lr = 2e-2

In [None]:
learn.fit(lr, 1, cycle_len=2)
learn.unfreeze()
lrs = np.array([1e-4, 5e-4, 1.2e-3])
learn.fit(lrs, 1, cycle_len=5, use_clr=(20, 16))
learn.fit(lrs/4, 1, cycle_len=5, use_clr=(10, 8))

In [None]:
# preds_t,y_t = learn.predict_with_targs(is_test=True) # Predicting without TTA
preds_t,y_t = learn.TTA(is_test=True, n_aug=8)
preds_t = np.stack(preds_t, axis=-1)
preds_t = np.exp(preds_t)
preds_t = preds_t.mean(axis=-1)[:,1]

In [None]:
sample_df = pd.read_csv(SAMPLE_SUB)
sample_list = list(sample_df.id)
pred_list = [p for p in preds_t]
pred_dic = dict((key, value) for (key, value) in zip(learn.data.test_ds.fnames,pred_list))
pred_list_cor = [pred_dic[id] for id in sample_list]
df = pd.DataFrame({'id':sample_list,'label':pred_list_cor})
df.to_csv('submission.csv'.format(MODEL_PATH), header=True, index=False)