## Multi-label classification

### TO DO

- reduce learning rate?

In [1]:
# Reload the autoreload extension and re-import edited modules before each cell runs
%reload_ext autoreload
%autoreload 2
# Draw matplotlib figures inside the notebook
%matplotlib inline

In [2]:
from fastai.conv_learner import *
from fastai.plots import *

# Py file for planet competition
from planet import f2

In [3]:
# Dataset root; the label CSV and the image folders used below are resolved against it
PATH = 'data/planet/'

In [4]:
ls {PATH}

[0m[01;34mmodels[0m/                   [01;34mtest-jpg[0m/                 [01;34mtmp[0m/        train_v2.csv
sample_submission_v2.csv  test_v2_file_mapping.csv  [01;34mtrain-jpg[0m/


In multi-label classification each sample can belong to one or more classes. In the previous example, the first image belongs to two classes: *haze* and *primary*. The second image belongs to four classes: *agriculture*, *clear*, *primary* and *water*.

## Multi-label models for Planet dataset

In [5]:
# Metric reported during training: F2, imported from planet.py for this competition
metrics = [f2]

# Pretrained architecture used as the backbone (ResNet-50)
f_model = resnet50
# Image size for the first training stage
sz = 64

label_csv = f'{PATH}train_v2.csv'
# Count the lines of the label file minus one (presumably the header row).
# The context manager closes the file handle, and the lines are counted lazily
# instead of being loaded into a list first.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
# Indices handed to the data loader as the validation split
val_idxs = get_cv_idxs(n)

We use a different set of data augmentations for this dataset - we also allow vertical flips, since we don't expect vertical orientation of satellite images to change our classifications.

In [6]:
def get_data(sz, bs=124):
    """Build the Planet data object for images of size `sz`.

    Reads the notebook-level names `f_model`, `PATH`, `label_csv` and
    `val_idxs`.

    Args:
        sz: Image size handed to `tfms_from_model`.
        bs: Batch size handed to `ImageClassifierData.from_csv`. Defaults to
            124, the value that used to be hard-coded, so existing
            `get_data(sz)` calls behave exactly as before.

    Returns:
        Whatever `ImageClassifierData.from_csv` returns for the 'train-jpg'
        images labelled by `label_csv`, with `val_idxs` as the validation
        rows and 'test-jpg' as the test folder.
    """
    # Top-down augmentation set (vertical flips are acceptable for satellite
    # imagery), with max_zoom=1.05
    tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_top_down, max_zoom=1.05)
    return ImageClassifierData.from_csv(
        PATH,
        'train-jpg',
        label_csv,
        bs=bs,
        tfms=tfms,
        suffix='.jpg',
        val_idxs=val_idxs,
        test_name='test-jpg',
    )

In [7]:
# Image size for the first training stage (same value as set next to f_model above)
sz=64

In [8]:
# Build the data object at the current image size
data = get_data(sz)

In [None]:
# Replace `data` with its resized version, using int(sz*1.3) as the target size
# NOTE(review): presumably 'tmp' names the folder where resized copies are cached — confirm.
# NOTE(review): `data` is rebound from itself, so re-running this cell resizes an already-resized object.
data = data.resize(int(sz*1.3), 'tmp')

In [9]:
# Create the learner from the pretrained f_model, the data object and the metric list
learn = ConvLearner.pretrained(f_model, data, metrics=metrics)

In [None]:
# Run the learning-rate finder, then plot what the scheduler recorded
lrf=learn.lr_find()
learn.sched.plot()

In [None]:
# Learning rate used by the fit calls below
# NOTE(review): the TO DO list at the top asks whether this should be reduced.
lr = 0.2

In [None]:
# Fit with the single learning rate: 3 cycles, cycle_len=1, cycle_mult=2
# NOTE(review): presumably the backbone is still frozen at this point — confirm.
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)
# Plot the loss recorded by the scheduler
learn.sched.plot_loss()

In [None]:
# Three learning rates (lr/9, lr/3, lr) used by the fits that follow unfreeze()
# NOTE(review): presumably one rate per layer group, lowest for the earliest layers — confirm.
lrs = np.array([lr/9,lr/3,lr])

In [None]:
# Unfreeze the model and fit again, this time with the array of learning rates
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
# Plot the loss recorded by the scheduler
learn.sched.plot_loss()

#### Change size to 128 x 128

In [None]:
# Move to the 128 x 128 stage
sz=128

# Swap in data built at the new size, freeze again, then fit with the single rate
learn.set_data(get_data(sz))
learn.freeze()
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)
learn.sched.plot_loss()

In [None]:
# Unfreeze and fit with the array of learning rates at 128 x 128
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
# Save the learner state under the name 'resnet50_128' before plotting the loss
learn.save('resnet50_128')
learn.sched.plot_loss()

#### Change size to 256 x 256

In [None]:
# Move to the 256 x 256 stage
sz=256

# Swap in data built at the new size, freeze again, then fit with the single rate
learn.set_data(get_data(sz))
learn.freeze()
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)
learn.sched.plot_loss()

In [None]:
# Unfreeze and fit with the array of learning rates at 256 x 256
learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
# Save the learner state under the name 'resnet50_256'; the submission section loads it again
learn.save('resnet50_256')
learn.sched.plot_loss()

In [None]:
# NOTE(review): the next line looks like a log row pasted from an earlier run
# (possibly epoch, losses and F2) — confirm or remove.
# [ 6.       0.07928  0.0792   0.93554]

# TTA with default arguments; the result is unpacked into stacked predictions and `y`
# (presumably the validation targets — confirm)
multi_preds, y = learn.TTA()
# Average over the first axis to get a single prediction array
preds = np.mean(multi_preds, 0)

In [None]:
# F2 score of the averaged predictions against y
f2(preds,y)

### End

In [10]:
# Rebuild the data and the learner at 256px, then load the weights saved as 'resnet50_256'
sz=256
data = get_data(sz)
learn = ConvLearner.pretrained(f_model, data, metrics=metrics)
learn.load('resnet50_256')

In [None]:
# TTA on the test set (is_test=True)
# NOTE(review): this overwrites `y`; the submission cell below does not use it.
prob_preds, y= learn.TTA(is_test=True)

In [None]:
# Class names as an array so they can be selected with an index array
classes = np.array(data.classes, dtype=str)

# Average over the first (TTA) axis, exactly as the validation predictions are
# averaged, instead of keeping only the first slice of the stacked predictions.
test_preds = np.mean(prob_preds, 0)

# A tag is emitted for every class whose averaged score exceeds this cutoff
threshold = 0.2
res = [" ".join(classes[np.where(pp > threshold)]) for pp in test_preds]

# Image names: test file names without directory and without extension
test_fnames = [os.path.basename(f).split(".")[0] for f in data.test_ds.fnames]
test_df = pd.DataFrame(res, index=test_fnames, columns=['tags'])

# Submission layout: 'image_name' index column followed by the space-separated 'tags'
test_df.to_csv('planet_resnet50.csv', index_label='image_name')

In [None]:
# Show a link to the submission CSV written by the previous cell
from IPython.display import FileLink
FileLink('planet_resnet50.csv')