In [3]:
# Select the GPU through environment variables; this cell runs before the
# fastai imports that follow.
print("Setting CUDA devices...")
import os
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "5",
})


from fastai.conv_learner import *
from fastai.dataset import *

import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split


Setting CUDA devices...


ModuleNotFoundError: No module named 'isoweek'

In [None]:
# Every location below is derived from DATASET_ROOT (note the trailing slash,
# which later cells rely on when concatenating onto PATH).
DATASET_ROOT = '/media/data-nvme/dev/datasets/airbus/'
PATH = DATASET_ROOT
TRAIN = os.path.join(DATASET_ROOT, 'train_v2/')
TEST = os.path.join(DATASET_ROOT, 'test_v2/')
SEGMENTATION = os.path.join(DATASET_ROOT, 'train_ship_segmentations_v2.csv')
# File names dropped from both the train and test listings (corrupted image).
exclude_list = ['6384c3e78.jpg']
PRETRAINED = os.path.join(
    DATASET_ROOT, 'models/resnet34s256_kaggle-airbus-l0.053-a0.981.h5')

In [None]:
#!ls {TRAIN}

In [None]:
!rm -r {PATH}tmp # delete the tmp directory under PATH; presumably to avoid reusing stale cached files from an earlier run - TODO confirm

In [None]:
nw = 12   # number of data-loader workers; passed as num_workers to ImageData in get_data
arch = resnext50 # target architecture; used by tfms_from_model in get_data and by ConvLearner.pretrained

In [None]:
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the seeded split below reproducible across runs and machines.
excluded = set(exclude_list)
train_names = sorted(f for f in os.listdir(TRAIN) if f not in excluded)
test_names = sorted(f for f in os.listdir(TEST) if f not in excluded)
#5% of data in the validation set is sufficient for model evaluation
tr_n, val_n = train_test_split(train_names, test_size=0.05, random_state=42)

In [None]:
class pdFilesDataset(FilesDataset):
    """Image dataset with a binary ship / no-ship label per file.

    Labels come from the SEGMENTATION csv, indexed by 'ImageId': an image
    counts as containing a ship when it has at least one non-missing
    'EncodedPixels' entry.
    """

    def __init__(self, fnames, path, transform):
        """Load the segmentation table, then delegate to FilesDataset.

        Note the argument order: this class receives (fnames, path, transform)
        while the parent constructor is called with (fnames, transform, path).
        """
        self.segmentation_df = pd.read_csv(SEGMENTATION).set_index('ImageId')
        super().__init__(fnames, transform, path)

    def get_x(self, i):
        """Return image ``i``, resized to (sz, sz) unless sz is 768."""
        img = open_image(os.path.join(self.path, self.fnames[i]))
        # 768 is returned untouched - presumably the native image size; TODO confirm.
        if self.sz == 768:
            return img
        return cv2.resize(img, (self.sz, self.sz))

    def get_y(self, i):
        """Return 1 if image ``i`` has a ship mask, else 0 (always 0 for TEST)."""
        if self.path == TEST:
            return 0
        masks = self.segmentation_df.loc[self.fnames[i]]['EncodedPixels']
        # An ImageId that appears on several rows yields a Series of masks.
        if isinstance(masks, pd.Series):
            return int(masks.notna().any())
        # Single row: a missing value (NaN of any float type) means no ship.
        return int(pd.notna(masks))

    def get_c(self):
        """Return the number of classes (ship / no ship)."""
        return 2

In [None]:
def get_data(sz,bs):
    """Build the ImageData object for image size ``sz`` and batch size ``bs``.

    The training file list is truncated to a whole number of batches; the
    validation and test lists are used in full.

    Args:
        sz: image size passed to tfms_from_model.
        bs: batch size.

    Returns:
        The ImageData built from pdFilesDataset train/validation/test datasets.
    """
    #data augmentation
    aug_tfms = [RandomRotate(20, tfm_y=TfmType.NO),
                RandomDihedral(tfm_y=TfmType.NO),
                RandomLighting(0.05, 0.05, tfm_y=TfmType.NO)]
    tfms = tfms_from_model(arch, sz, crop_type=CropType.NO, tfm_y=TfmType.NO,
                aug_tfms=aug_tfms)
    # Drop the trailing partial batch. The end index is computed explicitly
    # because a negative-slice form (tr_n[:-remainder]) collapses to an empty
    # list when the remainder is 0.
    n_full = len(tr_n) - len(tr_n) % bs
    ds = ImageData.get_ds(pdFilesDataset, (tr_n[:n_full],TRAIN),
                (val_n,TRAIN), tfms, test=(test_names,TEST))
    md = ImageData(PATH, ds, bs, num_workers=nw, classes=None)
#     md.is_multi = False
    return md

In [None]:
# Default image size and batch size, and the data object built from them.
sz, bs = 256, 350

md = get_data(sz, bs)

In [None]:
# learn = ConvLearner.pretrained(arch, md, ps=0.5) #dropout 50%
# learn.opt_fn = optim.Adam

In [None]:
#learn.lr_find()
#learn.sched.plot()

In [None]:
#learn.fit(2e-3, 1)

In [None]:
# learn.unfreeze()
# lr=np.array([1e-4,5e-4,2e-3])

In [None]:
# def save_when_acc(self, metrics):
#     loss, acc = metrics[0], metrics[1]
#     filename = "{}-l{:.3f}-a{:.3f}".format(self.name, loss[0], acc)
#     if self.best_acc == None or acc > self.best_acc:
#         self.best_acc = acc
#         self.best_loss = loss
#         self.model.save(f'{filename}')
#     elif acc == self.best_acc and  loss < self.best_loss:
#         self.best_loss = loss
#         self.model.save(f'{filename}')
# SaveBestModel.save_when_acc = save_when_acc
def save_when_acc(self, metrics):
    """Save the model unconditionally under ``<self.name>-<random 1..1000>``.

    ``metrics`` is accepted but ignored. The chosen file name is printed
    before the model is saved.
    """
    suffix = random.randint(1,1000)
    filename = "{}-{}".format(self.name, suffix)
    print(filename)
    self.model.save(filename)

# Monkey-patch: replace SaveBestModel.save_when_acc with the function defined above.
SaveBestModel.save_when_acc = save_when_acc

In [None]:
#learn.fit(lr, 1, cycle_len=2, use_clr=(20,8), best_save_name=arch.__name__ + "_kaggle-airbus")

In [None]:
#learn.sched.plot_lr()

In [None]:
#learn.save('Resnet34_lable_256_1')

## Train in several stages (varying image size and batch size)

In [None]:
# To test:
'''
[1, 2, (20,8), 384, 32],
'''
# One row per training stage: [epochs, cycle_len, use_clr, image size, batch size]
training_loop = [
    [1, 1, (40,10), 64, 2000],
    [1, 1, (40,10), 128, 1200],
    [1, 1, (40,10), 256, 600],
    [1, 1, (40,10), 256, 600],
    [1, 1, (40,10), 384, 350],
    [1, 1, (40,10), 768, 150]
]
# Initial data for building the learner, using the notebook-level sz / bs.
md = get_data(sz,bs)
learn = ConvLearner.pretrained(arch, md, ps=0.5) #dropout 50%
learn.opt_fn = optim.Adam
#learn.load(PRETRAINED)
#learn.fit(2e-3, 1, best_save_name='ResNet34_s' + str(sz) + "_kaggle-airbus")
learn.unfreeze()
lr=np.array([1e-4,5e-4,2e-3])
# The stage size / batch size get their own names so the loop does not
# overwrite the notebook-level sz / bs; rerunning this cell then starts from
# the same initial data every time.
for i, (epochs, cycle_len, use_clr, stage_sz, stage_bs) in enumerate(training_loop, start=1):
    md = get_data(stage_sz, stage_bs)
    learn.set_data(md)
    learn.fit(lr, epochs, cycle_len=cycle_len, use_clr=use_clr,
        best_save_name='ResNext50_s' + str(stage_sz) + "_kaggle-airbus" + str(i))
    print("----------", i, "------------------------------")

## Load the best

In [None]:
# Predict on the test set; column 1 of the output is exponentiated to get the
# ship score (the outputs are treated as log-probabilities).
log_preds, y = learn.predict_with_targs(is_test=True)
ship_log_probs = log_preds[:, 1]
probs = np.exp(ship_log_probs)
# Hard 0/1 decision at a 0.5 threshold.
pred = (probs > 0.5).astype(int)

In [None]:
# Write one row per test image: file name and predicted ship score.
prediction_columns = {'id': test_names, 'p_ship': probs}
df = pd.DataFrame(prediction_columns)
df.to_csv('ship_detectionResNext50.csv', index=False, header=True)

In [None]:
# sz = 384 #image size
# bs = 32  #batch size

# md = get_data(sz,bs)
# learn = ConvLearner.pretrained(arch, md, ps=0.5) #dropout 50%
# learn.opt_fn = optim.Adam
# learn.unfreeze()
# lr=np.array([1e-4,5e-4,2e-3])

In [None]:
# The version string lives in the dunder attribute __version__;
# torch._version_ does not exist and raises AttributeError.
print(torch.__version__)