## CamVid segmentation

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai import *
from fastai.vision import *

In [None]:
path = Path('data/camvid')

In [None]:
path.ls()

In [None]:
path_lbl = path/'labels'
path_img = path/'images'

## Data

In [None]:
fnames = get_image_files(path_img)
fnames[:5]

In [None]:
path_lbl.ls()[:5]

In [None]:
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))

In [None]:
codes = np.loadtxt(path/'codes.txt', dtype=str)
codes

In [None]:
def get_y_fn(fn):
    "Return the mask path inside `path_lbl` for image file `fn`: `<stem>_P.png`"
    # `stem` removes the extension whatever its length; slicing off a fixed 4 chars broke on e.g. `.jpeg`
    return path_lbl/f'{fn.stem}_P.png'

In [None]:
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)

In [None]:
mask.data

## Datasets

In [None]:
valid_fns = np.loadtxt(path/'valid.txt', dtype=str)
valid_fns[:5]

In [None]:
valid_fns = [path_img/o for o in valid_fns]
# Filter `fnames` in order instead of taking a set difference: set iteration order depends on
# string hashing, so the training files would otherwise come out in a different order each run.
valid_set = set(valid_fns)
train_fns = [o for o in fnames if o not in valid_set]

In [None]:
y_train_fns = [get_y_fn(o) for o in train_fns]
y_valid_fns = [get_y_fn(o) for o in valid_fns]
len(train_fns),len(valid_fns),len(y_train_fns),len(y_valid_fns)

In [None]:
size=128
bs=32

In [None]:
train_ds = SegmentationDataset(train_fns, y_train_fns)
valid_ds = SegmentationDataset(valid_fns, y_valid_fns)

In [None]:
train_tfms,valid_tfms = get_transforms()

In [None]:
train_tds = DatasetTfm(train_ds, train_tfms, size=size, tfm_y=True)
valid_tds = DatasetTfm(valid_ds, valid_tfms, size=size, tfm_y=True)

In [None]:
data = DataBunch.create(train_tds, valid_tds, bs=bs)

In [None]:
x,y = zip(*[train_tds[i] for i in range(9)])

In [None]:
show_xy_images(x, y, rows=3)

## Refactor

- valid set
  - path
  - fnames
  - idxs
  - min idx
  - rand pct
- type of data
- source of labels

In [None]:
class ItemList():
    "Hold `items` as an `ndarray`, giving the collection `len` and numpy-style indexing"
    def __init__(self, items:Iterator):
        # Materialise the iterable before handing it to numpy
        self.items = np.array([*items])

    def __len__(self)->int:
        return len(self.items)

    def __getitem__(self,i:int)->Any:
        # Delegates to the array, so whatever index `ndarray` accepts works here too
        return self.items[i]

    def __repr__(self)->str:
        return f'{self.__class__.__name__} ({len(self)} items)\n{self.items}'

In [None]:
def join_path(fname:PathOrStr, path:PathOrStr='.')->Path:
    "Join `fname` onto directory `path`; `path` is the current dir when omitted"
    base,leaf = Path(path),Path(fname)
    return base/leaf

In [None]:
def join_paths(fnames:FilePathList, path:PathOrStr='.')->Collection[Path]:
    "Apply `join_path` to every entry of `fnames` and return the results as a list"
    base = Path(path)  # convert once, not once per file name
    return [join_path(name, base) for name in fnames]

In [None]:
def loadtxt_str(path:PathOrStr)->np.ndarray:
    "Return `ndarray` of `str` of lines of text from `path` (always at least 1-d)"
    # By default `np.loadtxt` squeezes a one-line file down to a 0-d array, which callers
    # cannot iterate over; `ndmin=1` keeps a single line as a length-1 array.
    return np.loadtxt(str(path), dtype=str, ndmin=1)

In [None]:
class ImageFileList(ItemList):
    "An `ItemList` whose items are image files"
    @classmethod
    def from_folder(cls, path:PathOrStr, check_ext:bool=True, recurse=False)->'ImageFileList':
        "Build the list from what `get_image_files` returns for `path`"
        files = get_image_files(path, check_ext=check_ext, recurse=recurse)
        return cls(files)

    def label_from_func(self, func:Callable)->Collection:
        "Pair every item with `func(item)` and return the pairs as a `LabelList`"
        pairs = [(item, func(item)) for item in self.items]
        return LabelList(pairs)

In [None]:
class LabelList(ItemList):
    "An `ItemList` whose items are `(file, label)` pairs"
    @property
    def files(self):
        "First column of `items`, i.e. the file of every pair"
        return self.items[:,0]

    def split_by_files(self, valid_fnames:FilePathList)->'SplitData':
        "Split into train/valid: pairs whose file is in `valid_fnames` go to valid, all others to train"
        # Hash the names once: membership tests become O(1) instead of a scan of `valid_fnames`
        # per item, and a one-shot iterable is consumed only a single time.
        valid_set = set(valid_fnames)
        train,valid = [],[]
        for o in self.items: (valid if o[0] in valid_set else train).append(o)
        return SplitData(LabelList(train), LabelList(valid))

    def split_by_fname_file(self, fname:PathOrStr, path:PathOrStr='.')->'SplitData':
        "Like `split_by_files`, with the valid file names read from text file `fname` and joined onto `path`"
        fnames = join_paths(loadtxt_str(fname), path)
        return self.split_by_files(fnames)

In [None]:
@dataclass
class SplitData():
    "A train/valid pair of `LabelList`s"
    train:LabelList
    valid:LabelList

    @property
    def lists(self):
        "`[train, valid]`, in that order"
        return [self.train,self.valid]

    def datasets(self, dataset_cls:type, tfms:TfmList, **kwargs):
        "Build one `dataset_cls` per split, pass both through `transform_datasets`, and wrap the result in `SplitDatasets`"
        # Each column of a split's `items` array becomes one positional argument of `dataset_cls`
        train_ds,valid_ds = (dataset_cls(*split.items.T) for split in self.lists)
        transformed = transform_datasets(train_ds, valid_ds, tfms=tfms, **kwargs)
        return SplitDatasets(*transformed)

In [None]:
@dataclass
class SplitDatasets():
    "A train/valid pair of datasets"
    train_ds:Dataset
    valid_ds:Dataset

    @property
    def datasets(self):
        "`[train_ds, valid_ds]`, in that order"
        return [self.train_ds,self.valid_ds]

    def dataloaders(self, **kwargs):
        "One `DataLoader` per dataset (train first), both built with the same `kwargs`"
        train_dl,valid_dl = (DataLoader(ds, **kwargs) for ds in self.datasets)
        return [train_dl,valid_dl]

    def databunch(self, **kwargs):
        "Pass both datasets and `kwargs` to `ImageDataBunch.create`"
        train_ds,valid_ds = self.datasets
        return ImageDataBunch.create(train_ds, valid_ds, **kwargs)

- get filenames
- get labels
- split data
- make datasets
- get tfms
- (make dls)
- (use device)
- databunch

In [None]:
tfms = get_transforms()

In [None]:
ifl = ImageFileList.from_folder(path_img); ifl[0]

In [None]:
ll = ifl.label_from_func(get_y_fn); ll[0]

In [None]:
sd = ll.split_by_fname_file(path/'valid.txt', path_img)

In [None]:
tfms = get_transforms()

In [None]:
dss = sd.datasets(SegmentationDataset, tfms, size=128, tfm_y=True)

In [None]:
data = dss.databunch()

In [None]:
# The same pipeline as the step-by-step cells above, written as a single fluent chain
data = (ImageFileList.from_folder(path_img)  # image files found in `path_img`
        .label_from_func(get_y_fn)  # pair each image with its mask path
        .split_by_fname_file(path/'valid.txt', path_img)  # files named in valid.txt form the validation split
        .datasets(SegmentationDataset, tfms, size=128, tfm_y=True)  # one dataset per split, passed through `transform_datasets`
        .databunch())  # wrap both datasets with `ImageDataBunch.create`

In [None]:
x,y = data.train_dl.one_batch()
show_xy_images(x,y,rows=3)

In [None]:
x.shape,y.shape

In [None]:
show_xy_images(x,y,rows=3)

In [None]:
# Look-up from class name to its position in `codes`
name2id = {name:idx for idx,name in enumerate(codes)}
# Index of the 'Void' class, which the accuracy metric leaves out
void_code = name2id['Void']

In [None]:
def accuracy_no_void(input, target, void_code):
    "Mean pixel accuracy of `input.argmax(dim=1)` against `target`, ignoring pixels labelled `void_code`"
    # Drop only the singleton channel axis. A bare `squeeze()` would also remove a batch axis of
    # size 1 (e.g. a last batch holding one image), leaving `target` misaligned with the predictions.
    if target.dim() == input.dim(): target = target.squeeze(1)
    mask = target != void_code
    return (input.argmax(dim=1)[mask]==target[mask]).float().mean()

In [None]:
metrics=partial(accuracy_no_void, void_code=void_code)
lr = 1e-3

In [None]:
# Encoder: resnet34 built with `True` as its first argument (presumably `pretrained` — confirm),
# handed to `create_body` with cut index -2 (presumably drops the classifier head — confirm)
body = create_body(models.resnet34(True), -2)
# U-Net with one output class per entry in `codes`; `.cuda()` means this cell needs a GPU
model = models.unet.DynamicUnet(body, n_classes=len(codes)).cuda()
learn = Learner(data, model, metrics=metrics, loss_func=CrossEntropyFlat())
# NOTE(review): looks like this sets the layer-group boundaries at `model[0][6]` and `model[1]` — confirm against `Learner.split`
learn.split([model[0][6], model[1]])
# NOTE(review): presumably leaves only the last layer group trainable for the first fit — confirm
learn.freeze()

In [None]:
lr_find(learn)
learn.recorder.plot()

In [None]:
lr = 1e-3

In [None]:
learn.fit_one_cycle(6, slice(lr))

In [None]:
# `unfreezefreeze` is not a `Learner` method; unfreeze the model before the fine-tuning fit below
learn.unfreeze()

In [None]:
learn.fit_one_cycle(6, slice(lr/100,lr))