In [None]:
#hide
#skip
! [ -e /content ] && pip install -Uqq espiownage fastai wwf # upgrade fastai on colab

# Segmentation Regression - Real data

*Acknowledgement: I took [Zach Mueller's Image Segmentation tutorial notebook](https://walkwithfastai.com/Segmentation) (based on the main FastAI lesson notebook) and modified it to do regression (as per Zach's suggestions) and to work with my own data.* 

In [None]:
#all_slow

In [None]:
!pip install -Uqq fastai espiownage mrspuff typing_extensions -q --upgrade

In [None]:
import espiownage
from espiownage.core import *
# Print the runtime environment; `sysinfo` is presumably supplied by the star import above.
# The recorded output of this cell shows torch/CUDA versions, GPU name and hostname.
sysinfo()
# Record the library version alongside, so a run can be matched to the code that produced it
print(f"espiownage version {espiownage.__version__}")

TORCH_VERSION=torch1.9.0; CUDA_VERSION=cu111
CUDA available = True, Device count = 1, Current device = 0
Device name = GeForce RTX 3080
hostname: bengio
espiownage version 0.0.41


In [None]:
from fastai.vision.all import *

from fastcore.xtras import Path

from fastai.callback.hook import summary
from fastai.callback.progress import ProgressCallback
from fastai.callback.schedule import lr_find, fit_flat_cos

from fastai.data.block import DataBlock
from fastai.data.external import untar_data, URLs
from fastai.data.transforms import get_image_files, FuncSplitter, Normalize

from fastai.layers import Mish   # MishJIT gives me trouble :-( 
from fastai.losses import BaseLoss, MSELossFlat, CrossEntropyLossFlat, BCEWithLogitsLossFlat
from fastai.optimizer import ranger

from fastai.torch_core import tensor

from fastai.vision.augment import aug_transforms
from fastai.vision.core import PILImage, PILMask
from fastai.vision.data import ImageBlock, MaskBlock, imagenet_stats
from fastai.vision.learner import unet_learner

from PIL import Image
import numpy as np
import random

from torch import nn
from torchvision.models.resnet import resnet34

import torch
import torch.nn.functional as F

import glob
from pathlib import Path

In [None]:
from mrspuff.utils import on_colab
on_colab = on_colab()   # NB: from here on `on_colab` is the boolean result, not the helper function

# Colab: download & unpack the dataset archive.  Elsewhere: use the local copy on disk.
path = (untar_data('http://hedges.belmont.edu/~shawley/espiownage-cleaner.tgz')
        if on_colab
        else Path('/home/shawley/datasets/espiownage-cleaner'))
print(path)

/home/shawley/datasets/espiownage-cleaner


In [None]:
# bin_size = 1 worked ok. But 0.2 and 0.5 yielded nothing; the model couldn't learn at all
bin_size = 0.7
# NOTE: `maskdir` is already a full path (it is built from `path`), so it must not be joined onto `path` again
maskdir = path / ('masks_'+str(bin_size))

# We can also generate masks dynamically using `espiownage`'s `gen_masks` script:
#!gen_masks --quiet --step={bin_size} --maskdir={maskdir} --files={str(path/'annotations')+'/*.csv'}

path_im = path/'images'
path_lbl = maskdir   # was `path/maskdir`, which only gave the right answer because `path` is absolute

# One image per annotation file; annotation list is sorted so the starting order is deterministic
meta_names = sorted(glob.glob(str(path/'annotations')+'/*.csv'))
fnames = [meta_to_img_path(x, img_bank=path_im) for x in meta_names]
# Seeded shuffle: the k-fold index ranges computed later are only meaningful if the item order
# is the same for every fold, including after a kernel restart.
shuffle_seed = 0
random.Random(shuffle_seed).shuffle(fnames)
lbl_names = get_image_files(path_lbl)

#sanity check:
print("lengths of input lists (should be the same?):",len(meta_names), len(fnames), len(lbl_names))

# Map an image path to its mask file: same stem plus a '_P' suffix, inside the mask directory
get_msk = lambda o: maskdir/f'{o.stem}_P{o.suffix}'

# One integer level per bin; 11 looks like the largest ring count being represented -- TODO confirm
colors = list(range(int(11/bin_size) + 1))
print("colors = ",colors)

# Class codes are just the level indices as strings
codes = [str(n) for n in range(len(colors))];
print("codes = ",codes)

yrange = len(codes);
print("yrange = ",yrange)

# Full image size (as passed to aug_transforms `size=`) and the half-size used for the first training stage
sz = (384, 512)
half = tuple(int(x/2) for x in sz);
print("half = ",half)

lengths of input lists (should be the same?): 1955 1955 1955
colors =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
codes =  ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15']
yrange =  16
half =  (192, 256)


In [None]:
# define regression accuracy metrics
def sr_acc_old(inp, targ):          # scores both voids and objects
    """Legacy segmentation-regression accuracy, computed over every element (voids included).

    An element counts as a miss when |inp - targ| rounds to 1 or more; the return value is
    1 minus the mean miss rate, as a 0-d tensor.

    `inp` and `targ` must hold the same number of elements (checked by `flatten_check`).
    """
    # Flatten first, exactly as `sr_acc` does.  Subtracting the raw tensors is only elementwise when
    # their shapes already match; e.g. a (B,1,H,W) prediction minus a (B,H,W) target would silently
    # broadcast to (B,B,H,W) and compare items across the batch.
    inp, targ = flatten_check(inp, targ)
    return 1 - (inp-targ).abs().round().clamp(max=1).mean()

def sr_acc(inp, targ, bin_size=1):
    """Segmentation-regression accuracy: fraction of elements predicted to within +/- `bin_size`.

    Only non-void elements (target != the module-level `void_code`) are scored.  If the target
    contains no non-void elements at all, every element is scored instead.
    Returns a 0-d float tensor.
    """
    targ = targ.squeeze(1)
    inp, targ = flatten_check(inp, targ)   # https://docs.fast.ai/metrics.html#flatten_check
    abs_err = (inp - targ).abs()
    non_void = targ != void_code
    if non_void.any():
        abs_err = abs_err[non_void]        # leave voids out of the metric
    # else: empty image (all void) -- score everything; gonna be ~100%!
    return (abs_err < bin_size).float().mean()

# Cell
# Fixed-tolerance versions of `sr_acc`.  They are kept as separate named functions so that each
# one has its own distinct name when passed in a metrics list.
def sr_acc05(inp, targ):
    "`sr_acc` with a tolerance of +/- 0.5"
    return sr_acc(inp, targ, bin_size=0.5)

def sr_acc07(inp, targ):
    "`sr_acc` with a tolerance of +/- 0.7"
    return sr_acc(inp, targ, bin_size=0.7)

def sr_acc1(inp, targ):
    "`sr_acc` with a tolerance of +/- 1"
    return sr_acc(inp, targ, bin_size=1)

def sr_acc15(inp, targ):
    "`sr_acc` with a tolerance of +/- 1.5"
    return sr_acc(inp, targ, bin_size=1.5)

def sr_acc2(inp, targ):
    "`sr_acc` with a tolerance of +/- 2"
    return sr_acc(inp, targ, bin_size=2)

In [None]:
#wandb setup
# Install the Weights & Biases client used for experiment logging
!pip install wandb -qqq
import wandb
# Star import; the training cell relies on `WandbCallback`, presumably provided here
from fastai.callback.wandb import *
# Log this session in to W&B (the recorded output of this cell is `True`)
wandb.login()



True

In [None]:
# set up k-fold splitting
kfold = True
k = 0   # set k = 0 to 4  & re-run everything from here down
nk = 5
# Fail early on a bad fold number instead of producing an empty / out-of-range validation set
assert 0 <= k < nk, f"fold index k={k} must satisfy 0 <= k < {nk}"
nv = len(fnames)//nk             # size of val set
bgn = k*nv                       # ind to start val set
# The last fold also absorbs the len(fnames) % nk leftover items, so that every item is in
# exactly one validation fold (no change when the length divides evenly).
stop = len(fnames) if k == nk-1 else bgn+nv
inds = list(range(bgn, stop))    # indices for this val set

# NOTE(review): items appear to come from `get_items=get_image_files` applied to path/'images';
# `fnames=` below may just be forwarded as an extra keyword and ignored.  If so, `inds` index the
# get_image_files ordering rather than the shuffled `fnames` list -- confirm.  (The full-size
# DataBlock in the training cell is built the same way, so the two must be changed together.)
db = DataBlock(blocks=(ImageBlock, MaskBlock(codes)),
    get_items=get_image_files,
    splitter=IndexSplitter(inds),
    get_y=get_msk,
    batch_tfms=[*aug_transforms(size=half, flip_vert=True), Normalize.from_stats(*imagenet_stats)])
dls = db.dataloaders(path/'images', fnames=fnames, bs=4)
dls.vocab = codes
# code string -> integer id (loop names chosen so they cannot be confused with the fold index `k`)
name2id = {name: idx for idx, name in enumerate(codes)}
void_code = name2id['0']         # id of the 'nothing here' class; read by `sr_acc`

In [None]:
opt = ranger

hrfac = 1.2  # 'headroom factor'
# balance between "clamping" to range of real data vs too much "compression" from sigmoid nonlinearity
y_range = (0, int(len(codes)*hrfac))

metrics = [mae, sr_acc_old, sr_acc05, sr_acc07, sr_acc1, sr_acc15, sr_acc2]

# run parameters
epochs, lr = 12*4, 1e-3


def _new_learner(dls):
    "Single-output regression U-Net used by both training stages; every call gets a fresh WandbCallback."
    return unet_learner(dls, resnet34, n_out=1, y_range=y_range, loss_func=MSELossFlat(),
                        metrics=metrics, self_attention=True, act_cls=Mish, opt_func=opt,
                        cbs=WandbCallback())


# ---------------- Stage 1: half-size images ----------------
wandb.init(project='segreg_kfold')  # <-- let wandb make up names  #name=f"k={k},e{epochs},lr{lr}")
learn = _new_learner(dls)

# To pick a learning rate automatically instead:
#lr = learn.lr_find().valley
#print("Suggested Learning Rate =",lr)

print("----- HALF SIZE TRAINING")

print("Training: frozen epochs...")
learn.fit_flat_cos(12, slice(lr))  # these frozen epochs don't yield much improvement btw

print("unfreezing model, lowering lr by 4")
learn.unfreeze()
lrs = slice(lr/400, lr/4)

print("Training: unfrozen epochs...")
learn.fit_flat_cos(12, lrs)

halfweights = 'seg_reg_real_half'
print(f"Saving model: {halfweights}")
learn.save(halfweights)
#  Nope we're not finished! Save wandb.finish() until after Full size training.

# ---------------- Stage 2: full-size images ----------------
print("\n----- FULL SIZE TRAINING -----")

# Same data pipeline as before, except that the augmentations now target the full size `sz`
db = DataBlock(
    blocks=(ImageBlock, MaskBlock(codes)),
    get_items=get_image_files,
    splitter=IndexSplitter(inds),
    get_y=get_msk,
    batch_tfms=[*aug_transforms(size=sz, flip_vert=True), Normalize.from_stats(*imagenet_stats)],
)
dls = db.dataloaders(path/'images', fnames=fnames, bs=2)  # smaller batch size because we're now full size
dls.vocab = codes

# Fresh learner on the full-size data, started from the half-size weights
learn = _new_learner(dls)
learn.load(halfweights)

#learn.lr_find(end_lr=5e-3)

lr = 3e-4
print("Training: frozen epochs...")
learn.fit_flat_cos(10, slice(lr))

print("unfreezing model, lowering lr by...stuff")
learn.unfreeze()
lrs = slice(1e-6, lr/10)

print("Training: unfrozen epochs...")
learn.fit_flat_cos(10, lrs)

print("Finishing WandB")
wandb.finish()

fullweights = 'seg_reg_real_full'
print(f"Saving model: {fullweights}")
learn.save(fullweights)

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

wandb: ERROR Error while calling W&B API: Error 1062: Duplicate entry '465532-2k65c6y3' for key 'PRIMARY' (<Response [409]>)
wandb: ERROR Error while calling W&B API: Error 1062: Duplicate entry '465532-2k65c6y3' for key 'PRIMARY' (<Response [409]>)
wandb: ERROR Error while calling W&B API: Error 1062: Duplicate entry '465532-2k65c6y3' for key 'PRIMARY' (<Response [409]>)
[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


----- HALF SIZE TRAINING
Training: frozen epochs...
WandbCallback requires use of "SaveModelCallback" to log best model


epoch,train_loss,valid_loss,mae,sr_acc_old,sr_acc05,sr_acc07,sr_acc1,sr_acc15,sr_acc2,time


wandb: ERROR Error while calling W&B API: Error 1062: Duplicate entry '465532-2k65c6y3' for key 'PRIMARY' (<Response [409]>)
Process wandb_internal:
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/shawley/.local/lib/python3.8/site-packages/wandb/sdk/internal/internal.py", line 152, in wandb_internal
    thread.join()
  File "/usr/lib/python3.8/threading.py", line 1011, in join
    self._wait_for_tstate_lock()
  File "/usr/lib/python3.8/threading.py", line 1027, in _wait_for_tstate_lock
    elif lock.acquire(block, timeout):
KeyboardInterrupt


KeyboardInterrupt: 

## Inference
This generates an image of the predicted segmentation mask for each validation item, plus a CSV file listing the filenames in top-loss order.

In [None]:
# Restore the full-size weights before predicting
learn.load(fullweights)

# Predictions, targets and per-item losses -- validation set only
preds, targs, losses = learn.get_preds(with_loss=True)
print(preds.shape, targs.shape)

def save_tmask(tmask, fname='', norm=False): # save tensor mask
    """Turn a mask tensor into an 8-bit grayscale PIL image, optionally writing it to disk.

    tmask: tensor whose first entry along dim 0 is the mask to draw (e.g. a (1,H,W) prediction)
    fname: output path; nothing is written when it is ''
    norm:  False -> map the fixed range 0..max(colors) onto 0..255
           True  -> stretch this image's own min..max onto 0..255
    Returns the PIL image.
    """
    tmask_new = tmask[0].squeeze().cpu().numpy()
    use_min, use_max = 0, np.max(np.array(colors))    # use scale of max ring count
    if norm: use_min, use_max = tmask_new.min(), tmask_new.max()   # auto scale for just this image
    span = use_max - use_min   # divide by the width of the range, not by its upper end
    if span > 0:
        # Clip before the cast: anything outside [use_min, use_max] (predictions can exceed
        # max(colors)) would otherwise wrap around when converted to uint8.
        rescaled = np.clip(255.0 / span * (tmask_new - use_min), 0, 255).astype(np.uint8)
    else:
        # Flat image (or empty range): nothing to scale, and this avoids a division by zero
        rescaled = np.zeros(tmask_new.shape, dtype=np.uint8)
    im = Image.fromarray(rescaled)
    if fname != '': im.save(fname)
    return im

seg_img_dir = 'seg_reg_images'
#!rm -rf {seg_img_dir};  # leave 'em
# Create the output directory if needed; unlike a bare `mkdir` this is not an error on a re-run
Path(seg_img_dir).mkdir(parents=True, exist_ok=True)

results = []
for i in range(len(preds)):
    filestem = dls.valid.items[i].stem
    # Write the predicted mask for this validation item
    save_tmask(preds[i], seg_img_dir+'/'+filestem+'_pred.png')
    # Keep the loss as a plain Python float so the DataFrame column is numeric (sortable and
    # written cleanly to CSV) rather than a column of 0-d numpy arrays.
    results.append([filestem, float(losses[i]), i])

# store as pandas dataframe
res_df = pd.DataFrame(results, columns=['filename', 'loss','i'])

res_df = res_df.sort_values('loss', ascending=False) # top loss order
res_df.to_csv(f'segreg_top_losses_real_k{k}.csv', index=False)

<fastai.learner.Learner at 0x7fe9342206d0>