
# IceVision Bboxes - Real Data

*This is a mashup of IceVision's "[Custom Parser](https://airctic.com/0.8.0/custom_parser/)" example and their "[Getting Started (Object Detection)](https://airctic.com/0.8.0/getting_started_object_detection/)" notebook, adapted to analyze the SPNet Real dataset, for which I [generated bounding boxes](https://github.com/drscotthawley/SPNet/blob/master/gen_bboxes_csv.py). -- S.H. Hawley, July 1, 2021*

In [None]:
# IceVision - IceData - MMDetection - YOLO v5 Installation on Colab
#try:
#    !wget https://raw.githubusercontent.com/airctic/icevision/master/install_colab.sh
#    !chmod +x install_colab.sh && ./install_colab.sh
#except:
#    print("Ignore the error messages and just keep going")

# for local install see other notebooks

In [None]:
#all_slow

In [None]:
# system info: report the torch / CUDA build, the GPU (if any) and the host name.
# TORCH_VERSION and CUDA_VERSION are formatted the way the (commented-out) mmcv
# install command further down interpolates them into its wheel-index URL.
import os
import platform
import re

import torch

tv, cv = torch.__version__, torch.version.cuda
tv = re.sub(r'\+.*', '', tv)  # strip any local build tag: '+cu102', '+cpu', ...
# Keep major.minor and zero the patch number, e.g. '1.8.1' -> 'torch1.8.0'.
TORCH_VERSION = 'torch' + '.'.join(tv.split('.')[:2]) + '.0'
# torch.version.cuda is None on CPU-only builds of torch.
CUDA_VERSION = ('cu' + cv.replace('.', '')) if cv is not None else 'cpu'

print(f"TORCH_VERSION={TORCH_VERSION}; CUDA_VERSION={CUDA_VERSION}")

# current_device() / get_device_name() raise when no GPU is usable, so guard them.
cuda_available = torch.cuda.is_available()
current_device = torch.cuda.current_device() if cuda_available else None
device_name = torch.cuda.get_device_name() if cuda_available else None
print(f"CUDA available = {cuda_available}, Device count = {torch.cuda.device_count()}, Current device = {current_device}")
print(f"Device name = {device_name}")
print("hostname:", platform.node())  # portable replacement for os.uname()[1]

TORCH_VERSION=torch1.8.0; CUDA_VERSION=cu102
CUDA available = True, Device count = 1, Current device = 0
Device name = TITAN X (Pascal)
hostname: lecun


In [None]:
# For local Icevision Install of MMD.  cf. https://airctic.com/0.8.1/install/
#!pip install -qq mmcv-full=="1.3.8" -f https://download.openmmlab.com/mmcv/dist/{CUDA_VERSION}/{TORCH_VERSION}/index.html --upgrade
#!pip install mmdet -qq

In [None]:
from icevision.all import *
import pandas as pd
from pathlib import Path

[1m[1mINFO    [0m[1m[0m - [1mThe mmdet config folder already exists. No need to downloaded it. Path : /home/shawley/.icevision/mmdetection_configs/mmdetection_configs-2.10.0/configs[0m | [36micevision.models.mmdet.download_configs[0m:[36mdownload_mmdet_configs[0m:[36m17[0m


In [None]:
# Load the bounding-box annotations; the row count is reported as the number of bboxes.
data_dir = Path('/home/shawley/datasets/espiownage-cleaner')  # real data is local and private
df = pd.read_csv(data_dir / "bboxes/annotations.csv")
df['label'] = 'AN'  # antinode
print(f"{len(df)} total bboxes of target data.")
df.head()  # must be the cell's last expression, otherwise the preview is never rendered

6309 total bboxes of target data.


## Custom Data Parser

In [None]:
# Empty detection record; it is handed to BBoxParser below as its `template_record`.
template_record = ObjectDetectionRecord()
#Parser.generate_template(template_record)  # this just prints stuff to the screen.

class BBoxParser(Parser):
    """Parser that feeds the rows of `bboxes/annotations.csv` to IceVision.

    Each CSV row carries one box (`xmin`, `ymin`, `xmax`, `ymax`) together with the
    `filename`, `width` and `height` of its image.  Every box is given the single
    label 'AN'.  `record_id` returns the row's filename (presumably what IceVision
    uses to collect the boxes of one image into one record -- confirm against the
    IceVision Parser docs).
    """

    def __init__(self, template_record, data_dir):
        super().__init__(template_record=template_record)

        annotations_csv = data_dir / "bboxes/annotations.csv"
        self.data_dir = data_dir
        self.df = pd.read_csv(annotations_csv)
        self.df['label'] = 'AN'  # make them all the same object
        labels = list(self.df['label'].unique())
        self.class_map = ClassMap(labels)

    def __iter__(self) -> Any:
        # One namedtuple per annotation row.
        yield from self.df.itertuples()

    def __len__(self) -> int:
        # Number of annotation rows.
        return len(self.df)

    def record_id(self, o) -> Hashable:
        # The image filename identifies the record this row belongs to.
        return o.filename

    def parse_fields(self, o, record, is_new):
        # Per-image fields are only filled in the first time a record is seen.
        if is_new:
            image_path = self.data_dir / 'images' / o.filename
            record.set_filepath(image_path)
            record.set_img_size(ImgSize(width=o.width, height=o.height))
            record.detection.set_class_map(self.class_map)

        # Per-box fields are appended for every row.
        box = BBox.from_xyxy(o.xmin, o.ymin, o.xmax, o.ymax)
        record.detection.add_bboxes([box])
        record.detection.add_labels([o.label])
        
# Build the parser from the template record and the dataset directory.
parser = BBoxParser(template_record, data_dir)

In [None]:
def get_bblist(pred):  # this is just for parsing predictions, below
    """Return the boxes in `pred.pred.detection.bboxes` as [xmin, ymin, xmax, ymax] lists."""
    return [
        [box.xmin, box.ymin, box.xmax, box.ymax]
        for box in pred.pred.detection.bboxes
    ]

In [None]:
# sanity testing my k-fold magic
# NOTE: everything below is wrapped in a bare string literal, so none of it runs;
# the notebook merely echoes the string back as this cell's output.
"""commenting out now
n = 19  # len(df)
idmap = IDMap(list(df['filename'][0:n]))
nk = 5 # k-fold training
val_size = int(round(n/nk))
indlist =  list(range(n))
for k in range(nk):
    if k < nk-1: 
        val_list = indlist[k*val_size:(k+1)*val_size]
        train_list = indlist[0:k*val_size] + indlist[(k+1)*val_size:n]
    else:   # last one might be a bit different
        val_list = indlist[k*val_size:]
        train_list = indlist[0:-len(val_list)]
    val_id_list = [df['filename'][i] for i in val_list]
    train_id_list = [df['filename'][i] for i in train_list]


    presplits = [train_id_list,val_id_list]
    data_splitter = FixedSplitter(presplits)
    splits = data_splitter(idmap=idmap)
"""

"commenting out now\nn = 19  # len(df)\nidmap = IDMap(list(df['filename'][0:n]))\nnk = 5 # k-fold training\nval_size = int(round(n/nk))\nindlist =  list(range(n))\nfor k in range(nk):\n    if k < nk-1: \n        val_list = indlist[k*val_size:(k+1)*val_size]\n        train_list = indlist[0:k*val_size] + indlist[(k+1)*val_size:n]\n    else:   # last one might be a bit different\n        val_list = indlist[k*val_size:]\n        train_list = indlist[0:-len(val_list)]\n    val_id_list = [df['filename'][i] for i in val_list]\n    train_id_list = [df['filename'][i] for i in train_list]\n\n\n    presplits = [train_id_list,val_id_list]\n    data_splitter = FixedSplitter(presplits)\n    splits = data_splitter(idmap=idmap)\n"

In [None]:
# K-fold cross-validation: for every fold, fine-tune a detector on the training
# images, predict on the held-out images, and write those predictions (sorted by
# their worst score) to 'boxes_tl_k{k}.csv'.

n = len(df)
idmap, indlist = IDMap(list(df['filename'][0:n])), list(range(n))
nk = 5 # k-fold training
val_size = int(round(n/nk))
# NOTE: n / indlist / val_size count annotation *rows*.  They are kept so their values
# stay available to other cells, but the folds below are built over unique image
# filenames: BBoxParser.record_id is the filename, and splitting by row could put
# boxes of the same image into both the training and the validation fold.
image_ids = list(df['filename'].unique())
use_kfold_splits = True  # False -> parser.parse() with its default splitter (the old behaviour)


def _kfold_split(ids, nk, k):
    """Return (train_ids, val_ids) for fold `k` (0-based) out of `nk` folds of `ids`.

    The first nk-1 validation folds hold round(len(ids)/nk) consecutive ids each;
    the last fold takes whatever is left.
    """
    fold_size = int(round(len(ids) / nk))
    start = k * fold_size
    stop = (k + 1) * fold_size if k < nk - 1 else len(ids)
    return ids[:start] + ids[stop:], ids[start:stop]


# ---- configuration that does not change from fold to fold ----

# Transforms
# size is set to 384 because EfficientDet requires its inputs to be divisible by 128
image_size = 384
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])

# Just change the value of selection to try another model
selection, extra_args = 0, {}

if selection == 0:
    model_type = models.mmdet.retinanet
    backbone = model_type.backbones.resnet50_fpn_1x
elif selection == 1:
    # The Retinanet model is also implemented in the torchvision library
    model_type = models.torchvision.retinanet
    backbone = model_type.backbones.resnet50_fpn
elif selection == 2:
    model_type = models.ross.efficientdet
    backbone = model_type.backbones.tf_lite0
    # The efficientdet model requires an img_size parameter
    extra_args['img_size'] = image_size
elif selection == 3:
    model_type = models.ultralytics.yolov5
    backbone = model_type.backbones.small
    # The yolov5 model requires an img_size parameter
    extra_args['img_size'] = image_size

batch_size = 4
epochs, freeze_epochs, lr = 1, 1, 7e-5  # keep it short for testing
#epochs, freeze_epochs, lr = 60, 2, 7e-5  # full-length training run

#k = 0
#if k < nk:
for k in range(nk):

    print(f"\n--------------------------------------")
    print(f"k = {k+1}/{nk}:")

    # Split the data in to train and val
    if use_kfold_splits:
        train_id_list, val_id_list = _kfold_split(image_ids, nk, k)
        presplits = [train_id_list, val_id_list]
        train_records, valid_records = parser.parse(data_splitter=FixedSplitter(presplits))
    else:
        train_records, valid_records = parser.parse()

    # Datasets
    train_ds = Dataset(train_records, train_tfms)
    valid_ds = Dataset(valid_records, valid_tfms)

    print("model_type, backbone, extra_args =",model_type, backbone, extra_args)

    # Instantiate a fresh model (and metric) for this fold
    model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args)
    metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]

    # Data Loaders
    train_dl = model_type.train_dl(train_ds, batch_size=batch_size, num_workers=4, shuffle=True)
    valid_dl = model_type.valid_dl(valid_ds, batch_size=batch_size, num_workers=4, shuffle=False)

    # fastai learner
    learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)

    # training loop
    print("  Training Loop: epochs, freeze_epochs, lr =",epochs, freeze_epochs, lr)
    learn.fine_tune(epochs, lr, freeze_epochs=freeze_epochs)

    learn.save('iv_bbox_real_kfolding')

    # Inference
    print("  Inference")
    learn.load('iv_bbox_real_kfolding')
    # Predict in mini-batches: model_type.predict(model, valid_ds) pushes the whole
    # validation set through the network at once, which ran the GPU out of memory.
    infer_dl = model_type.infer_dl(valid_ds, batch_size=batch_size, shuffle=False)
    preds = model_type.predict_from_dl(model, infer_dl)
    results = []
    for i, pred in enumerate(preds):
        scores = pred.pred.detection.scores
        if len(scores) == 0: continue   # sometimes you get a zero box/prediction. ??
        worst_score = np.min(np.array(scores))
        csv_name = str(Path(valid_ds[i].common.filepath).stem)+'.csv'
        results.append([csv_name, get_bblist(pred), scores, worst_score, i])

    # store as pandas dataframe
    res_df = pd.DataFrame(results, columns=['filename', 'bblist','scores','worst_score','i'])
    res_df = res_df.sort_values('worst_score')  # order by worst score as a "top losses" kind of thing
    res_df.to_csv(f'boxes_tl_k{k}.csv', index=False)


--------------------------------------
k = 1/5:


  0%|          | 0/6309 [00:00<?, ?it/s]

[1m[1mINFO    [0m[1m[0m - [1m[34m[1mAutofixing records[0m[1m[34m[0m[1m[0m | [36micevision.parsers.parser[0m:[36mparse[0m:[36m136[0m


  0%|          | 0/1564 [00:00<?, ?it/s]

  0%|          | 0/391 [00:00<?, ?it/s]

model_type, backbone, extra_args = <module 'icevision.models.mmdet.models.retinanet' from '/home/shawley/envs/icevision/lib/python3.8/site-packages/icevision/models/mmdet/models/retinanet/__init__.py'> <icevision.models.mmdet.models.retinanet.backbones.resnet_fpn.MMDetRetinanetBackboneConfig object at 0x7ffa106d9790> {}




Use load_from_local loader
The model and loaded state dict do not match exactly

size mismatch for bbox_head.retina_cls.weight: copying a param with shape torch.Size([720, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([9, 256, 3, 3]).
size mismatch for bbox_head.retina_cls.bias: copying a param with shape torch.Size([720]) from checkpoint, the shape in current model is torch.Size([9]).
  Training Loop: epochs, freeze_epochs, lr = 1 1 7e-05


epoch,train_loss,valid_loss,COCOMetric,time
0,0.492385,0.448823,0.507725,00:58




epoch,train_loss,valid_loss,COCOMetric,time
0,0.377579,0.352681,0.595104,01:05


  Inference


RuntimeError: CUDA out of memory. Tried to allocate 3.44 GiB (GPU 0; 11.91 GiB total capacity; 8.92 GiB already allocated; 620.88 MiB free; 10.55 GiB reserved in total by PyTorch)