# Aircraft Detection using IceVision

## Installing IceVision and IceData


In [None]:
!wget https://raw.githubusercontent.com/airctic/icevision/master/icevision_install.sh

In [None]:
# Torch - Torchvision - IceVision - IceData - MMDetection - YOLOv5 - EfficientDet Installation

# Choose your installation target: cuda11 or cuda10 or cpu
!bash icevision_install.sh cuda11 master


In [None]:
import IPython
IPython.Application.instance().kernel.do_shutdown(True)

## Imports

In [None]:
# !pip install --upgrade -q wandb

In [None]:
!pip install sahi -q

In [None]:
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# wandb_api = user_secrets.get_secret("wandb_api")

In [None]:
# import wandb
# from wandb.keras import WandbCallback
# wandb.login(key=wandb_api)

In [None]:
from icevision.all import *
import os
import ast
import random
import pandas as pd
from fastai.callback.wandb import *
from fastai.callback.tracker import SaveModelCallback
from icevision.imports import *
from icevision.utils import *
from icevision.data import *
from icevision.metrics.metric import *
from icevision.models.inference_sahi import IceSahiModel

In [None]:
!cp -r ../input/airbus-aircrafts-sample-dataset ./

In [None]:
!ls

In [None]:
!pwd

## Visual Inspection

In [None]:
# Root folder of the dataset copied into the working directory
DATA_DIR = Path('./airbus-aircrafts-sample-dataset')
# Every .jpg file located directly under DATA_DIR/images
img_list = list(DATA_DIR.glob('images/*.jpg'))
# Open and display one image picked at random from that list
pickone = random.choice(img_list)
img = PIL.Image.open(pickone)
display(img)

In [None]:
# Keep the real paths of the JPEG files found by the glob above (the previous
# version rebuilt them as DATA_DIR / name, which dropped the images/ folder).
only_files = [f for f in img_list if os.path.isfile(f) and f.name.endswith(".jpg")]
print("Found {} images files in {}".format(len(only_files), DATA_DIR))

# PIL reports Image.size as (width, height), so unpack in that order.
IMAGE_WIDTH, IMAGE_HEIGHT = img.size
# getbands() yields one entry per channel; len(img.mode) is wrong for
# multi-character single-band modes such as "I;16".
num_channels = len(img.getbands())
print("Image size: {}".format((IMAGE_HEIGHT, IMAGE_WIDTH)))
print("Num channels: {}".format(num_channels))

## Read CSV file

In [None]:
df = pd.read_csv(DATA_DIR / 'annotations.csv')

### `geometry` is encoded as string


In [None]:
geometry = df.loc[0]['geometry']
geometry

In [None]:
geometry2 = ast.literal_eval(geometry.rstrip('\r\n'))
geometry2

### Converting `geometry` column to a proper format

In [None]:
def f(x):
    """Parse the text of a ``geometry`` cell into the Python object it spells.

    Trailing carriage-return / newline characters are removed before the
    text is evaluated with ``ast.literal_eval``.
    """
    cleaned = x.rstrip('\r\n')
    parsed = ast.literal_eval(cleaned)
    return parsed

df = pd.read_csv(DATA_DIR / "annotations.csv", 
                converters={'geometry': f})
df.head(10)

### Calculating the Bounding Box `(xmin, ymin, xmax, ymax)`

In [None]:
geometry = df.loc[0]['geometry']
geometry

In [None]:
arr = np.array(geometry).T

In [None]:
xmin = np.min(arr[0])
ymin = np.min(arr[1])
xmax = np.max(arr[0])
ymax = np.max(arr[1])
(xmin, ymin, xmax, ymax)

### Updating the dataframe 

In [None]:
def getBounds(geometry):
    """Return the axis-aligned bounding box of a polygon.

    Args:
        geometry: Sequence of ``(x, y)`` vertex pairs.

    Returns:
        ``(xmin, ymin, xmax, ymax)``, or ``np.nan`` when ``geometry`` cannot
        be read as a sequence of points (e.g. a missing value or an empty
        list).
    """
    try:
        # Transpose so that row 0 holds every x and row 1 every y.
        arr = np.array(geometry).T
        xs, ys = arr[0], arr[1]
        return (np.min(xs), np.min(ys), np.max(xs), np.max(ys))
    except (TypeError, ValueError, IndexError):
        # Only malformed geometry is mapped to NaN; interrupts and
        # unrelated errors are no longer swallowed.
        return np.nan

def getWidth(bounds):
    """Return the width of a bounding box.

    Args:
        bounds: ``(xmin, ymin, xmax, ymax)`` tuple.

    Returns:
        ``abs(xmax - xmin)``, or ``np.nan`` when ``bounds`` is not a
        4-tuple of numbers (e.g. the NaN placeholder of a failed box).
    """
    try:
        xmin, _, xmax, _ = bounds
        return np.abs(xmax - xmin)
    except (TypeError, ValueError):
        # TypeError: not iterable / not numeric; ValueError: wrong length.
        return np.nan

def getHeight(bounds):
    """Return the height of a bounding box.

    Args:
        bounds: ``(xmin, ymin, xmax, ymax)`` tuple.

    Returns:
        ``abs(ymax - ymin)``, or ``np.nan`` when ``bounds`` is not a
        4-tuple of numbers (e.g. the NaN placeholder of a failed box).
    """
    try:
        _, ymin, _, ymax = bounds
        return np.abs(ymax - ymin)
    except (TypeError, ValueError):
        # TypeError: not iterable / not numeric; ValueError: wrong length.
        return np.nan

# 'class' is a Python keyword; store the column as 'label' instead so it can
# be read as a plain attribute of each row later on.
df.rename(columns={'class': 'label'}, inplace=True)

# Derive the bounding box of every polygon, then its width and height.
df['bounds'] = df['geometry'].apply(getBounds)
df['width'] = df['bounds'].apply(getWidth)
df['height'] = df['bounds'].apply(getHeight)
df.head(10)

## Create the Parser

The first step is to create a template record for our specific type of dataset, in this case we're doing standard object detection:

In [None]:
template_record = ObjectDetectionRecord()

Now use the method `generate_template` that will print out all the necessary steps we have to implement.

In [None]:
Parser.generate_template(template_record)

We can copy the template and use it as our starting point. Let's go over each of the methods we have to define:

- `__init__`: What happens here is completely up to you, normally we have to pass some reference to our data, `data_dir` in our case.

- `__iter__`: This tells our parser how to iterate over our data, each item returned here will be passed to `parse_fields` as `o`. In our case we call `df.itertuples` to iterate over all `df` rows.

- `__len__`: How many items we will be iterating over.

- `record_id`: Should return a `Hashable` (`int`, `str`, etc). In our case we want all the dataset items that have the same `image_id` to be unified in the same record.

- `parse_fields`: Here is where the attributes of the record are collected, the template will suggest what methods we need to call on the record and what parameters it expects. The parameter `o` it receives is the item returned by `__iter__`.

!!! danger "Important"  
    Be sure to pass the correct type on all record methods!

In [None]:
class AirbusParser(Parser):
    """Parser that turns the Airbus aircraft annotation table into records.

    Each row of ``df`` describes one annotated object; rows that share an
    ``image_id`` end up in the same record.
    """

    @staticmethod
    def convert_fn(x):
        """Convert the string form of a ``geometry`` cell into a Python object.

        Declared as a static method so that it works both as
        ``AirbusParser.convert_fn`` and as ``self.convert_fn``.
        """
        return ast.literal_eval(x.rstrip('\r\n'))

    def __init__(self, template_record, data_dir, df):
        """Store the data location and annotations, and build the class map.

        Args:
            template_record: Record template forwarded to ``Parser``.
            data_dir: Dataset root; images are read from ``data_dir / 'images'``.
            df: Annotation table providing the ``image_id``, ``label`` and
                ``bounds`` columns read by this parser.
        """
        super().__init__(template_record=template_record)

        self.data_dir = data_dir
        # Alternative: load the annotations here instead of receiving ``df``:
        # self.df = pd.read_csv(data_dir / "annotations.csv", converters={'geometry': self.convert_fn})
        self.df = df

        self.class_map = ClassMap(list(self.df['label'].unique()))

    def __iter__(self) -> Any:
        """Yield one named tuple per annotation row."""
        yield from self.df.itertuples()

    def __len__(self) -> int:
        """Return the number of annotation rows."""
        return len(self.df)

    def record_id(self, o) -> Hashable:
        """Return the key used to group rows into records: the image id."""
        return o.image_id

    def parse_fields(self, o, record, is_new):
        """Fill ``record`` with the data carried by row ``o``.

        Args:
            o: Row produced by ``__iter__``.
            record: Record being populated.
            is_new: Whether this is the first row seen for this record; the
                per-image fields are only set in that case.
        """
        if is_new:
            filepath = self.data_dir / 'images' / o.image_id
            record.set_filepath(filepath)

            # The image size is only set when the file is actually present.
            if filepath.exists():
                record.set_img_size(get_img_size(filepath))

            record.detection.set_class_map(self.class_map)

        # Every row contributes one box and its label.
        xmin, ymin, xmax, ymax = o.bounds
        record.detection.add_bboxes([BBox.from_xyxy(xmin, ymin, xmax, ymax)])
        record.detection.add_labels([o.label])

Let's parse the data and randomly split it into training and validation records with `Parser.parse`:

In [None]:
data_dir = Path('./airbus-aircrafts-sample-dataset')
data_dir

In [None]:
data_dir.ls()

In [None]:
parser = AirbusParser(template_record, data_dir, df)

In [None]:
train_records, valid_records = parser.parse()

Let's take a look at one record:

In [None]:
show_record(train_records[0], display_label=False, figsize=(14, 10))

In [None]:
train_records[0]

In [None]:
# Transforms
# image_size is set to 640; EfficientDet requires its inputs to be divisible by 128
# Try image_size = 512 or image_size = 640 or image_size = 768 (incrementing by 128px)
image_size = 640
# presize is 128px larger than image_size; both values are passed to aug_tfms
presize = image_size + 128
# Training pipeline: the aug_tfms augmentations followed by Normalize
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=presize), tfms.A.Normalize()])
# Validation pipeline: resize_and_pad to image_size followed by Normalize
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])

In [None]:
# Datasets
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)

In [None]:
# Show an element of the train_ds with augmentation transformations applied
samples = [train_ds[0] for _ in range(3)]
show_samples(samples, ncols=3)

## Models

We've selected a few of the many options below. You can easily pick which libraries, models, and backbones you like to use.

In [None]:
# Just change the value of selection to try another model.
# The branches form a single if/elif chain so that exactly one of them runs,
# and an unknown value fails immediately with a clear message instead of
# leaving model_type / backbone undefined.

selection = 13

extra_args = {}

if selection == 0:
    model_type = models.mmdet.vfnet
    backbone = model_type.backbones.resnet50_fpn_mstrain_2x

elif selection == 1:
    model_type = models.mmdet.retinanet
    backbone = model_type.backbones.resnet50_fpn_1x
    # extra_args['cfg_options'] = {
    #   'model.bbox_head.loss_bbox.loss_weight': 2,
    #   'model.bbox_head.loss_cls.loss_weight': 0.8,
    #    }

elif selection == 2:
    model_type = models.mmdet.faster_rcnn
    backbone = model_type.backbones.resnet101_fpn_2x
    # extra_args['cfg_options'] = {
    #   'model.roi_head.bbox_head.loss_bbox.loss_weight': 2,
    #   'model.roi_head.bbox_head.loss_cls.loss_weight': 0.8,
    #    }

elif selection == 3:
    model_type = models.mmdet.ssd
    backbone = model_type.backbones.ssd300

elif selection == 4:
    model_type = models.mmdet.yolox
    backbone = model_type.backbones.yolox_s_8x8

elif selection == 5:
    model_type = models.mmdet.yolof
    backbone = model_type.backbones.yolof_r50_c5_8x8_1x_coco

elif selection == 6:
    model_type = models.mmdet.detr
    backbone = model_type.backbones.r50_8x2_150e_coco

elif selection == 7:
    model_type = models.mmdet.deformable_detr
    backbone = model_type.backbones.twostage_refine_r50_16x2_50e_coco

elif selection == 8:
    model_type = models.mmdet.fsaf
    backbone = model_type.backbones.x101_64x4d_fpn_1x_coco

elif selection == 9:
    model_type = models.mmdet.sabl
    backbone = model_type.backbones.r101_fpn_gn_2x_ms_640_800_coco

elif selection == 10:
    model_type = models.mmdet.centripetalnet
    backbone = model_type.backbones.hourglass104_mstest_16x6_210e_coco

elif selection == 11:
    # The Retinanet model is also implemented in the torchvision library
    model_type = models.torchvision.retinanet
    backbone = model_type.backbones.resnet50_fpn

elif selection == 12:
    model_type = models.ross.efficientdet
    backbone = model_type.backbones.tf_lite0
    # The efficientdet model requires an img_size parameter
    extra_args['img_size'] = image_size

elif selection == 13:
    model_type = models.ultralytics.yolov5
    backbone = model_type.backbones.large
    # The yolov5 model requires an img_size parameter
    extra_args['img_size'] = image_size

else:
    raise ValueError(
        "Unknown selection {!r}: expected an integer from 0 to 13".format(selection)
    )

model_type, backbone, extra_args

In [None]:
# Instantiate the model
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args) 

In [None]:
# Data Loaders
train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)

## Training

In [None]:
class COCOMetricType(Enum):
    """Available options for `COCOMetric`."""

    # Each member's value is the string that `COCOMetric.finalize` passes to
    # `create_coco_eval` as `metric_type`.
    bbox = "bbox"
    mask = "segm"
    keypoint = "keypoints"


class COCOMetric(Metric):
    """Average-precision metric built on the [cocoapi evaluator](https://github.com/cocodataset/cocoapi).

    Ground truths and predictions are collected with `accumulate` and turned
    into the COCO statistics by `finalize`.

    # Arguments
        metric_type: Dependent on the task you're solving.
        iou_thresholds: Passed through to `create_coco_eval`.
        print_summary: If `True`, prints a table with statistics.
        show_pbar: If `True` shows pbar when preparing the data for evaluation.
    """

    def __init__(
        self,
        metric_type: COCOMetricType = COCOMetricType.bbox,
        iou_thresholds: Optional[Sequence[float]] = None,
        print_summary: bool = False,
        show_pbar: bool = False,
    ):
        self.metric_type = metric_type
        self.iou_thresholds = iou_thresholds
        self.print_summary = print_summary
        self.show_pbar = show_pbar
        # Parallel lists: entry i of each belongs to the same prediction.
        self._records = []
        self._preds = []

    def _reset(self):
        """Empty both accumulation lists in place."""
        for store in (self._records, self._preds):
            store.clear()

    def accumulate(self, preds):
        """Remember the ground truth and the prediction of every item in `preds`."""
        for prediction in preds:
            self._records.append(prediction.ground_truth)
            self._preds.append(prediction.pred)

    def finalize(self) -> Dict[str, float]:
        """Run the COCO evaluation on everything accumulated and return the stats.

        The accumulated data is cleared before returning.
        """
        eval_kwargs = dict(
            records=self._records,
            preds=self._preds,
            metric_type=self.metric_type.value,
            iou_thresholds=self.iou_thresholds,
            show_pbar=self.show_pbar,
        )

        # Evaluation output is always captured.
        with CaptureStdout():
            coco_eval = create_coco_eval(**eval_kwargs)
            coco_eval.evaluate()
            coco_eval.accumulate()

        # The summary table is only propagated when print_summary is set.
        with CaptureStdout(propagate_stdout=self.print_summary):
            coco_eval.summarize()

        # Names of the entries of coco_eval.stats, in index order.
        stat_names = (
            "AP (IoU=0.50:0.95) area=all",
            "AP (IoU=0.50) area=all",
            "AP (IoU=0.75) area=all",
            "AP (IoU=0.50:0.95) area=small",
            "AP (IoU=0.50:0.95) area=medium",
            "AP (IoU=0.50:0.95) area=large",
            "AR (IoU=0.50:0.95) area=all maxDets=1",
            "AR (IoU=0.50:0.95) area=all maxDets=10",
            "AR (IoU=0.50:0.95) area=all maxDets=100",
            "AR (IoU=0.50:0.95) area=small maxDets=100",
            "AR (IoU=0.50:0.95) area=medium maxDets=100",
            "AR (IoU=0.50:0.95) area=large maxDets=100",
        )
        stats = coco_eval.stats
        logs = {name: stats[idx] for idx, name in enumerate(stat_names)}

        self._reset()
        return logs

In [None]:
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]

## Training using fastai

In [None]:
# wandb.init(project="aircraft-detection", name="yolov5-large-384", reinit=True)

In [None]:
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)
#cbs=[WandbCallback(), SaveModelCallback()]

In [None]:
learn.lr_find()

In [None]:
# NOTE(review): the learning rate is hard-coded — presumably copied from an earlier
# `learn.lr_find()` run; confirm and re-derive it if the model or data changes.
learn.fine_tune(40, 0.009120108559727669, freeze_epochs=1)

## Training using Pytorch Lightning

In [None]:
# class LightModel(model_type.lightning.ModelAdapter):
#     def configure_optimizers(self):
#         return Adam(self.parameters(), lr=1e-4)
    
# light_model = LightModel(model, metrics=metrics)

In [None]:
# trainer = pl.Trainer(max_epochs=20, gpus=1)
# trainer.fit(light_model, train_dl, valid_dl)

## Showing the results

In [None]:
model_type.show_results(model, valid_ds, detection_threshold=.5)

## Batch Inference (Prediction)

In [None]:
infer_dl = model_type.infer_dl(valid_ds, batch_size=4, shuffle=False)
preds = model_type.predict_from_dl(model, infer_dl, keep_images=True)

In [None]:
show_preds(preds=preds[:4])

In [None]:
# wandb.finish()

## SAHI

In [None]:
sahimodel = IceSahiModel(model_type=model_type, model=model, class_map=parser.class_map, tfms=valid_tfms, confidence_threshold=0.4)

In [1]:
img_list = list(DATA_DIR.glob('images/*.jpg'))
pickone = random.choice(img_list)
img = PIL.Image.open(pickone)
display(img)

NameError: name 'DATA_DIR' is not defined

In [None]:
# Sliced inference on `img`: 128x128 slices with an overlap ratio of 0.2 along
# both height and width. The annotated image is requested via return_img.
pred = sahimodel.get_sliced_prediction(
    img,
    keep_sahi_format=False,
    return_img=True,
    slice_height=128,
    slice_width=128,
    overlap_height_ratio=0.2,
    overlap_width_ratio=0.2,
)

In [None]:
pred['img']

In [None]:
# from icevision.models.checkpoint import *
# save_icevision_checkpoint(model,
#                         model_name='ultralytics.yolov5', 
#                         backbone_name='large',
#                         img_size=image_size,
#                         classes=parser.class_map.get_classes(),
#                         filename='./models/model_checkpoint.pth',
#                         meta={'icevision_version': '0.12.0'})
