# **Detectron Half For Great Barrier Reef**

#### One of the main challenges in Kaggle competitions nowadays is that no Internet access is provided when the Notebook is submitted. Handling complex models such as Detectron2 can be tricky without Internet access, as it requires lots of dependencies and we also have to register our dataset. In this Notebook I use Detectron2 version 0.5. Hope you like this Notebook.

 ## A little about Detectron2
 Detectron2 is Facebook AI Research's next-generation software system that implements state-of-the-art object detection algorithms. It is a ground-up rewrite of the previous version, Detectron, and it originates from maskrcnn-benchmark.

### Starting this Notebook with some Basic Imports. As we move on I will be importing other Modules.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import json
import ast
from tqdm.notebook import tqdm
tqdm.pandas()
from sklearn.model_selection import GroupKFold
from PIL import Image
from string import Template
from IPython.display import display
from shutil import *
# common libraries
import os, json, cv2, random
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
def get_path(row):
    """Attach the JPEG location of a frame to its row.

    The path is built from the row's ``video_id`` and ``video_frame``
    fields, stored under ``image_path``, and the same row object is returned.
    """
    frame_file = f'../input/tensorflow-great-barrier-reef/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    row['image_path'] = frame_file
    return row

def get_bbox(annots):
    """Return one list of values per annotation mapping in ``annots``.

    Values keep the insertion order of each mapping; an empty input yields
    an empty list.
    """
    boxes = []
    for box in annots:
        boxes.append(list(box.values()))
    return boxes

In [None]:
# Load the competition's training metadata and preview the first rows.
df = pd.read_csv("../input/tensorflow-great-barrier-reef/train.csv")
df.head(5)

In [None]:
# Count the boxes of each frame from the raw annotation string.
# Assumes every annotation dict contributes exactly one 'x' character
# (keys x, y, width, height) -- TODO confirm against train.csv.
df["num_bbox"] = df['annotations'].progress_apply(lambda x: str.count(x, 'x'))

# Keep annotated frames only. The explicit copy makes df_train an independent
# frame, so the column assignments below do not write into a slice of df
# (which raises SettingWithCopyWarning and may silently not stick).
df_train = df[df["num_bbox"] > 0].copy()

# Parse the annotation strings into lists of dicts, then into lists of values.
df_train['annotations'] = df_train['annotations'].progress_apply(ast.literal_eval)
df_train['bboxes'] = df_train.annotations.progress_apply(get_bbox)

# Every frame is given the same fixed image size.
df_train["width"] = 1280
df_train["height"] = 720

# Add the image_path column row by row.
df_train = df_train.progress_apply(get_path, axis=1)

In [None]:
# Renumber the rows 0..n-1 (dropping the old index) and display the frame.
df_train.reset_index(drop=True,inplace=True)
df_train

In [None]:
# Split into 5 folds grouped by `sequence`, so a sequence is never shared
# between the training and validation side of a fold.
kf = GroupKFold(n_splits = 5) 
df_train['fold'] = -1
# NOTE(review): val_idx is presumably an array of row positions; using it with
# .loc is only correct because the index was reset to 0..n-1 beforehand.
for fold, (train_idx, val_idx) in enumerate(kf.split(df_train, y = df_train.video_id.tolist(), groups=df_train.sequence)):
    df_train.loc[val_idx, 'fold'] = fold

df_train.head(5)

In [None]:
!mkdir dataset
!mkdir dataset/images
!mkdir dataset/images/train
!mkdir dataset/images/val
!mkdir dataset/annotations

In [None]:
# Fold used for validation; every other fold is used for training.
SELECTED_FOLD = 4

# Running annotation id shared by the dataset2coco calls further down.
annotion_id=0

# Copy each frame into the train or val image folder depending on its fold.
# itertuples avoids the per-row .loc lookup and does not depend on the index
# being 0..n-1.
for row in tqdm(df_train.itertuples(index=False), total=len(df_train)):
    split = 'val' if row.fold == SELECTED_FOLD else 'train'
    copyfile(row.image_path, f'dataset/images/{split}/{row.image_id}.jpg')

In [None]:
# Sanity check: count the image files present in each split folder.
print(f'Number of training files:', len(os.listdir(f"dataset/images/train/")))
print(f'Number of validation files:', len(os.listdir(f"dataset/images/val/")))

In [None]:
def save_annot_json(json_annotation, filename):
    """Serialize ``json_annotation`` as JSON into ``filename``.

    The file is opened in text write mode, so existing content is replaced.
    """
    with open(filename, 'w') as out_file:
        json.dump(json_annotation, out_file)

In [None]:
def dataset2coco(df, dest_path):
    """Convert annotated frames into a COCO-style detection dictionary.

    Args:
        df: DataFrame with ``image_id``, ``height``, ``width`` and ``bboxes``
            columns. Each ``bboxes`` entry is a list of
            ``[x, y, width, height]`` boxes. The DataFrame index value of a
            row is used as its COCO image id.
        dest_path: Unused; kept so existing calls keep working.

    Returns:
        dict with the keys ``info``, ``licenses``, ``categories``, ``images``
        and ``annotations``. Boxes that extend past the right or bottom image
        edge are clipped to the image, and ``area`` is the clipped area.

    Side effects:
        Reads and advances the module-level ``annotion_id`` counter so that
        annotation ids stay unique across successive calls. If the counter
        does not exist yet, numbering starts at 0.
    """
    global annotion_id
    # Continue from the shared counter when it exists instead of failing with
    # a NameError when the cell defining it has not been run.
    next_id = globals().get("annotion_id", 0)

    annotations_json = {
        "info": [],
        "licenses": [],
        "categories": [],
        "images": [],
        "annotations": []
    }

    info = {
        "year": "2022",
        "version": "1",
        "description": "COTS dataset - COCO format",
        "contributor": "Owais Ahmad",
        "url": "https://kaggle.com",
        "date_created": "2022-01-18T16:40:06+00:00"
    }
    annotations_json["info"].append(info)

    lic = {
        "id": 1,
        "url": "",
        "name": "Unknown"
    }
    annotations_json["licenses"].append(lic)

    classes = {"id": 0, "name": "starfish", "supercategory": "none"}
    annotations_json["categories"].append(classes)

    for ann_row in df.itertuples():
        # Plain ints keep the structure JSON-serializable.
        img_width = int(ann_row.width)
        img_height = int(ann_row.height)

        images = {
            "id": ann_row[0],
            "license": 1,
            "file_name": ann_row.image_id + '.jpg',
            "height": img_height,
            "width": img_width,
            "date_captured": "2022-01-18T16:43:26+00:00"
        }
        annotations_json["images"].append(images)

        for bbox in ann_row.bboxes:
            x, y = bbox[0], bbox[1]

            # Some boxes in COTS extend outside the image: shrink them so the
            # right/bottom edge stops at the image border. The remaining size
            # is (image size - origin), not (origin - image size).
            b_width = min(bbox[2], img_width - x)
            b_height = min(bbox[3], img_height - y)

            image_annotations = {
                "id": next_id,
                "image_id": ann_row[0],
                "category_id": 0,
                "bbox": [x, y, b_width, b_height],
                # Area of the box actually stored, i.e. after clipping.
                "area": b_width * b_height,
                "segmentation": [],
                "iscrowd": 0
            }

            next_id += 1
            annotations_json["annotations"].append(image_annotations)

    # Publish the advanced counter for the next call.
    annotion_id = next_id

    print(f"Dataset COTS annotation to COCO json format completed! Files: {len(df)}")
    return annotations_json

In [None]:
# NOTE(review): the same conversion is repeated in the next code cell, whose
# result is the one written to disk; this call still advances the global
# annotation id counter.
train_annot_json = dataset2coco(df_train[df_train.fold != SELECTED_FOLD], f"dataset/images/train/")

### The JSON annotation files are saved in a separate annotations folder, which I will use later when registering our dataset with Detectron2

In [None]:
# Build the COCO dictionaries for both splits: every fold except
# SELECTED_FOLD is training data, SELECTED_FOLD is validation data.
# The second argument is not used by dataset2coco.
train_annot_json = dataset2coco(df_train[df_train.fold != SELECTED_FOLD], f"dataset/images/train/")
val_annot_json = dataset2coco(df_train[df_train.fold == SELECTED_FOLD], f"dataset/images/valid")


# Write both dictionaries to the annotations folder as JSON.
save_annot_json(train_annot_json, f"dataset/annotations/train.json")
save_annot_json(val_annot_json, f"dataset/annotations/valid.json")

### Helper functions, used these for debugging purposes
### The Detectron2 build only succeeds if the CUDA version is correct

In [None]:
# Print the GPU/driver status and the CUDA compiler version.
!nvidia-smi
!nvcc --version

In [None]:
import torch, torchvision
# Print the installed PyTorch version and whether CUDA is usable from it.
print(torch.__version__, torch.cuda.is_available())

* ### The submission notebooks don't have access to the internet, so in order to install detectron2 we need to download its dependencies with pip download, copy them into the output directory and install them as shown in this notebook

In [None]:
# Copy the offline package bundle from the input dataset into the working directory.
!cp  -r ../input/detectron-05/ ./detectron-05/

#### Standard procedure to install Detectron2. Use this if you are facing issues with the offline version

In [None]:
#!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' #
#!git clone https://github.com/facebookresearch/detectron2.git #

In [None]:
# Install from local files only: --no-index keeps pip away from PyPI and
# --find-links points it at the bundled packages folder.
!pip install /kaggle/working/detectron-05/whls/pycocotools-2.0.2/dist/pycocotools-2.0.2.tar --no-index --find-links ../input/detectron-05/whls 
!pip install /kaggle/working/detectron-05/whls/fvcore-0.1.5.post20211019/fvcore-0.1.5.post20211019 --no-index --find-links ../input/detectron-05/whls 
!pip install /kaggle/working/detectron-05/whls/antlr4-python3-runtime-4.8/antlr4-python3-runtime-4.8 --no-index --find-links ../input/detectron-05/whls 
!pip install /kaggle/working/detectron-05/whls/detectron2-0.5/detectron2 --no-index --find-links ../input/detectron-05/whls 

### Base setup For Detectron2 Training

In [None]:
# detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode
from detectron2.data.datasets import register_coco_instances
#import shutil
#shutil.rmtree('detectron_clone')

In [None]:
# Run detectron2's environment report module to check the installation.
!python -m detectron2.utils.collect_env

### In order to use Detectron2 we need to register our dataset with Detectron2. While processing the dataset I generated the corresponding train.json and valid.json, which I will use now.

In [None]:
# Register both splits: (dataset name, extra metadata, annotation JSON, image folder).
register_coco_instances( 'Train_Great_Barrier',{},'/kaggle/working/dataset/annotations/train.json','/kaggle/working/dataset/images/train/')
register_coco_instances( 'Valid_Great_Barrier',{},'/kaggle/working/dataset/annotations/valid.json','/kaggle/working/dataset/images/val/')


In [None]:
# Fetch the registered training records and their metadata for the preview below.
dataset_dicts = DatasetCatalog.get("Train_Great_Barrier")
metadata_dicts = MetadataCatalog.get("Train_Great_Barrier")

In [None]:
from detectron2.utils.visualizer import ColorMode
from detectron2.engine import DefaultTrainer

In [None]:
# Draw four randomly chosen training records, with their annotations, in a 2x2 grid.
fig, ax = plt.subplots(2, 2, figsize =(30,20))
panels = [ax[0][0], ax[1][0], ax[0][1], ax[1][1]]
for panel, sample in zip(panels, random.sample(dataset_dicts, 4)):
    frame = cv2.imread(sample["file_name"])
    drawer = Visualizer(
        frame[:, :, :],
        metadata=metadata_dicts,
        scale=0.8,
        instance_mode=ColorMode.IMAGE_BW,
    )
    rendered = drawer.draw_dataset_dict(sample)
    panel.grid(False)
    panel.axis('off')
    # Reverse the channel order of the rendered image before showing it.
    panel.imshow(rendered.get_image()[:, :, ::-1])

# Data Augmentation

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader, build_detection_train_loader
from detectron2.data import detection_utils as utils
import detectron2.data.transforms as T

def custom_mapper(dataset_dict):
    """Load one dataset record and apply random augmentations to it.

    Args:
        dataset_dict: A dataset record with at least ``file_name`` and
            ``annotations``. It is deep-copied first, so the caller's record
            is not modified.

    Returns:
        The copied record with ``image`` set to a float32 tensor in
        channel-first layout and ``instances`` built from the transformed,
        non-crowd annotations (empty instances filtered out). The
        ``annotations`` key is removed from the copy.
    """
    # `copy` is not imported at file level; import it here so the deepcopy
    # below does not raise NameError.
    import copy

    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
                      T.RandomBrightness(0.5, 2.1),
                      # Vertical and horizontal flips, each applied with probability 0.5.
                      T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
                      T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
                      #T.RandomCrop("absolute", (640, 640))
                      ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    # HWC -> CHW, as float32.
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    # Apply the same transforms to the boxes so they follow the image.
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
class AugTrainer(DefaultTrainer):
    """DefaultTrainer variant whose training loader uses ``custom_mapper``."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Return the training data loader for ``cfg`` with ``custom_mapper`` as mapper."""
        train_loader = build_detection_train_loader(cfg, mapper=custom_mapper)
        return train_loader

In [None]:
# Start from the model-zoo Faster R-CNN (R50-FPN, 3x schedule) configuration.
cfg = get_cfg()
config_name = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml" 
#config_name = "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
cfg.merge_from_file(model_zoo.get_config_file(config_name))

cfg.DATASETS.TRAIN = ("Train_Great_Barrier",)
cfg.DATASETS.TEST = ("Valid_Great_Barrier",)

cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)

cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.0008

cfg.SOLVER.WARMUP_ITERS = 200
cfg.SOLVER.MAX_ITER = 2500 #adjust up if val mAP is still rising, adjust down if overfit
# NOTE(review): both steps fall inside the 200-iteration warmup configured
# above -- confirm this schedule is intended.
cfg.SOLVER.STEPS = (11, 50) # must be less than  MAX_ITER 
cfg.SOLVER.GAMMA = 0.5


cfg.SOLVER.CHECKPOINT_PERIOD = 700  # Small value=Frequent save need a lot of storage.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 16
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1


os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)


# Train with the custom trainer defined above. Only one trainer is created:
# building a DefaultTrainer afterwards would replace it, so custom_mapper's
# augmentations would never run and the model would be constructed twice.
trainer = AugTrainer(cfg) 
# NOTE(review): cfg.MODEL.WEIGHTS is presumably only loaded by
# resume_or_load(); with the call disabled, training appears to start without
# the model-zoo checkpoint. Confirm this is intended (the download needs
# internet access).
#trainer.resume_or_load(resume=False)
trainer.train()


### Dumping config files variables as we may need them while Inferencing 

In [None]:
# Read the line-delimited metrics log from the output folder, order it by
# iteration and show it transposed.
metrics_df = pd.read_json("./output/metrics.json", orient="records", lines=True)
mdf = metrics_df.sort_values("iteration")
mdf.T

In [None]:
# 1. Loss curve
fig, ax = plt.subplots()

mdf1 = mdf[~mdf["total_loss"].isna()]
ax.plot(mdf1["iteration"], mdf1["total_loss"], c="C0", label="train")
if "validation_loss" in mdf.columns:
    mdf2 = mdf[~mdf["validation_loss"].isna()]
    ax.plot(mdf2["iteration"], mdf2["validation_loss"], c="C1", label="validation")

#ax.set_ylim([0, 0.5])
ax.legend()
ax.set_title("Loss curve")
plt.show()

In [None]:
# 1. Loss curve
fig, ax = plt.subplots()

mdf1 = mdf[~mdf["fast_rcnn/cls_accuracy"].isna()]
ax.plot(mdf1["iteration"], mdf1["fast_rcnn/cls_accuracy"], c="C0", label="train")
# ax.set_ylim([0, 0.5])
ax.legend()
ax.set_title("cls_accuracy")
plt.show()


##  If this Notebook helps you a bit, please up-vote it to keep me motivated and enthusiastic, as I have worked very hard on it 😁 Thanks!
- Follow me on Linkedin [Link](https://www.linkedin.com/in/owaiskhan9654/)
- Also see my Portfolio [Link](https://owaiskhan9654.github.io/)

## I might be overfitting Detectron2 in this Notebook, but there is a hidden reason for that which I will explain. See you in Part 2 with more explanations

# Predictor
A predictor is defined with a 0.5 score threshold; it gives bounding boxes and labels for the test images.

In [None]:
# Archive the whole output folder into output.zip.
!zip -r  output.zip output  

In [None]:
import pickle
# Pickle the config object to cfg.pkl so it can be reloaded later.
with open("cfg.pkl", "wb") as f:
    pickle.dump(cfg, f)

In [None]:
# Point the config at the final checkpoint in the output folder and build a
# predictor from it.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
#cfg.DATASETS.TEST = ("Test_Great_Barrier", )
predictor = DefaultPredictor(cfg)


In [None]:
from detectron2.evaluation.evaluator import DatasetEvaluator
import pycocotools.mask as mask_util
from detectron2.engine import BestCheckpointer
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

In [None]:
class F2ScoreEvaluator(DatasetEvaluator):
    """Evaluate box detections with the F2 score averaged over IoU thresholds.

    For each IoU threshold (0.30, 0.35, ..., 0.80) true positives, false
    positives and false negatives are accumulated over all processed images;
    the F2 score is computed per threshold and the mean is reported under the
    key ``"F2 Score"``.

    NOTE(review): this is intended to mirror the competition metric; confirm
    the matching rule (greedy, in descending score order) against the official
    definition. Counts are kept per process and are not gathered across
    workers, so it assumes single-process evaluation.
    """

    def __init__(self, dataset_name):
        """Cache the ground-truth annotations of ``dataset_name`` by image id."""
        dataset_dicts = DatasetCatalog.get(dataset_name)
        self.annotations_cache = {item['image_id']:item['annotations'] for item in dataset_dicts}
        self.iou_thresholds = [round(0.3 + 0.05 * step, 2) for step in range(11)]
        self.reset()

    def reset(self):
        """Clear the accumulated [tp, fp, fn] counts of every threshold."""
        self._counts = {thr: [0, 0, 0] for thr in self.iou_thresholds}

    def process(self, inputs, outputs):
        """Accumulate match counts for one batch of model inputs and outputs.

        Args:
            inputs: Batch records; each must carry ``image_id``.
            outputs: Model outputs; each must carry ``instances`` with
                ``scores`` and ``pred_boxes``.
        """
        for sample, output in zip(inputs, outputs):
            instances = output["instances"].to("cpu")
            # Highest-scoring predictions get the first pick of a ground truth.
            ranked = instances.scores.argsort(descending=True)
            pred_boxes = instances.pred_boxes.tensor[ranked].tolist()
            # Convert ground truth to (x1, y1, x2, y2) to match pred_boxes.
            # Assumes XYWH when a record has no bbox_mode -- TODO confirm.
            gt_boxes = [
                BoxMode.convert(
                    list(ann["bbox"]),
                    ann.get("bbox_mode", BoxMode.XYWH_ABS),
                    BoxMode.XYXY_ABS,
                )
                for ann in self.annotations_cache.get(sample["image_id"], [])
            ]
            iou_rows = [[self._iou(pred, gt) for gt in gt_boxes] for pred in pred_boxes]
            for thr, totals in self._counts.items():
                matched = self._count_matches(iou_rows, len(gt_boxes), thr)
                totals[0] += matched
                totals[1] += len(pred_boxes) - matched
                totals[2] += len(gt_boxes) - matched

    def evaluate(self):
        """Return ``{"F2 Score": mean F2 over all IoU thresholds}``."""
        per_threshold = []
        for tp, fp, fn in self._counts.values():
            # F-beta with beta=2: 5*TP / (5*TP + 4*FN + FP).
            denom = 5 * tp + 4 * fn + fp
            per_threshold.append(5 * tp / denom if denom else 0.0)
        return {"F2 Score": sum(per_threshold) / len(per_threshold)}

    @staticmethod
    def _iou(box_a, box_b):
        """Return the intersection-over-union of two (x1, y1, x2, y2) boxes."""
        overlap_w = min(box_a[2], box_b[2]) - max(box_a[0], box_b[0])
        overlap_h = min(box_a[3], box_b[3]) - max(box_a[1], box_b[1])
        if overlap_w <= 0 or overlap_h <= 0:
            return 0.0
        intersection = overlap_w * overlap_h
        area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
        area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
        union = area_a + area_b - intersection
        return intersection / union if union > 0 else 0.0

    @staticmethod
    def _count_matches(iou_rows, num_gt, threshold):
        """Greedily match predictions to ground truths; return the match count.

        ``iou_rows`` holds one row of IoUs per prediction, in matching order.
        Each ground truth can be matched at most once.
        """
        taken = [False] * num_gt
        matched = 0
        for row in iou_rows:
            best_idx, best_iou = -1, threshold
            for gt_idx, value in enumerate(row):
                if not taken[gt_idx] and value >= best_iou:
                    best_idx, best_iou = gt_idx, value
            if best_idx >= 0:
                taken[best_idx] = True
                matched += 1
        return matched

In [None]:
print('################################################################')
print('################### test the best model: F2 Score ##################')
print('################################################################')
#cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_best.pth")  # path to the model we just trained
# NOTE(review): the predictor used in this section was built before this
# threshold is set, so the new value presumably does not affect it; with the
# line above disabled it also still holds the weights it was built with.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
#predictor = DefaultPredictor(cfg)
evaluator = F2ScoreEvaluator("Valid_Great_Barrier")
val_loader = build_detection_test_loader(cfg, "Valid_Great_Barrier")
FS_bm=inference_on_dataset(predictor.model, val_loader, evaluator)['F2 Score']
print("F2 Score for best model=",FS_bm)

print('################################################################')
print('################### test the best model : AP@50-95 ##################')
print('################################################################')
# Use the registered validation dataset name; the previously referenced
# variable was never defined and raised NameError.
evaluator = COCOEvaluator("Valid_Great_Barrier", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "Valid_Great_Barrier")
AP_bm=inference_on_dataset(predictor.model, val_loader, evaluator)['bbox']['AP']
print("AP for best model=",AP_bm)

print('################################################################')
print('################## test the final model: F2 Score ##################')
print('################################################################')
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
# Rebuild the predictor so that the weights path and threshold set above are used.
predictor = DefaultPredictor(cfg)
evaluator = F2ScoreEvaluator("Valid_Great_Barrier")
val_loader = build_detection_test_loader(cfg, "Valid_Great_Barrier")
FS_fm=inference_on_dataset(predictor.model, val_loader, evaluator)['F2 Score']
print("F2 Score for the final model=",FS_fm)
print('################################################################')
print('################## test final model: AP@50-95 ##################')
print('################################################################')
evaluator = COCOEvaluator("Valid_Great_Barrier", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "Valid_Great_Barrier")
AP_fm=inference_on_dataset(predictor.model, val_loader, evaluator)['bbox']['AP']
print("AP for the final model=",AP_fm)