# Imports and Dependencies

In [None]:
import torch
print(torch.__version__)
# print(torch.version.cuda)
!nvcc --version

In [None]:
!python -m pip install pyyaml==5.3.1
import sys, os, distutils.core
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

import torch, detectron2
!nvcc --version
# TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# CUDA_VERSION = torch.__version__.split("+")[-1]
# print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
# print("detectron2:", detectron2.__version__)

# Some basic setup:
# Setup detectron2 logger
# basic libraries ...

In [None]:
import os
import random
import shutil
import numpy as np
import pandas as pd
from tqdm import tqdm

from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import json, cv2, random
# from google.colab.patches import cv2_imshow
# from cv2 import imshow as cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog


os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

In [None]:
from datetime import datetime
import logging

from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import LazyConfig, instantiate
from detectron2.engine import (
    AMPTrainer,
    SimpleTrainer,
    default_argument_parser,
    default_setup,
    default_writers,
    hooks,
    launch
)
from detectron2.engine.defaults import _try_get_key
from detectron2.engine.defaults import create_ddp_model
from detectron2.evaluation import inference_on_dataset, print_csv_format
from detectron2.utils import comm # multi-gpu communication

from functools import partial
from detectron2.utils.file_io import PathManager
from omegaconf import OmegaConf
import torch.nn as nn
from fvcore.common.param_scheduler import MultiStepParamScheduler

from detectron2 import model_zoo
from detectron2.config import LazyCall as L # gets executed later
from detectron2.solver import WarmupParamScheduler # initial warmup stage for another scheduler
from detectron2.modeling import MViT
from detectron2.layers import ShapeSpec # basic shape specifier
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.roi_heads import (
    FastRCNNOutputLayers,
    FastRCNNConvFCHead,
    CascadeROIHeads,
)
from detectron2.utils.env import seed_all_rng
from detectron2.data.datasets import register_coco_instances
import detectron2.data.transforms as T
from detectron2.data import (
    DatasetMapper,
    build_detection_test_loader,
    build_detection_train_loader,
    get_detection_dataset_dicts,
)
from detectron2.evaluation import COCOEvaluator
from detectron2.utils.logger import setup_logger
from detectron2.utils.collect_env import collect_env_info

# Kaggle Specific Directory

In [None]:
# # delete all the files and directories recursively in the current working directory ...

# # !rm -rf *

# # make directory ...

# !mkdir /kaggle/working/datasets
# !mkdir /kaggle/working/datasets/badodd
# !mkdir /kaggle/working/datasets/badodd/labels
# !mkdir /kaggle/working/datasets/badodd/labels/train
# !mkdir datasets/badodd/labels/val
# !mkdir datasets/badodd/images
# !mkdir datasets/badodd/images/train
# !mkdir datasets/badodd/images/val
# !mkdir /kaggle/working/datasets/badodd/images/test

In [None]:
#  reference original files without duplicating their content ...

def all_files_in_folder_symlink(source_dir, target_dir):
    """Symlink every entry of ``source_dir`` into ``target_dir``.

    This makes the input files reachable from ``target_dir`` without copying
    their content.

    Args:
        source_dir: Directory whose entries are linked.
        target_dir: Directory that receives the links; created when missing.

    Entries that already exist in ``target_dir`` are left untouched, so the
    cell can be re-run without raising ``FileExistsError``.
    """
    # A symlink stores its target verbatim; a relative target would be resolved
    # against the link's own directory and end up dangling.
    source_dir = os.path.abspath(source_dir)
    os.makedirs(target_dir, exist_ok=True)

    for file in tqdm(sorted(os.listdir(source_dir))):
        target_file = os.path.join(target_dir, file)
        # lexists (not exists) so a dangling link from an earlier run also counts as present.
        if os.path.lexists(target_file):
            continue
        os.symlink(os.path.join(source_dir, file), target_file)

In [None]:
# # symbolic link function as above ...

# all_files_in_folder_symlink("/kaggle/input/dl-enigma-10-sust-cse-carnival-2024/dlenigma1/BadODD/labels/train","/kaggle/working/datasets/badodd/labels/train")
# all_files_in_folder_symlink("/kaggle/input/dl-enigma-10-sust-cse-carnival-2024/dlenigma1/BadODD/images/train","/kaggle/working/datasets/badodd/images/train")
# all_files_in_folder_symlink("/kaggle/input/dl-enigma-10-sust-cse-carnival-2024/dlenigma1/BadODD/images/test","/kaggle/working/datasets/badodd/images/test")

In [None]:
# Image and label directories of every split, all rooted at one dataset folder.
_dataset_root = 'custom_dataset/badodd'

train_dir_path, val_dir_path, test_dir_path = (
    f'{_dataset_root}/images/{split_name}' for split_name in ('train', 'val', 'test')
)
train_label_path, val_label_path, test_label_path = (
    f'{_dataset_root}/labels/{split_name}' for split_name in ('train', 'val', 'test')
)

# Input Handling

In [None]:
import json
import cv2
import os
from detectron2.structures import BoxMode

def _parse_yolo_label_line(line, image_width, image_height):
    """Turn one ``class x_center y_center width height`` label line into an annotation dict.

    The four box values are multiplied by the image size, i.e. they are
    treated as fractions of the image width/height.

    Raises:
        ValueError: If the line does not hold exactly five numeric fields.
    """
    fields = line.split()  # any run of whitespace separates fields
    if len(fields) != 5:
        raise ValueError(
            f"expected 5 fields 'class x y w h', got {len(fields)}: {line!r}"
        )
    class_id = int(float(fields[0]))  # also accepts ids written as "3.0"
    x_center, y_center, box_w, box_h = (float(value) for value in fields[1:])
    return {
        'category_id': class_id,
        # XYWH_ABS: top-left corner followed by width and height, in pixels.
        'bbox': [
            (x_center - 0.5 * box_w) * image_width,
            (y_center - 0.5 * box_h) * image_height,
            box_w * image_width,
            box_h * image_height,
        ],
        "bbox_mode": BoxMode.XYWH_ABS,
        "segmentation": [],
    }


def get_data_dicts(img_dir, label_dir):
    """Build detectron2 dataset dicts from an image folder and its label folder.

    Each image ``<name>.<ext>`` in ``img_dir`` is paired with ``<name>.txt`` in
    ``label_dir``; every non-blank line of the label file becomes one annotation.

    Args:
        img_dir: Directory containing the images.
        label_dir: Directory containing one ``.txt`` label file per image.

    Returns:
        A list of dicts with ``file_name``, ``height``, ``width``, ``image_id``
        and ``annotations``. Images are visited in sorted order so ``image_id``
        is reproducible between runs.

    Raises:
        ValueError: If a label line is malformed (the message names file and line).

    Entries that OpenCV cannot decode are skipped with a warning. A missing or
    empty label file yields an image without annotations instead of an error.
    """
    import logging

    logger = logging.getLogger(__name__)
    dataset_dicts = []

    for image_file in sorted(os.listdir(img_dir)):
        image_path = os.path.join(img_dir, image_file)

        # cv2.imread returns None (it does not raise) for anything it cannot decode.
        image = cv2.imread(image_path)
        if image is None:
            logger.warning("Skipping unreadable image: %s", image_path)
            continue
        image_height, image_width = image.shape[:2]

        image_name = os.path.splitext(image_file)[0]
        label_file = os.path.join(label_dir, image_name + '.txt')

        annotations = []
        if os.path.isfile(label_file):
            with open(label_file, 'r') as f:
                for line_no, line in enumerate(f, start=1):
                    if not line.strip():
                        continue  # blank line / empty file: nothing to annotate
                    try:
                        annotations.append(
                            _parse_yolo_label_line(line, image_width, image_height)
                        )
                    except ValueError as err:
                        raise ValueError(f"{label_file}:{line_no}: {err}") from err

        dataset_dicts.append({
            'file_name': image_path,
            'height': image_height,
            'width': image_width,
            # Position in the output list, so ids stay contiguous when files are skipped.
            'image_id': len(dataset_dicts),
            'annotations': annotations,
        })

    return dataset_dicts

    # # Save the dataset dictionary as a JSON file
    # with open('train.json', 'w') as f:
    #     json.dump(dataset_dicts, f, indent=4)

# Train Validation Split

In [None]:
# def train_val_split(source_image_dir, target_image_dir, source_label_dir, target_label_dir, split_ratio):
#     # read the image and label files ...
#     image_files = os.listdir(source_image_dir)
#     label_files = os.listdir(source_label_dir)

#     # sort to ensure the order is consistent between images and labels
#     image_files.sort()
#     label_files.sort()

#     # shuffle indices
#     random.seed(42)
#     indices = list(range(len(image_files)))
#     random.shuffle(indices)

#     split_index = int(len(indices) * split_ratio)

#     train_indices = indices[split_index:]
#     val_indices = indices[:split_index]

#     # move the validation image and label files into new directories ...
#     for idx in val_indices:
#         image_file = image_files[idx]
#         label_file = label_files[idx]

#         source_image_file = os.path.join(source_image_dir, image_file)
#         target_image_file = os.path.join(target_image_dir, image_file)
#         shutil.move(source_image_file, target_image_file)

#         source_label_file = os.path.join(source_label_dir, label_file)
#         target_label_file = os.path.join(target_label_dir, label_file)
#         shutil.move(source_label_file, target_label_file)

# # split the train and validation images and labels ...
# train_val_split("datasets/badodd/images/train",
#                 "datasets/badodd/images/val",
#                 "datasets/badodd/labels/train",
#                 "datasets/badodd/labels/val",
#                 0.15)


In [None]:
!tree -d

In [None]:
import os

def count_files_in_directory(directory):
    """Return how many files live under ``directory``, subdirectories included."""
    total = 0
    for _dirpath, _dirnames, filenames in os.walk(directory):
        total += len(filenames)
    return total

# Count the image and label files of each split, then report them pairwise.
train_images_count, train_labels_count = (
    count_files_in_directory(path) for path in (train_dir_path, train_label_path)
)
val_images_count, val_labels_count = (
    count_files_in_directory(path) for path in (val_dir_path, val_label_path)
)
test_images_count, test_labels_count = (
    count_files_in_directory(path) for path in (test_dir_path, test_label_path)
)

for _title, _n_images, _n_labels in (
    ("Train", train_images_count, train_labels_count),
    ("Validation", val_images_count, val_labels_count),
    ("Test", test_images_count, test_labels_count),
):
    print(f"{_title} Images:", _n_images)
    print(f"{_title} Labels:", _n_labels)

In [None]:
# Category names handed to detectron2 as ``thing_classes``; an annotation's
# ``category_id`` is an index into this list.
classes = [
    "auto_rickshaw",
    "bicycle",
    "bus",
    "car",
    "cart_vehicle",
    "construction_vehicle",
    "motorbike",
    "person",
    "priority_vehicle",
    "three_wheeler",
    "train",
    "truck",
    "wheelchair",
]

# Empty the catalog first so re-running this cell can register the same names again.
DatasetCatalog.clear()

# NOTE(review): these loaders read from 'datasets/badodd/...', whereas the
# *_dir_path / *_label_path variables defined earlier point at
# 'custom_dataset/badodd/...' -- confirm which location is intended.
for split in ("train", "val"):
    dataset_name = "badodd_" + split
    DatasetCatalog.register(
        dataset_name,
        # Bind the current split as a default argument; a plain closure would
        # see only the last loop value when the loader is eventually called.
        lambda split=split: get_data_dicts(
            'datasets/badodd/images/' + split,
            'datasets/badodd/labels/' + split,
        ),
    )
    MetadataCatalog.get(dataset_name).set(thing_classes=classes)

badodd_metadata = MetadataCatalog.get("badodd_train")

In [None]:
# from detectron2.data import DatasetCatalog, MetadataCatalog

# classes = ["auto_rickshaw", "bicycle", "bus", "car", "cart_vehicle", "construction_vehicle", "motorbike", "person", "priority_vehicle", "three_wheeler", "train", "truck", "wheelchair"]

# DatasetCatalog.clear()

# DatasetCatalog.register("badodd_train", lambda: get_data_dicts('/kaggle/working/datasets/badodd/images/train', '/kaggle/working/datasets/badodd/labels/train'))
# MetadataCatalog.get('badodd_train').set(thing_classes=classes)
# badodd_metadata = MetadataCatalog.get('badodd_train')

In [None]:
# import random
# from detectron2.utils.visualizer import Visualizer
# import matplotlib.pyplot as plt

# dataset_dicts = DatasetCatalog.get('badodd_train')
# for d in random.sample(dataset_dicts, 5):
#     img = cv2.imread(d["file_name"])
#     v = Visualizer(img[:, :, ::-1], metadata=badodd_metadata, scale=0.5)
#     v = v.draw_dataset_dict(d)
#     plt.figure(figsize = (14, 10))
#     plt.imshow(cv2.cvtColor(v.get_image()[:, :, ::-1], cv2.COLOR_BGR2RGB))
#     plt.show()

In [None]:
# dataset_dicts = get_data_dicts(train_dir_path, train_label_path)

In [None]:
# for d in random.sample(dataset_dicts, 3):
#     print(d["file_name"])
#     img = cv2.imread("/kaggle/working/datasets/badodd/images/train/" + d["file_name"])
#     visualizer = Visualizer(img[:, :, ::-1], metadata=badodd_metadata, scale=0.5)
#     out = visualizer.draw_dataset_dict(d)
#     cv2_imshow(out.get_image()[:, :, ::-1])

# Train

In [None]:
def setup():
    """Assemble the LazyConfig used for training and evaluation.

    Starts from the model-zoo Mask R-CNN FPN template, swaps the backbone's
    bottom-up network for an MViT, replaces the ROI heads with three-stage
    ``CascadeROIHeads``, and defines train/test data loaders for the
    "badodd_train" / "badodd_val" datasets plus a ``COCOEvaluator``.

    Returns:
        An OmegaConf config with the sections ``model``, ``dataloader``,
        ``train``, ``optimizer`` and ``lr_multiplier``.
    """
    model = model_zoo.get_config("common/models/mask_rcnn_fpn.py").model
    constants = model_zoo.get_config("common/data/constants.py").constants
    model.pixel_mean = constants.imagenet_rgb256_mean
    model.pixel_std = constants.imagenet_rgb256_std
    model.input_format = "RGB"
    model.backbone.bottom_up = L(MViT)(
        embed_dim=96, # 96,
        depth=24, # 10,
        num_heads=1,
        last_block_indexes= (1, 4, 20, 23), #(0, 2, 7, 9), #(1,4,20,23)
        residual_pooling=True,
        drop_path_rate= 0.4, # 0.2,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        out_features=("scale2", "scale3", "scale4", "scale5"),
    )
    # Relative interpolation: the FPN consumes exactly the features the MViT exposes.
    model.backbone.in_features = "${.bottom_up.out_features}"
    model.backbone.square_pad = 1024
    
    # New heads and LN
    model.backbone.norm = "LN"  # Use LN in FPN
    model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "LN"

    # 2conv in RPN:
    model.proposal_generator.head.conv_dims = [-1, -1]

    # arguments that don't exist for Cascade R-CNN
    # (must be removed before update() below switches _target_ to CascadeROIHeads)
    [model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]
    model.roi_heads.update(
        _target_=CascadeROIHeads,
        # One box head / predictor / matcher per cascade stage (three stages).
        box_heads=[
            L(FastRCNNConvFCHead)(
                input_shape=ShapeSpec(channels=256, height=7, width=7), # 256
                conv_dims=[256, 256, 256, 256],# [768, 768, 768, 768], # 
                fc_dims=[1024],
                conv_norm="LN",
            )
            for _ in range(3) # previously 3 (not 4), really needs to be changed?!
        ],
        box_predictors=[
            L(FastRCNNOutputLayers)(
                input_shape=ShapeSpec(channels=1024),
                test_score_thresh=0.05,
                box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
                cls_agnostic_bbox_reg=True,
                # Resolves to model.roi_heads.num_classes, assigned below.
                num_classes="${...num_classes}",
                test_topk_per_image=1000
            )
            for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
        ],
        proposal_matchers=[ # labels were [0,1]; really needs to be changed?!
            L(Matcher)(thresholds=[th], labels=[0,1], allow_low_quality_matches=False)
            for th in [0.5, 0.6, 0.7]
        ],
    )
    model.roi_heads.num_classes = 13
    model.roi_heads.batch_size_per_image = 128 # 512

    dataloader = OmegaConf.create()

    image_size = 1024
    # NOTE(review): use_instance_mask=True (and recompute_boxes=True further down)
    # make the mapper work from each annotation's "segmentation"; the loader in this
    # notebook writes an empty "segmentation" list for every box. Confirm that
    # ground-truth boxes survive the mapper, or disable the mask options.
    dataloader.train = L(build_detection_train_loader)(
        dataset=L(get_detection_dataset_dicts)(names="badodd_train"),
        mapper=L(DatasetMapper)(
            is_train=True,
            augmentations=[
#                 L(T.RandomBrightness)(intensity_min=0.8,intensity_max=1.2),
#                 L(T.RandomContrast)(intensity_min=0.5,intensity_max=1.5),
#                 L(T.RandomSaturation)(intensity_min=0.5,intensity_max=1.0),
#                 L(T.RandomRotation)(angle=[-5, 5], sample_style="range"),
                L(T.ResizeScale)(
                    min_scale=0.1, max_scale=2.0, target_width=image_size, target_height=image_size
                ),
                L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False),
            ],
            image_format="RGB",
            use_instance_mask=True,
        ),
        total_batch_size=2,
        num_workers=2,
    )

    dataloader.test = L(build_detection_test_loader)(
        # filter_empty=False keeps validation images that have no annotations.
        dataset=L(get_detection_dataset_dicts)(names="badodd_val", filter_empty=False),
        mapper=L(DatasetMapper)(
            is_train=False,
            augmentations=[
                L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size),
            ],
            image_format="RGB",
        ),
        batch_size=1,
        num_workers=1,
    )

    dataloader.evaluator = L(COCOEvaluator)(
        # Resolves to dataloader.test.dataset.names ("badodd_val").
        dataset_name="${..test.dataset.names}",
    )

    # These two assignments override the values passed to the train loader above
    # (num_workers 2 -> 1; total_batch_size stays 2).
    dataloader.train.num_workers = 1
    dataloader.train.total_batch_size = 2
    # recompute boxes due to cropping
    dataloader.train.mapper.recompute_boxes = True

    # Initialization and trainer settings
    train = model_zoo.get_config("common/train.py").train
    train.amp.enabled = True
    train.ddp.fp16_compression = True # maybe not relevant in single GPU
    train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_B_in1k.pyth" # "detectron2://ImageNetPretrained/mvitv2/MViTv2_T_in1k.pyth" # "detectron2://ImageNetPretrained/mvitv2/MViTv2_B_in21k.pyth"
    train.output_dir = "./output/mvit2b_again" # b
    # Schedule
    # 36 epoch = 20365/16 * 36 = 45821 iterations 
    # NOTE(review): the arithmetic above assumes a batch size of 16, while
    # total_batch_size is 2 in this config -- confirm the intended schedule length.
    train.max_iter = 15273 # 15273 # 45821
    train.eval_period = 5000
    train.log_period = 100 # 20
    train.checkpointer.period = 5000
    train.device = "cuda"

    # Learning-rate multiplier: 1.0, then 0.1 from iteration 13576 and 0.01 from
    # iteration 14815, preceded by a warmup spanning 50 iterations.
    lr_multiplier = L(WarmupParamScheduler)(
        scheduler=L(MultiStepParamScheduler)(
            values=[1.0, 0.1, 0.01],
            milestones=[13576, 14815], # [40730, 44447]
            num_updates=train.max_iter,
        ),
        # warmup_length is given as a fraction of the whole run.
        warmup_length=50 / train.max_iter,
        warmup_factor=0.001,
    )

    optimizer = model_zoo.get_config("common/optim.py").AdamW
    # No weight decay on parameters matched by the "pos_embed" override key.
    optimizer.params.overrides = {"pos_embed": {"weight_decay": 0.0}}
    optimizer.lr = 0.00008

    dataloader.evaluator.output_dir = train.output_dir

    cfg = OmegaConf.create()
    cfg.model = model
    cfg.dataloader = dataloader
    cfg.train = train
    cfg.optimizer = optimizer
    cfg.lr_multiplier = lr_multiplier

    return cfg

In [None]:
def do_test(cfg, model):
    """Evaluate ``model`` with the test loader and evaluator described in ``cfg.dataloader``.

    Returns:
        The result of ``inference_on_dataset`` (also printed through
        ``print_csv_format``), or ``None`` when ``cfg.dataloader`` has no
        "evaluator" entry.
    """
    if "evaluator" not in cfg.dataloader:
        return None

    test_loader = instantiate(cfg.dataloader.test)
    evaluator = instantiate(cfg.dataloader.evaluator)
    results = inference_on_dataset(model, test_loader, evaluator)
    print_csv_format(results)
    return results

In [None]:
def do_train(resume, cfg):
    """Instantiate model, optimizer, loader and hooks from ``cfg`` and run the training loop.

    Args:
        resume: When true and ``cfg.train.output_dir`` already holds a checkpoint,
            training continues after the restored iteration; otherwise it starts
            at iteration 0.
        cfg: Config with ``model``, ``optimizer``, ``dataloader``, ``lr_multiplier``
            and ``train`` sections.
    """
    net = instantiate(cfg.model)
    logging.getLogger("detectron2").info("Model:\n{}".format(net))
    net.to(cfg.train.device)

    # The optimizer config takes the live model as one of its parameters.
    cfg.optimizer.params.model = net
    optimizer = instantiate(cfg.optimizer)

    data_loader = instantiate(cfg.dataloader.train)

    ddp_model = create_ddp_model(net, **cfg.train.ddp)  # multi gpu not needed?!
    trainer_cls = AMPTrainer if cfg.train.amp.enabled else SimpleTrainer
    trainer = trainer_cls(ddp_model, data_loader, optimizer)

    checkpointer = DetectionCheckpointer(
        ddp_model,
        cfg.train.output_dir,
        trainer=trainer,
    )

    # Checkpointing and metric writing only happen on the main process; the
    # other ranks get a None placeholder in those slots.
    on_main_process = comm.is_main_process()

    timer_hook = hooks.IterationTimer()
    lr_hook = hooks.LRScheduler(scheduler=instantiate(cfg.lr_multiplier))
    checkpoint_hook = (
        hooks.PeriodicCheckpointer(checkpointer, **cfg.train.checkpointer)
        if on_main_process
        else None
    )
    eval_hook = hooks.EvalHook(cfg.train.eval_period, lambda: do_test(cfg, ddp_model))
    writer_hook = (
        hooks.PeriodicWriter(
            default_writers(cfg.train.output_dir, cfg.train.max_iter),
            period=cfg.train.log_period,
        )
        if on_main_process
        else None
    )
    trainer.register_hooks(
        [timer_hook, lr_hook, checkpoint_hook, eval_hook, writer_hook]
    )

    checkpointer.resume_or_load(cfg.train.init_checkpoint, resume=resume)
    resumed = resume and checkpointer.has_checkpoint()
    start_iter = trainer.iter + 1 if resumed else 0
    trainer.train(start_iter, cfg.train.max_iter)

In [None]:
def main():
    """Build the config, set up logging and seeding, then train from the initial checkpoint.

    Creates ``cfg.train.output_dir`` on the main process, attaches the "fvcore"
    and detectron2 loggers to it, logs the environment, seeds the random number
    generators and calls ``do_train`` without resuming.
    """
    cfg = setup()
    output_dir = cfg.train.output_dir
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size()))
    logger.info("Environment info:\n" + collect_env_info())

    # Pick a fresh seed per run, apply it and log it so the run can be repeated.
    # Previously the value was stored in the config but never handed to any RNG.
    # Adding the rank gives every worker a different yet deterministic stream.
    cfg.train.seed = int(datetime.now().timestamp())
    seed_all_rng(cfg.train.seed + rank)
    logger.info("Random seed: {} (+ rank {})".format(cfg.train.seed, rank))

    # Collect unreachable Python objects first so the CUDA memory they still
    # hold can actually be released by empty_cache().
    import gc
    gc.collect()
    torch.cuda.empty_cache()

    do_train(False, cfg)


# Single-process run. For multi-GPU training, pass the function itself to
# ``launch`` rather than calling it, e.g. ``launch(main, num_gpus_per_machine=4)``.
main()

# Validation

In [None]:
class LazyConfigPredictor:
    """Single-image predictor for a LazyConfig model with ``DefaultPredictor``'s call interface.

    ``DefaultPredictor`` only understands yacs ``CfgNode`` configs
    (``cfg.MODEL.WEIGHTS`` and friends). The config returned by ``setup()`` is
    a LazyConfig, so the model is built with ``instantiate`` and its weights are
    loaded with ``DetectionCheckpointer`` instead.

    Attributes are documented inline; ``model`` is public because the
    evaluation cell passes ``predictor.model`` to ``inference_on_dataset``.
    """

    def __init__(self, cfg, weights_path):
        """Build the model described by ``cfg.model`` and load ``weights_path`` into it."""
        self.cfg = cfg
        self.model = instantiate(cfg.model)
        self.model.to(cfg.train.device)
        self.model.eval()
        DetectionCheckpointer(self.model).load(weights_path)
        # Reuse the test-time augmentations of the validation loader so that
        # single-image inference sees the same resizing as evaluation.
        self.aug = T.AugmentationList(instantiate(cfg.dataloader.test.mapper.augmentations))
        self.input_format = cfg.model.input_format

    def __call__(self, original_image):
        """Run inference on one BGR image of shape (H, W, 3), as returned by ``cv2.imread``.

        Returns:
            The model's output dict for that image (holding "instances"), with
            predictions scaled back to the original image size.
        """
        with torch.no_grad():
            if self.input_format == "RGB":
                original_image = original_image[:, :, ::-1]
            height, width = original_image.shape[:2]
            aug_input = T.AugInput(original_image)
            self.aug(aug_input)  # applies the transforms to aug_input.image in place
            # HWC -> CHW float32; ascontiguousarray guards against negative
            # strides left over from the channel flip.
            chw = np.ascontiguousarray(aug_input.image.transpose(2, 0, 1), dtype="float32")
            inputs = {"image": torch.as_tensor(chw), "height": height, "width": width}
            return self.model([inputs])[0]


cfg = setup()
# Load the final checkpoint written by the training run that setup() configures.
# The score threshold used at inference is ``test_score_thresh`` of each entry in
# ``cfg.model.roi_heads.box_predictors``; change it there before building the predictor.
weights_path = os.path.join(cfg.train.output_dir, "model_final.pth")
predictor = LazyConfigPredictor(cfg, weights_path)

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# ``build_detection_test_loader(cfg, name)`` reads yacs-style keys from its first
# argument, which the LazyConfig returned by setup() does not provide. The config
# already describes the validation loader, so instantiate that description instead.
evaluator = COCOEvaluator("badodd_val", output_dir="./output")
val_loader = instantiate(cfg.dataloader.test)
print(inference_on_dataset(predictor.model, val_loader, evaluator))
# another equivalent way to evaluate the model is to use `trainer.test`

In [None]:
# Fetch the dataset dicts registered under 'badodd_val' (calls the registered loader function).
dataset_dicts = DatasetCatalog.get('badodd_val')

In [None]:
from detectron2.utils.visualizer import ColorMode
import random
import matplotlib.pyplot as plt

# Draw the predictions for up to three random validation images.
# random.sample raises ValueError when asked for more items than exist, hence the min().
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    # cv2 loads BGR while Visualizer expects RGB, hence the channel flip.
    v = Visualizer(im[:, :, ::-1], metadata=badodd_metadata, scale=0.8)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    fig, ax = plt.subplots(figsize=(14, 10))
    # get_image() is already RGB, so it can be shown without further conversion.
    ax.imshow(v.get_image())
    ax.set_title(os.path.basename(d["file_name"]))
    plt.show()
    plt.close(fig)  # release the figure; open figures accumulate across iterations

# Inference

In [None]:
# Define the test dataset
# import json
# import cv2
# import os
from detectron2.structures import BoxMode

def get_test_dataset(img_dir):
    """Build annotation-free dataset dicts for every readable image in ``img_dir``.

    Args:
        img_dir: Directory containing the test images.

    Returns:
        A list of dicts with ``file_name``, ``height``, ``width`` and
        ``image_id``. Files are visited in sorted order so ``image_id`` is
        reproducible; entries OpenCV cannot decode are skipped.
    """
    dataset_dicts = []

    for image_file in sorted(os.listdir(img_dir)):
        image_path = os.path.join(img_dir, image_file)

        # cv2.imread returns None (it does not raise) for anything it cannot decode.
        image = cv2.imread(image_path)
        if image is None:
            continue
        image_height, image_width = image.shape[:2]

        dataset_dicts.append({
            'file_name': image_path,
            'height': image_height,
            'width': image_width,
            # Position in the output list, so ids stay contiguous when files are skipped.
            'image_id': len(dataset_dicts),
        })

    return dataset_dicts

    # # Save the dataset dictionary as a JSON file
    # with open('train.json', 'w') as f:
    #     json.dump(dataset_dicts, f, indent=4)

# for d in ["test"]:
#     DatasetCatalog.register("badodd_" + d, lambda d=d: get_test_dataset('/kaggle/working/datasets/badodd/images/test'))
#     MetadataCatalog.get("badodd_" + d).set(thing_classes=classes)


# DatasetCatalog.register("badodd_test", lambda: get_test_dataset('/kaggle/working/datasets/badodd/images/test'))
# MetadataCatalog.get('badodd_test').set(thing_classes=classes)

In [None]:
# Make predictions
# test_dataset = DatasetCatalog.get("badodd_test")
# outputs = []

In [None]:
# # Inference should use the config with parameters that are used in training
# # cfg now already contains everything we've set previously. We changed it a little bit for inference:
# cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# # cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6 # iou
# predictor = DefaultPredictor(cfg)

In [None]:
# from detectron2.utils.visualizer import ColorMode
# import random
# import matplotlib.pyplot as plt

# dataset_dicts = DatasetCatalog.get('badodd_train')
# for d in random.sample(dataset_dicts, 5):    
#     im = cv2.imread(d["file_name"])
#     outputs = predictor(im)
#     v = Visualizer(im[:, :, ::-1], metadata=badodd_metadata, scale=0.8)
#     v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
#     plt.figure(figsize = (14, 10))
#     plt.imshow(cv2.cvtColor(v.get_image()[:, :, ::-1], cv2.COLOR_BGR2RGB))
#     plt.show()

In [None]:
# for image_path in test_dataset:
# #     print(image_path)
#     image = cv2.imread(image_path["file_name"])
#     output = predictor(image)
#     outputs.append(output)
# #     print(output['instances'])

In [None]:
# print(outputs[:1])

In [None]:
# Build the prediction string for one image in the submission format.

def get_prediction_string(boxes, scores, classes):
    """Serialise the detections of one image into a single comma-separated string.

    Each detection becomes ``"<class> <score> <x1> <y1> <width> <height>"`` with
    every number written as a float; width and height are the absolute
    differences of the box's two corners.

    Args:
        boxes: Per-detection sequences ``(x1, y1, x2, y2)``.
        scores: Per-detection scores; its length decides how many detections are written.
        classes: Per-detection class ids.

    Returns:
        The detections joined by ``","``, or ``"0 0 0 0 0 0"`` when there are
        none (the scoring metric fails on an empty or NaN entry).
    """
    # NOTE(review): x/y written here are the box's first corner, not its centre --
    # confirm this matches what the submission scorer expects.
    formatted = []
    for idx, score in enumerate(scores):
        label_and_score = f"{float(classes[idx])} {float(score)} "
        left, top, right, bottom = (float(coord) for coord in boxes[idx])
        box_width = abs(right - left)
        box_height = abs(bottom - top)
        formatted.append(label_and_score + f"{left} {top} {box_width} {box_height}")

    joined = ','.join(formatted)
    return joined if joined else "0 0 0 0 0 0"

# Build one submission row in the id, ImageID, PredictionString_pred format.

def get_prediction_entry(i, filename, boxes, scores, classes):
    """Return the submission row for one image as a dict.

    Args:
        i: Row id (starting from 0).
        filename: Image file name; everything before its first ``.`` becomes the ImageID.
        boxes, scores, classes: Detections, forwarded to ``get_prediction_string``.
    """
    # partition() cuts at the FIRST dot, so "a.b.jpg" yields "a".
    image_id = filename.partition('.')[0]
    prediction_string = get_prediction_string(boxes, scores, classes)
    return {
        "id": i,
        "ImageID": image_id,
        "PredictionString_pred": prediction_string,
    }

# Directory holding the test images (Kaggle input mount of the competition dataset).
test_directory = (
    "/kaggle/input/dl-enigma-10-sust-cse-carnival-2024"
    "/dlenigma1/BadODD/images/test"
)

# # Load the model ...
# model = YOLO('/kaggle/working/runs/detect/train/weights/best.pt')

In [None]:
# do the inference ...

def predict_all_files(test_dataset, output_path="submission.csv"):
    """Run the global ``predictor`` on every image of a directory and write the submission CSV.

    Args:
        test_dataset: Directory containing the test images.
        output_path: CSV file to write; defaults to "submission.csv".

    One row per readable image is written with the columns ``id``, ``ImageID``
    and ``PredictionString_pred``. Box corners are divided by the image width
    and height before being serialised. Files are processed in sorted order;
    entries OpenCV cannot decode are skipped.
    """
    predictions = []
    for filename in tqdm(sorted(os.listdir(test_dataset))):
        # Join with the directory that was listed, not with a global path.
        filepath = os.path.join(test_dataset, filename)
        image = cv2.imread(filepath)
        if image is None:
            continue  # not a decodable image

        # Move the results to the CPU right away; nothing per-image is kept on the GPU.
        instances = predictor(image)["instances"].to("cpu")

        height, width = image.shape[:2]
        # Clone before normalising so the predictor's own tensor is not modified in place.
        boxes = instances.pred_boxes.tensor.clone()
        boxes[:, 0::2] /= width   # x1, x2
        boxes[:, 1::2] /= height  # y1, y2

        prediction = get_prediction_entry(
            len(predictions), filename, boxes, instances.scores, instances.pred_classes
        )
        predictions.append(prediction)

    # Explicit columns keep the header in place even when no image was processed.
    predictions_df = pd.DataFrame(
        predictions, columns=["id", "ImageID", "PredictionString_pred"]
    )
    predictions_df.to_csv(output_path, index=False)

# call the inference function ...
predict_all_files(test_directory)


In [None]:
# Read back the file written by predict_all_files. The same relative path is
# used for writing and reading, so this works from any working directory.
submission_df = pd.read_csv('submission.csv')

In [None]:
submission_df.head()

In [None]:
submission_df.shape


In [None]:
submission_df.isnull().sum()


In [None]:
# Write the loaded frame back out, without the index column, under a second file name.
# NOTE(review): presumably the pandas round-trip is what makes the file acceptable
# to the grader -- confirm why the first CSV is not submitted directly.
submission_df.to_csv('acceptable_submission_format.csv',index=False)
