In [1]:
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.2/274.2 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[34 lines of output][0m
  [31m   [0m Traceback (most recent call last):
  [31m   [0m   File "<string>", line 2, in <module>
  [31m   [0m   File "<pip-setuptools-caller>", line 34, in <module>
  [31m   [0m   File "/tmp/pip-install-r96n4ehp/pyyaml_bb8f4320c6cd404984d26db62bde0084/setup.py", line 291, in <module>
  [31m   [0m     setup(
  [31m   [0m   File "/opt/conda/lib/python3.10/site-packages/setuptools/_distutils/core.py", line 185, in setup
  [31m   [0m     return run_commands(dist)
  [31m   [0m   File "/opt/conda/lib/python3.1

In [2]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Mon_Apr__3_17:16:06_PDT_2023
Cuda compilation tools, release 12.1, V12.1.105
Build cuda_12.1.r12.1/compiler.32688072_0
torch:  2.1 ; cuda:  2.1.2
detectron2: 0.6


In [3]:
import torch
import os
import cv2
import random
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Default size for every matplotlib figure created in this notebook.
plt.rcParams.update({"figure.figsize": (20.0, 10.0)})

In [5]:
import torch
import os
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data.datasets import register_coco_instances
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_train_loader, build_detection_test_loader
from detectron2.data import transforms as T

In [6]:
# Mapping from zero-based class ID to class name; the position in the list is the ID.
class_names_dict = dict(enumerate([
    'pedestrian',
    'people',
    'bicycle',
    'car',
    'van',
    'truck',
    'tricycle',
    'awning-tricycle',
    'bus',
    'motor',
]))

In [7]:
# Function to load dataset annotations
def get_visdrone_dicts(img_dir, ann_dir, image_exts=(".jpg", ".jpeg", ".png")):
    """Build Detectron2-style dataset records from VisDrone images and annotations.

    Each image in ``img_dir`` is paired with the annotation file in ``ann_dir``
    that has the same base name and a ``.txt`` extension. Annotation lines are
    comma-separated; fields 0-3 are read as ``x, y, width, height`` and field 5
    as the 1-based category id.

    Args:
        img_dir: Directory containing the images.
        ann_dir: Directory containing one ``.txt`` annotation file per image.
        image_exts: File extensions (case-insensitive) treated as images; any
            other file in ``img_dir`` is ignored.

    Returns:
        A list of dicts with ``file_name``, ``image_id``, ``height``, ``width``
        and ``annotations`` (boxes in absolute XYXY format).

    Raises:
        ValueError: If an image file cannot be decoded.
        FileNotFoundError: If the annotation file for an image is missing.
    """
    dataset_dicts = []
    # Sort so image_id is stable across runs (os.listdir order is arbitrary), and
    # skip stray non-image files that would otherwise fail inside cv2.imread.
    image_files = sorted(
        name for name in os.listdir(img_dir)
        if name.lower().endswith(tuple(image_exts))
    )
    for idx, filename in enumerate(image_files):
        img_path = os.path.join(img_dir, filename)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise ValueError(f"Could not read image: {img_path}")
        height, width = image.shape[:2]

        # splitext only swaps the real extension, unlike str.replace('.jpg', ...).
        ann_file = os.path.join(ann_dir, os.path.splitext(filename)[0] + ".txt")
        objs = []
        with open(ann_file, 'r') as file:
            for line in file:
                line = line.strip()
                if not line:
                    continue  # tolerate blank lines
                elements = line.split(',')
                x_min, y_min, w, h = map(int, elements[:4])
                # Shift to zero-based ids. Ids outside 0..9 are dropped; in the
                # VisDrone format these are presumably "ignored regions" (0) and
                # "others" (11) -- confirm against the dataset README.
                class_id = int(elements[5]) - 1
                if 0 <= class_id <= 9 and w > 0 and h > 0:
                    objs.append({
                        "bbox": [x_min, y_min, x_min + w, y_min + h],
                        "bbox_mode": detectron2.structures.BoxMode.XYXY_ABS,
                        "category_id": class_id,
                    })

        dataset_dicts.append({
            "file_name": img_path,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": objs,
        })
    return dataset_dicts


In [8]:
# Paths for train, validation, and test datasets, built from one dataset root.
_VISDRONE_ROOT = "/kaggle/input/visdrone2019"
_TRAIN_SPLIT_DIR = f"{_VISDRONE_ROOT}/VisDrone2019-DET-train/VisDrone2019-DET-train"
_VAL_SPLIT_DIR = f"{_VISDRONE_ROOT}/VisDrone2019-DET-val/VisDrone2019-DET-val"
# Unlike train/val, test-dev is not nested inside a second directory of the same name.
_TEST_SPLIT_DIR = f"{_VISDRONE_ROOT}/VisDrone2019-DET-test-dev"

image_train_folder = f"{_TRAIN_SPLIT_DIR}/images/"
annotation_train_folder = f"{_TRAIN_SPLIT_DIR}/annotations/"

image_val_folder = f"{_VAL_SPLIT_DIR}/images/"
annotation_val_folder = f"{_VAL_SPLIT_DIR}/annotations/"

image_test_folder = f"{_TEST_SPLIT_DIR}/images/"
annotation_test_folder = f"{_TEST_SPLIT_DIR}/annotations/"

In [9]:
# Register datasets
# One loop instead of three copy-pasted pairs. DatasetCatalog.register refuses
# to register a name twice, so drop any previous registration first; this keeps
# the cell safe to re-run in the same kernel.
for _split_name, _split_img_dir, _split_ann_dir in (
    ("visdrone_train", image_train_folder, annotation_train_folder),
    ("visdrone_val", image_val_folder, annotation_val_folder),
    ("visdrone_test", image_test_folder, annotation_test_folder),
):
    if _split_name in DatasetCatalog.list():
        DatasetCatalog.remove(_split_name)
    # Default arguments bind this iteration's paths; a plain closure would see
    # only the last iteration's values when the loader is eventually called.
    DatasetCatalog.register(
        _split_name,
        lambda img_dir=_split_img_dir, ann_dir=_split_ann_dir: get_visdrone_dicts(img_dir, ann_dir),
    )
    MetadataCatalog.get(_split_name).set(thing_classes=list(class_names_dict.values()))

namespace(name='visdrone_test',
          thing_classes=['pedestrian',
                         'people',
                         'bicycle',
                         'car',
                         'van',
                         'truck',
                         'tricycle',
                         'awning-tricycle',
                         'bus',
                         'motor'])

In [10]:
# Training configuration: start from the model-zoo Faster R-CNN config and
# override the dataset, solver and head settings for VisDrone.
_base_config = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(_base_config))

# Model: weights from the matching model-zoo checkpoint, head sized to our classes.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(_base_config)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(class_names_dict)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128

# Data
cfg.DATASETS.TRAIN = ("visdrone_train",)
cfg.DATASETS.TEST = ("visdrone_val",)
cfg.DATALOADER.NUM_WORKERS = 2

# Solver
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00005
cfg.SOLVER.MAX_ITER = 10000

In [11]:
# Augmentation setup
augmentation = [
    T.RandomFlip(prob=0.3, horizontal=True, vertical=False),  # Random horizontal flip with 30% probability
    T.RandomBrightness(0.9, 1.1),  # Random brightness adjustment
    T.RandomContrast(0.8, 1.2),    # Random contrast adjustment
    T.RandomSaturation(0.8, 1.2),  # Random saturation adjustment
    # sample_style="choice" picks one of the listed angles. With the default
    # "range" style only the first two values are used as bounds, i.e. the
    # angle would be sampled uniformly from [0, 90].
    T.RandomRotation(angle=[0, 90, 180, 270], expand=True, sample_style="choice"),
]

# Create the AugmentationList instance
augmentation_list = T.AugmentationList(augmentation)

# Store the augmentations on the config under a custom key so a dataset mapper
# can pick them up. These are live Python objects, not plain config values.
cfg.INPUT.AUGMENTATION = augmentation

In [12]:
# Create output directory
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

In [13]:
# Define a custom DatasetMapper with augmentation
class CustomDatasetMapper:
    """Map one dataset record to the input format a Detectron2 model expects.

    Reads the image, applies the augmentations stored in
    ``cfg.INPUT.AUGMENTATION`` to both the image and its boxes, and returns a
    record with an ``image`` tensor (C, H, W) and an ``instances`` field.
    """

    def __init__(self, cfg):
        # Dataset augmentation as defined in the cfg.INPUT
        self.augmentation = T.AugmentationList(cfg.INPUT.AUGMENTATION)
        # Keep the channel order the model was configured for; pixel
        # normalization in the model config assumes this format.
        self.image_format = cfg.INPUT.FORMAT

    def __call__(self, dataset_dict):
        """Return an augmented, model-ready copy of ``dataset_dict``.

        Args:
            dataset_dict: One record as produced by the registered dataset
                function (needs ``file_name`` and ``annotations``).

        Returns:
            A new dict; the input record is left unmodified.
        """
        # Function-scope imports so this cell does not depend on extra
        # top-of-notebook import lines.
        import copy
        from detectron2.data import detection_utils as utils

        # The loader reuses the cached dataset records, so never mutate them.
        dataset_dict = copy.deepcopy(dataset_dict)

        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # AugmentationList operates on an AugInput in place and returns the
        # transforms it applied, which are then replayed on the annotations.
        aug_input = T.AugInput(image)
        transforms = self.augmentation(aug_input)
        image = aug_input.image
        image_shape = image.shape[:2]  # (H, W) after augmentation

        # Models take a contiguous CHW tensor, not an HWC ndarray.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1))
        )

        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        # Drop boxes that collapsed to nothing after the transforms.
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict

# Set custom mapper function for training
def mapper_fn_train(dataset_dict):
    """Apply ``CustomDatasetMapper`` built from the notebook-level ``cfg``."""
    return CustomDatasetMapper(cfg)(dataset_dict)

In [14]:
# Training
# NOTE(review): the trainer is built from cfg alone; mapper_fn_train /
# CustomDatasetMapper defined in this notebook are never passed to it, so the
# custom augmentations are not applied during this run. Using them would need a
# DefaultTrainer subclass that overrides build_train_loader.
trainer = DefaultTrainer(cfg)
# resume=False: start from cfg.MODEL.WEIGHTS rather than resuming from a
# checkpoint in cfg.OUTPUT_DIR (per Detectron2's DefaultTrainer docs).
trainer.resume_or_load(resume=False)
trainer.train()

model_final_280758.pkl: 167MB [00:03, 46.6MB/s]                              
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
2024-06-14 13:14:46.579999: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-14 13:14:46.580164: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-14 13:14:46.714098: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [15]:
# Save final model weights
model_weights_path = os.path.join("/kaggle/working/output", "visdrone2019_detectron2.pth")
# Earlier in the notebook only cfg.OUTPUT_DIR is created, and that is this
# directory only when the working directory is /kaggle/working. Create the
# target explicitly so torch.save cannot fail on a missing folder.
os.makedirs(os.path.dirname(model_weights_path), exist_ok=True)
torch.save(trainer.model.state_dict(), model_weights_path)

In [16]:
# Evaluate on test set
# Passing cfg as the second positional argument is COCOEvaluator's deprecated
# signature; name the task and the single-process setting explicitly instead.
evaluator = COCOEvaluator(
    "visdrone_test",
    tasks=("bbox",),
    distributed=False,
    output_dir="./output/",
)
test_loader = build_detection_test_loader(cfg, "visdrone_test")
# Keep the metrics in a variable so later cells can use them, then show them.
test_results = inference_on_dataset(trainer.model, test_loader, evaluator)
print(test_results)

  self.pid = os.fork()
  self.pid = os.fork()


Loading and preparing results...
DONE (t=0.74s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=86.85s).
Accumulating evaluation results...
DONE (t=3.98s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.098
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.178
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.099
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.043
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.158
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.250
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.044
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.145
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.185
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1