In [None]:
! pip install pycocotools
! pip install openmim
! pip install mmengine
! pip install addict
! pip install yapf

In [None]:
! pip install -U openmim
! mim install mmcv

In [None]:
# ! git clone https://github.com/open-mmlab/mmdetection.git
%cd mmdetection
! pip install -v -e .

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from collections import Counter
from decimal import Decimal
import csv
import random
from PIL import Image
import cv2
import tqdm 
import shutil
import json
import pickle

In [None]:
from mmengine.utils import get_git_hash
from mmengine.utils.dl_utils import collect_env as collect_base_env

import mmdet


def collect_env():
    """Return the base environment report extended with an MMDetection entry.

    The report produced by ``collect_base_env()`` gains one extra key,
    ``'MMDetection'``, whose value is ``mmdet.__version__`` followed by ``+``
    and the first seven characters of ``get_git_hash()``.
    """
    report = collect_base_env()
    version = mmdet.__version__
    short_hash = get_git_hash()[:7]
    report['MMDetection'] = f'{version}+{short_hash}'
    return report


if __name__ == '__main__':
    # Print the environment report as one "key: value" line per entry.
    env_report = collect_env()
    for key in env_report:
        print(f'{key}: {env_report[key]}')

In [None]:
! pip install pylabel 
from pylabel import importer


In [None]:
# Dataset location and the split to convert. To process another split, set
# SPLIT to "train" / "val"; to process another YOLO-style dataset laid out as
# <root>/labels/<split> and <root>/images/<split>, change DATASET_ROOT.
DATASET_ROOT = "/media/quadro/NVME/Asif-Thesis/Enigma/processed_dataset/dhakaai"
SPLIT = "test"

# Folder with the YOLO label files, and the folder with the matching images.
path_to_annotations = f"{DATASET_ROOT}/labels/{SPLIT}"
path_to_images = f"{DATASET_ROOT}/images/{SPLIT}"

# Class names of this dataset. YOLO label files store only a class number,
# so the list is presumably indexed by that number — verify the order against
# the dataset's own class definition. The same names, in the same order, are
# used as `metainfo['classes']` in the detector config further down.
yoloclasses = [
    'auto_rickshaw', 'bicycle', 'bus', 'car', 'cart_vehicle',
    'construction_vehicle', 'motorbike', 'person', 'priority_vehicle',
    'three_wheeler', 'truck', 'wheelchair',
]

# Import the dataset into the pylabel schema.
dataset = importer.ImportYoloV5(
    path=path_to_annotations,
    path_to_images=path_to_images,
    cat_names=yoloclasses,
    img_ext="jpg",
)

dataset.df.head(5)

In [None]:
# Write the imported annotations as a COCO JSON file inside the images
# directory; the detector config below uses that directory as `data_root`
# and reads `test.json` from it.
coco_output_path = '/media/quadro/NVME/Asif-Thesis/Enigma/processed_dataset/dhakaai/images/test.json'
dataset.export.ExportToCoco(output_path=coco_output_path, cat_id_index=1)

# Training

In [None]:
# Source text of the MMDetection config used for fine-tuning / evaluating
# CO-DINO (Swin-L backbone) on the 12-class vehicle dataset. The string is
# plain Python: it is written to a .py file by the next statement and then
# loaded by tools/train.py and tools/test.py.
config_vehicle = """
# Inherit and overwrite part of the config based on this config
_base_ = ['co_dino_5scale_r50_8xb2_1x_coco.py']

pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'  # noqa
load_from = 'https://download.openmmlab.com/mmdetection/v3.0/codetr/co_dino_5scale_swin_large_16e_o365tococo-614254c9.pth'  # noqa

train_batch_size_per_gpu = 1
train_num_workers = 1
# root directory where the data folder and annotation json are found
data_root = '/media/quadro/NVME/Asif-Thesis/Enigma/processed_dataset/dhakaai/images/'
max_epochs = 6
# NOTE: stage2_num_epochs is not referenced anywhere else in this file
stage2_num_epochs = 3
# learning rate used by optim_wrapper below
base_lr = 1e-4

# meta information about the classes and their annotation color in the inference output
# (one palette entry per class, in the same order)
metainfo = {
    'classes': ('auto_rickshaw', 'bicycle', 'bus', 'car', 'cart_vehicle',
                'construction_vehicle', 'motorbike', 'person',
                'priority_vehicle', 'three_wheeler', 'truck', 'wheelchair'),
    'palette': [
        (220, 20, 60),    # Red
        (255, 165, 0),    # Orange
        (255, 255, 0),    # Yellow
        (0, 128, 0),      # Green
        (0, 0, 255),      # Blue
        (128, 0, 128),    # Purple
        (255, 0, 255),    # Magenta
        (128, 128, 128),  # Gray
        (255, 192, 203),  # Pink
        (0, 255, 255),    # Cyan
        (0, 255, 0),      # Lime Green
        (139, 69, 19),    # Brown
    ]
}

# model settings
# number of classes must match the dataset's number of classes, so it is
# derived from metainfo instead of being typed in a second time
# NOTE(review): only query_head.num_classes is overridden here; if the base
# config's other heads also carry num_classes they keep the base value - confirm
model = dict(
    backbone=dict(
        _delete_=True,
        type='SwinTransformer',
        pretrain_img_size=384,
        embed_dims=192,
        depths=[2, 2, 18, 2],
        num_heads=[6, 12, 24, 48],
        window_size=12,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.3,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        # Please only add indices that would be used
        # in FPN, otherwise some parameter will not be used
        with_cp=True,
        convert_weights=True,
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    neck=dict(in_channels=[192, 384, 768, 1536]),
    query_head=dict(
        num_classes=len(metainfo['classes']),
        dn_cfg=dict(box_noise_scale=0.4, group_cfg=dict(num_dn_queries=500)),
        transformer=dict(encoder=dict(with_cp=6))))

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomChoice',
        transforms=[
            [
                dict(
                    type='RandomChoiceResize',
                    scales=[(480, 2048), (512, 2048), (544, 2048), (576, 2048),
                            (608, 2048), (640, 2048), (672, 2048), (704, 2048),
                            (736, 2048), (768, 2048), (800, 2048), (832, 2048),
                            (864, 2048), (896, 2048), (928, 2048), (960, 2048),
                            (992, 2048), (1024, 2048), (1056, 2048),
                            (1088, 2048), (1120, 2048), (1152, 2048),
                            (1184, 2048), (1216, 2048), (1248, 2048),
                            (1280, 2048), (1312, 2048), (1344, 2048),
                            (1376, 2048), (1408, 2048), (1440, 2048),
                            (1472, 2048), (1504, 2048), (1536, 2048)],
                    keep_ratio=True)
            ],
            [
                dict(
                    type='RandomChoiceResize',
                    # The aspect ratio of all images in the train dataset is < 7;
                    # follows the original implementation
                    scales=[(400, 4200), (500, 4200), (600, 4200)],
                    keep_ratio=True),
                dict(
                    type='RandomCrop',
                    crop_type='absolute_range',
                    crop_size=(384, 600),
                    allow_negative_crop=True),
                dict(
                    type='RandomChoiceResize',
                    scales=[(480, 2048), (512, 2048), (544, 2048), (576, 2048),
                            (608, 2048), (640, 2048), (672, 2048), (704, 2048),
                            (736, 2048), (768, 2048), (800, 2048), (832, 2048),
                            (864, 2048), (896, 2048), (928, 2048), (960, 2048),
                            (992, 2048), (1024, 2048), (1056, 2048),
                            (1088, 2048), (1120, 2048), (1152, 2048),
                            (1184, 2048), (1216, 2048), (1248, 2048),
                            (1280, 2048), (1312, 2048), (1344, 2048),
                            (1376, 2048), (1408, 2048), (1440, 2048),
                            (1472, 2048), (1504, 2048), (1536, 2048)],
                    keep_ratio=True)
            ]
        ]),
    dict(type='PackDetInputs')
]

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        data_prefix=dict(img='train/'),
        ann_file='train.json'))

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 1280), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

# Validation and testing both read the 'test' split (test/ images, test.json).
# To evaluate on another split, change data_prefix / ann_file here and the
# ann_file of val_evaluator below so that they stay in sync.
val_dataloader = dict(
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        data_prefix=dict(img='test/'),
        ann_file='test.json'))
test_dataloader = val_dataloader

# Modify metric related settings
# ann_file must be the same annotation json that val_dataloader reads.
val_evaluator = dict(ann_file=data_root + 'test.json')
test_evaluator = val_evaluator

optim_wrapper = dict(optimizer=dict(lr=base_lr))


train_cfg = dict(max_epochs=max_epochs)
# out_dir is the directory where the checkpoints are saved
default_hooks = dict(checkpoint=dict(type='CheckpointHook', save_best='auto',out_dir='../checkpoints_codetr_final'))
# NOTE(review): the only milestone (epoch 8) lies beyond max_epochs (6), so
# the learning rate is never decayed during this schedule. Lower the
# milestone below max_epochs if a decay step is wanted.
param_scheduler = [
    dict(
        type='MultiStepLR',
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[8],
        gamma=0.1)
]

"""

# Write the config text to the CO-DETR config file of the same name inside
# the checkout (path is relative to the current working directory); mode 'w'
# replaces any existing content of that file.
config_path = './projects/CO-DETR/configs/codino/co_dino_5scale_swin_l_16xb1_16e_o365tococo.py'
with open(config_path, 'w') as config_file:
    config_file.write(config_vehicle)

In [None]:
# ! python tools/train.py ./projects/CO-DETR/configs/codino/co_dino_5scale_swin_l_16xb1_16e_o365tococo.py 

In [None]:
# Evaluate a trained checkpoint with the config written above. `--out` names
# the file that receives the dumped predictions and `--work-dir` the directory
# for the evaluation logs/metrics.
# NOTE(review): the checkpoint is read from
# ../co_dino_5scale_swin_l_16xb1_16e_o365tococo/, while the config's
# CheckpointHook uses out_dir='../checkpoints_codetr_final' — confirm that
# epoch_1.pth here is the intended file.
!python tools/test.py ./projects/CO-DETR/configs/codino/co_dino_5scale_swin_l_16xb1_16e_o365tococo.py ../co_dino_5scale_swin_l_16xb1_16e_o365tococo/epoch_1.pth --out results.pkl --work-dir ./results

In [None]:
# !bash tools/dist_train.sh ./projects/CO-DETR/configs/codino/co_dino_5scale_swin_l_16xb1_16e_o365tococo.py 4