In [None]:
# Install the OpenMMLab installer (mim) with %pip so it lands in the environment
# of the running kernel, then use mim to install mmcv-full.
%pip install -U openmim
!mim install mmcv-full

In [None]:
# The later cells rely on the MMDetection 2.x layout (mmcv.runner.BaseModule,
# mmdet/models/builder.py, samples_per_gpu), so check out a 2.x release tag
# instead of whatever the default branch currently is.
# The directory test makes the cell safe to re-run.
![ -d mmdetection ] || git clone --depth 1 --branch v2.28.2 https://github.com/open-mmlab/mmdetection.git

In [None]:
# Enter the cloned repository and install it in editable mode, so the files
# written into mmdet/ by the following cells are picked up without reinstalling.
# %pip (not !pip) installs into the environment of the running kernel.
%cd mmdetection
%pip install -v -e .

In [None]:
# Download the dataset archive. --fail makes curl exit with an error on an HTTP
# failure instead of saving the error page as data_pets.zip.
# NOTE(review): the URL embeds an access key; confirm it is meant to be public
# before sharing this notebook.
!curl -L --fail -o data_pets.zip "https://public.roboflow.com/ds/xcrhGBJ1aB?key=bdAQK4gH2e"

In [None]:
# Extract into data_pets/. -o overwrites without prompting (a prompt cannot be
# answered from a notebook cell on re-run); -q keeps the per-file listing out of
# the cell output.
!unzip -o -q 'data_pets.zip' -d data_pets

In [None]:
# Collect the per-split COCO annotation files under data_pets/annotations/ with
# the file names referenced by the training config.
# -p: do not fail when the directory already exists (cell re-run).
!mkdir -p data_pets/annotations
!mv data_pets/train/_annotations.coco.json data_pets/annotations/instances_train.json
!mv data_pets/valid/_annotations.coco.json data_pets/annotations/instances_val.json
!mv data_pets/test/_annotations.coco.json data_pets/annotations/instances_test.json

In [None]:
%%writefile mmdet/models/backbones/my_model.py
import torch.nn as nn
from mmcv.runner import BaseModule
from ..builder import BACKBONES

@BACKBONES.register_module()
class MyModel(BaseModule):
    """Three-stage plain convolutional backbone.

    Every stage is a 3x3, stride-2, bias-free convolution followed by
    BatchNorm and an in-place ReLU. The stages produce 128, 256 and 512
    channels respectively.

    Args:
        in_channels (int): Number of channels of the input tensor.
    """

    def __init__(self, in_channels):
        super().__init__()

        stages = []
        prev_channels = in_channels
        for width in (128, 256, 512):
            stages.append(
                nn.Sequential(
                    nn.Conv2d(
                        prev_channels,
                        width,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        bias=False),
                    nn.BatchNorm2d(width),
                    nn.ReLU(inplace=True),
                ))
            # The next stage consumes what this one produced.
            prev_channels = width

        self.layers = nn.ModuleList(stages)

    def forward(self, x):
        """Apply the stages in order and return each stage's output.

        Returns:
            tuple: One tensor per stage, in stage order.
        """
        feature_maps = []
        for stage in self.layers:
            x = stage(x)
            feature_maps.append(x)
        return tuple(feature_maps)

In [None]:
%%writefile mmdet/models/backbones/my_model.py
import torch.nn as nn
from mmcv.cnn import build_conv_layer, build_norm_layer, build_activation_layer
from mmcv.runner import BaseModule
from ..builder import BACKBONES

@BACKBONES.register_module()
class MyModel(BaseModule):
    """Plain convolutional backbone assembled from mmcv layer builders.

    The network is a chain of stages, one per entry of ``out_channels``.
    Each stage is a 3x3 bias-free convolution, a normalization layer and an
    activation layer. ``forward`` returns the output of every stage.

    Args:
        in_channels (int): Number of channels of the input tensor.
        stride (int): Stride of the convolution in every stage. Default: 2.
        padding (int): Padding of the convolution in every stage. Default: 1.
        conv_cfg (dict | None): Config passed to ``build_conv_layer``.
            Default: None.
        norm_cfg (dict): Config passed to ``build_norm_layer``.
            Default: dict(type='BN').
        act_cfg (dict): Config passed to ``build_activation_layer``.
            Default: dict(type='ReLU').
        out_channels (Sequence[int]): Output channels of each stage, in
            order. Default: (128, 256, 512).
        init_cfg (dict | list[dict] | None): Initialization config forwarded
            to ``BaseModule``. Default: None.
    """

    def __init__(self,
                 in_channels,
                 stride=2,
                 padding=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 out_channels=(128, 256, 512),
                 init_cfg=None):
        # Forward init_cfg so weight initialization can be set from a config.
        super(MyModel, self).__init__(init_cfg)

        self.layers = nn.ModuleList()

        for stage_channels in out_channels:
            block = nn.Sequential(
                build_conv_layer(
                    conv_cfg,
                    in_channels,
                    stage_channels,
                    kernel_size=3,
                    stride=stride,
                    padding=padding,
                    bias=False),
                # build_norm_layer returns a (name, layer) pair; keep the layer.
                build_norm_layer(norm_cfg, stage_channels)[1],
                build_activation_layer(act_cfg))

            # The next stage consumes what this one produced.
            in_channels = stage_channels
            self.layers.append(block)

    def forward(self, x):
        """Apply the stages in order and return each stage's output.

        Returns:
            tuple: One tensor per stage, in stage order.
        """
        outputs = []
        for module in self.layers:
            x = module(x)
            outputs.append(x)
        return tuple(outputs)

In [None]:
%%writefile mmdet/models/backbones/__init__.py
# Copyright (c) OpenMMLab. All rights reserved.
from .csp_darknet import CSPDarknet
from .darknet import Darknet
from .detectors_resnet import DetectoRS_ResNet
from .detectors_resnext import DetectoRS_ResNeXt
from .efficientnet import EfficientNet
from .hourglass import HourglassNet
from .hrnet import HRNet
from .mobilenet_v2 import MobileNetV2
from .pvt import PyramidVisionTransformer, PyramidVisionTransformerV2
from .regnet import RegNet
from .res2net import Res2Net
from .resnest import ResNeSt
from .resnet import ResNet, ResNetV1d
from .resnext import ResNeXt
from .ssd_vgg import SSDVGG
from .swin import SwinTransformer
from .trident_resnet import TridentResNet
from .my_model import MyModel

# Public names of the backbones package. Every entry corresponds to a class
# imported at the top of this file; 'MyModel' is the custom backbone imported
# from .my_model.
__all__ = [
    'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet',
    'MobileNetV2', 'Res2Net', 'HourglassNet', 'DetectoRS_ResNet',
    'DetectoRS_ResNeXt', 'Darknet', 'ResNeSt', 'TridentResNet', 'CSPDarknet',
    'SwinTransformer', 'PyramidVisionTransformer',
    'PyramidVisionTransformerV2', 'EfficientNet', 'MyModel'
]


In [None]:
# Directory for the custom config; -p keeps the cell from failing on re-run.
!mkdir -p configs/my_model

In [None]:
%%writefile configs/my_model/my_model_fpn_1x_coco.py
# Inherit the 1x training schedule and the default runtime settings.
_base_ = [
    '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py'
]

# model settings
# Faster R-CNN with the custom MyModel backbone and an FPN neck.
#
# Stride bookkeeping: with its default stride of 2, MyModel applies three
# stride-2 convolutions, so its three outputs sit at strides 2, 4 and 8 of the
# input image. FPN with num_outs=5 appends two more levels, each downsampled
# by a further factor of 2 (strides 16 and 32). The anchor generator and the
# RoI extractor must use those strides, otherwise anchors and RoI features are
# taken from the wrong image locations.
model = dict(
    type='FasterRCNN',
    backbone=dict(
        type='MyModel',
        in_channels=3),
    neck=dict(
        type='FPN',
        # Channel counts of the three MyModel outputs.
        in_channels=[128, 256, 512],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            # One stride per FPN output level (see stride bookkeeping above).
            strides=[2, 4, 8, 16, 32]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            # Strides of the first four FPN levels.
            featmap_strides=[2, 4, 8, 16]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            # NOTE(review): 80 is the COCO class count; confirm it matches the
            # number of categories in the data_pets annotations.
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),

    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)
        # soft-nms is also supported for rcnn testing
        # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
    ))

# dataset settings
# COCO-format dataset rooted at data_pets/ (relative to the working directory).
# NOTE(review): no `classes` entry is given for the datasets; confirm the
# category names in the annotation files are the ones CocoDataset expects.
dataset_type = 'CocoDataset'
data_root = 'data_pets/'

# Arguments shared by the Normalize step of both pipelines.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}

# Training: load image and boxes, resize, random flip, normalize, pad, bundle.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'type': 'Resize', 'img_scale': (1333, 800), 'keep_ratio': True},
    {'type': 'RandomFlip', 'flip_ratio': 0.5},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_bboxes', 'gt_labels']},
]

# Validation/test: a single scale with flipping disabled.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': (1333, 800),
        'flip': False,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'Pad', 'size_divisor': 32},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]

# Each split pairs an annotation file under annotations/ with its image folder.
data = {
    'samples_per_gpu': 2,
    'workers_per_gpu': 2,
    'train': {
        'type': dataset_type,
        'ann_file': f'{data_root}annotations/instances_train.json',
        'img_prefix': f'{data_root}train/',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'ann_file': f'{data_root}annotations/instances_val.json',
        'img_prefix': f'{data_root}valid/',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'ann_file': f'{data_root}annotations/instances_test.json',
        'img_prefix': f'{data_root}test/',
        'pipeline': test_pipeline,
    },
}

# Evaluate bounding boxes at an interval of 1.
evaluation = {'interval': 1, 'metric': 'bbox'}


In [None]:
# Train with the custom config. --auto-scale-lr lets the training script rescale
# the learning rate inherited from schedule_1x to the batch size actually used
# (number of GPUs x samples_per_gpu) instead of the batch size the base
# schedule was written for.
! python tools/train.py configs/my_model/my_model_fpn_1x_coco.py --auto-scale-lr