In [4]:
custom_hooks = [dict(type="NumClassCheckHook")]
cudnn_benchmark = True
eval_interval = 500
evaluation = dict(interval=eval_interval, metric=["bbox", "segm"], save_best="segm_mAP")
dist_params = dict(backend="nccl")
log_level = "INFO"
workflow = [("train", 1)]
# runner = dict(type="EpochBasedRunner", max_epochs=10)
runner = dict(type="IterBasedRunner", max_iters=600000)
checkpoint_config = dict(interval=eval_interval, max_keep_ckpts=3)
# ******************************************************** common config

# ******************************************************** schedule config
# optimizer by mmdetection documents
# optimizer = dict(type="SGD", lr=1e-4, momentum=0.9, weight_decay=0.0001)
optimizer = dict(type="Adam", lr=0.0003, weight_decay=0.0001)
optimizer_config = dict()

In [5]:
lr_config = dict(
    policy="step",
    warmup="linear",
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[400000, 500000],
    min_lr=1e-7,
)

In [6]:
dataset_type = "CocoDataset"
data_source = "/data/"
ann_source = "/data/lane_cocostyle/"
classes = ("lane_blue", "lane_shoulder", "lane_white", "lane_yellow")

In [7]:
img_norm_cfg = dict(
    mean=[105.685, 99.015, 101.624], std=[65.58, 65.665, 67.324], to_rgb=True
)

In [8]:
train_pipeline = [
    dict(type="LoadImageFromFile"),
    dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
    dict(type="Resize", img_scale=(1333, 800), keep_ratio=True),
    dict(type="RandomFlip", flip_ratio=0.5),
    dict(type="Normalize", **img_norm_cfg),
    dict(type="Pad", size_divisor=32),
    dict(type="DefaultFormatBundle"),
    dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]),
]
test_pipeline = [
    dict(type="LoadImageFromFile"),
    dict(
        type="MultiScaleFlipAug",
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type="Resize", keep_ratio=True),
            dict(type="RandomFlip"),
            dict(type="Normalize", **img_norm_cfg),
            dict(type="Pad", size_divisor=32),
            dict(type="ImageToTensor", keys=["img"]),
            dict(type="Collect", keys=["img"]),
        ],
    ),
]

In [9]:
train_data_set = dict(
    type=dataset_type,
    img_prefix=data_source + "train/IMAGE",
    classes=classes,
    ann_file=ann_source + "train.json",
    pipeline=train_pipeline,
)
valid_data_set = dict(
    type=dataset_type,
    img_prefix=data_source + "valid/IMAGE",
    classes=classes,
    ann_file=ann_source + "valid.json",
    pipeline=test_pipeline,
)
test_data_set = dict(
    type=dataset_type,
    img_prefix=data_source + "test/IMAGE",
    classes=classes,
    ann_file=ann_source + "test.json",
    pipeline=test_pipeline,
)
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=8,
    train=train_data_set,
    val=valid_data_set,
    test=test_data_set,
)

In [11]:
model = dict(
    backbone=dict(
        depth=101,
        init_cfg=dict(type="Pretrained", checkpoint="torchvision://resnet101"),
    ),
    neck=dict(
        type="FPN_CARAFE",
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5,
        start_level=0,
        end_level=-1,
        norm_cfg=None,
        act_cfg=None,
        order=("conv", "norm", "act"),
        upsample_cfg=dict(
            type="carafe",
            up_kernel=7,
            up_group=1,
            encoder_kernel=5,
            encoder_dilation=1,
            compressed_channels=32,
        ),
    ),  # CARAFE: Content-Aware ReAssembly of FEatures
    roi_head=dict(
        bbox_roi_extractor=dict(
            type="GenericRoIExtractor",
            aggregation="sum",
            roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=2),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32],
            pre_cfg=dict(
                type="ConvModule",
                in_channels=256,
                out_channels=256,
                kernel_size=5,
                padding=2,
                inplace=False,
            ),
            post_cfg=dict(
                type="GeneralizedAttention",
                in_channels=256,
                spatial_range=-1,
                num_heads=6,
                attention_type="0100",
                kv_stride=2,
            ),
        ),
        mask_roi_extractor=dict(
            type="GenericRoIExtractor",
            roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=2),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32],
            pre_cfg=dict(
                type="ConvModule",
                in_channels=256,
                out_channels=256,
                kernel_size=5,
                padding=2,
                inplace=False,
            ),
            post_cfg=dict(
                type="GeneralizedAttention",
                in_channels=256,
                spatial_range=-1,
                num_heads=6,
                attention_type="0100",
                kv_stride=2,
            ),
        ),  # GRIOE A novel Region of Interest Extraction Layer for Instance Segmentation
        bbox_head=dict(
            num_classes=4,
            loss_cls=dict(
                type="SeesawLoss", p=0.8, q=2.0, num_classes=4, loss_weight=1.0
            ),
        ),  # SeesawLoss Seesaw Loss for Long-Tailed Instance Segmentation
        mask_head=dict(
            num_classes=4,
            upsample_cfg=dict(  # CARAFE: Content-Aware ReAssembly of FEatures
                type="carafe",
                scale_factor=2,
                up_kernel=7,  # 5 -> 7로 고침
                up_group=1,
                encoder_kernel=5,  # 3 -> 5로 고침
                encoder_dilation=1,
                compressed_channels=32,
            ),  # 64 -> 32로 고침
            predictor_cfg=dict(type="NormedConv2d", tempearture=20),
        ),  # SeesawLoss의 Normalized Mask Predication
    ),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type="nms", iou_threshold=0.7),
            min_bbox_size=0,
        ),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type="nms", iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5,
        ),
    ),
)


In [1]:
import argparse
import copy
import json
import os
import os.path as osp
import time
from glob import glob

import mmcv
import torch
from mmcv import Config
from mmcv.runner import get_dist_info, init_dist
from mmcv.utils import get_git_hash
from mmdet import __version__
from mmdet.apis import set_random_seed, train_detector
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.utils import collect_env
from PIL import Image
from tqdm import tqdm

from utils import get_area, setup_logger

In [2]:
parser = argparse.ArgumentParser(description="Train a vehicle detector")
parser.add_argument(
    "--config",
    help="train config file path",
    default="./configs/lane_detection_config.py",
)
parser.add_argument(
    "--no-validate",
    action="store_true",
    help="whether not to evaluate the checkpoint during training",
)
group_gpus = parser.add_mutually_exclusive_group()
group_gpus.add_argument(
    "--gpus",
    type=int,
    help="number of gpus to use " "(only applicable to non-distributed training)",
)
group_gpus.add_argument(
    "--gpu-ids",
    type=int,
    nargs="+",
    help="ids of gpus to use " "(only applicable to non-distributed training)",
)
parser.add_argument(
    "--deterministic",
    action="store_true",
    help="whether to set deterministic options for CUDNN backend.",
)
parser.add_argument(
    "--launcher",
    choices=["none", "pytorch", "slurm", "mpi"],
    default="none",
    help="job launcher",
)
parser.add_argument(
    "--no-merging",
    action="store_true",
    help="whether to merge annotations before training. "
    "Set true when there exists merged json file",
)
parser.add_argument("--local_rank", type=int, default=0)
args = parser.parse_args()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

usage: ipykernel_launcher.py [-h] [--config CONFIG] [--no-validate] [--gpus GPUS | --gpu-ids GPU_IDS [GPU_IDS ...]]
                             [--deterministic] [--launcher {none,pytorch,slurm,mpi}] [--no-merging]
                             [--local_rank LOCAL_RANK]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\user\AppData\Roaming\jupyter\runtime\kernel-d27c9f35-5292-457b-8b90-6fa9a8ef74d1.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [3]:
# 인자를 설정하기 위한 함수를 정의합니다.
def get_args(notebook_args):
    parser = argparse.ArgumentParser(description="Train a vehicle detector")
    parser.add_argument(
        "--config",
        help="train config file path",
        default="./configs/lane_detection_config.py",
    )
    parser.add_argument(
        "--no-validate",
        action="store_true",
        help="whether not to evaluate the checkpoint during training",
    )
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        "--gpus",
        type=int,
        help="number of gpus to use (only applicable to non-distributed training)",
    )
    group_gpus.add_argument(
        "--gpu-ids",
        type=int,
        nargs="+",
        help="ids of gpus to use (only applicable to non-distributed training)",
    )
    parser.add_argument(
        "--deterministic",
        action="store_true",
        help="whether to set deterministic options for CUDNN backend.",
    )
    parser.add_argument(
        "--launcher",
        choices=["none", "pytorch", "slurm", "mpi"],
        default="none",
        help="job launcher",
    )
    parser.add_argument(
        "--no-merging",
        action="store_true",
        help="whether to merge annotations before training. "
        "Set true when there exists merged json file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    
    # parse_args 대신에 notebook_args를 사용합니다.
    args = parser.parse_args(notebook_args)
    return args

# Jupyter 노트북에서 사용할 인자를 설정합니다.
notebook_args = [
    '--config', './configs/lane_detection_config.py',  # 예시 경로
    '--no-validate',
    '--gpus', '1',
    '--launcher', 'none',
    # '--no-merging',  # 이 옵션을 사용하려면 주석을 해제하세요.
    # '--local_rank', '0',  # 이 옵션을 사용하려면 주석을 해제하고 값을 설정하세요.
]

# 인자를 가져옵니다.
args = get_args(notebook_args)

# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
def parse_args():
    if "LOCAL_RANK" not in os.environ:
        os.environ["LOCAL_RANK"] = str(args.local_rank)

    return args

In [5]:
def merge_annotation(annot_list, save_path):
    images = []
    annotations = []
    annot_id = 1
    image_id = 0
    for annot in tqdm(annot_list):
        with open(annot, "r") as f:
            json_data = json.load(f)
        object_data = json_data["data_set_info"]["data"]
        img_path = annot.replace("ANNOTATION", "IMAGE").replace("json", "jpg")
        if not os.path.exists(img_path):
            continue
        img = Image.open(img_path)

        img_info = {}
        img_info["id"] = image_id
        file_name = osp.basename(img_path)
        img_info["file_name"] = copy.deepcopy(file_name)
        img_info["height"] = copy.deepcopy(img.size[1])
        img_info["width"] = copy.deepcopy(img.size[0])
        images.append(copy.deepcopy(img_info))
        img.close()

        # annotation은 segmentation / iscrowd, image_id, \
        # category_id, id, bbox, area
        for target in object_data:
            obj_label = target["value"]["object_Label"]
            if "lane_type" in obj_label.keys():
                category = obj_label["lane_type"]
            else:
                continue
            ann = {}
            if category in [
                "lane_blue",
                "lane_shoulder",
                "lane_white",
                "lane_yellow",
            ]:
                points = target["value"]["points"]
                temp_points = copy.deepcopy(points)
                # deepcopy를 해야 둘다 변경되지 않음
                area = get_area(temp_points)
                if category == "lane_blue":
                    ann["category_id"] = 1
                elif category == "lane_shoulder":
                    ann["category_id"] = 2
                elif category == "lane_white":
                    ann["category_id"] = 3
                elif category == "lane_yellow":
                    ann["category_id"] = 4
                segmentation = []
                seg_x = []
                seg_y = []
                for point in points:
                    segmentation.append(point["x"])
                    seg_x.append(point["x"])
                    segmentation.append(point["y"])
                    seg_y.append(point["y"])
                bbox = [
                    min(seg_x),
                    min(seg_y),
                    max(seg_x) - min(seg_x),
                    max(seg_y) - min(seg_y),
                ]
                ann["bbox"] = bbox
                ann["segmentation"] = [segmentation]
                ann["area"] = area
                ann["image_id"] = image_id
                ann["iscrowd"] = 0
                ann["id"] = annot_id
                annot_id += 1
            if ann:
                annotations.append(ann)
        image_id += 1
    merged_data = {
        "images": images,
        "annotations": annotations,
        "categories": [
            {"supercategory": "lane", "id": 1, "name": "lane_blue"},
            {"supercategory": "lane", "id": 2, "name": "lane_shoulder"},
            {"supercategory": "lane", "id": 3, "name": "lane_white"},
            {"supercategory": "lane", "id": 4, "name": "lane_yellow"},
        ],
    }
    with open(save_path, "w", encoding="utf-8") as f:
        json.dump(merged_data, f, ensure_ascii=False, indent="\t")

In [None]:
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # Set configs
    # import modules from string list.
    if cfg.get("custom_imports", None):
        from mmcv.utils import import_modules_from_strings

        import_modules_from_strings(**cfg["custom_imports"])
    # set cudnn_benchmark
    if cfg.get("cudnn_benchmark", False):
        torch.backends.cudnn.benchmark = True
    if cfg.get("work_dir", None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join(
            "./work_dirs", osp.splitext(osp.basename(args.config))[0]
        )
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == "none":
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    log_file = osp.join(cfg.work_dir, f"{timestamp}.log")
    logger = setup_logger(log_file, name=__name__)
    logger.info("Start training!!")
    logger.info(f"Read config from {args.config}")

    # build coco-style dataset
    if not args.no_merging:
        for split in ["train", "valid"]:
            annot_list = glob(osp.join(cfg.data_source, split, "ANNOTATION/*.json"))
            logger.info(f"Merging {len(annot_list)} files in {split} dataset.")
            if not osp.exists(cfg.ann_source):
                os.makedirs(cfg.ann_source)
            save_path = osp.join(cfg.ann_source, f"{split}.json")
            merge_annotation(annot_list, save_path)
    for _, _, filename in os.walk(osp.join(cfg.data_source, "train/IMAGE")):
        if len(filename) > 0:
            logger.info(f"Num images => {filename}")
            logger.info(f"File list => {filename}")

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = "\n".join([(f"{k}: {v}") for k, v in env_info_dict.items()])
    dash_line = "-" * 60 + "\n"
    logger.info("Environment info:\n" + dash_line + env_info + "\n" + dash_line)
    meta["env_info"] = env_info
    meta["config"] = cfg.pretty_text
    # log some basic info
    logger.info(f"Distributed training: {distributed}")
    logger.info(f"Config:\n{cfg.pretty_text}")

    # set random seeds
    if cfg.seed is not None:
        logger.info(
            f"Set random seed to {cfg.seed}, " f"deterministic: {args.deterministic}"
        )
        set_random_seed(cfg.seed, deterministic=args.deterministic)
    meta["seed"] = cfg.seed
    meta["exp_name"] = osp.basename(args.config)

    model = build_detector(
        cfg.model, train_cfg=cfg.get("train_cfg"), test_cfg=cfg.get("test_cfg")
    )
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__ + get_git_hash()[:7], CLASSES=datasets[0].CLASSES
        )
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta,
    )
if __name__ == "__main__":
    main()

2023-11-09 17:09:59 [32mINFO     [0m 364504778.py[line:38]: Start training!![0m
2023-11-09 17:09:59 [32mINFO     [0m 364504778.py[line:39]: Read config from ./configs/lane_detection_config.py[0m
2023-11-09 17:09:59 [32mINFO     [0m 364504778.py[line:45]: Merging 0 files in train dataset.[0m
0it [00:00, ?it/s]
2023-11-09 17:09:59 [32mINFO     [0m 364504778.py[line:45]: Merging 0 files in valid dataset.[0m
0it [00:00, ?it/s]
2023-11-09 17:09:59 [32mINFO     [0m 364504778.py[line:62]: Environment info:
------------------------------------------------------------
sys.platform: win32
Python: 3.8.18 (default, Sep 11 2023, 13:39:12) [MSC v.1916 64 bit (AMD64)]
CUDA available: True
GPU 0: NVIDIA GeForce GTX 1650
CUDA_HOME: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5
NVCC: Not Available
GCC: n/a
PyTorch: 1.7.0+cu110
PyTorch compiling details: PyTorch built with:
  - C++ Version: 199711
  - MSVC 192729112
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 

  0%|          | 0.00/170M [00:00<?, ?B/s]


unexpected key in source state_dict: fc.weight, fc.bias

2023-11-09 17:10:08,368 - mmcv - INFO - initialize RPNHead with init_cfg {'type': 'Normal', 'layer': 'Conv2d', 'std': 0.01}
2023-11-09 17:10:08,386 - mmcv - INFO - initialize Shared2FCBBoxHead with init_cfg [{'type': 'Normal', 'std': 0.01, 'override': {'name': 'fc_cls'}}, {'type': 'Normal', 'std': 0.001, 'override': {'name': 'fc_reg'}}, {'type': 'Xavier', 'override': [{'name': 'shared_fcs'}, {'name': 'cls_fcs'}, {'name': 'reg_fcs'}]}]
2023-11-09 17:10:08,513 - mmcv - INFO - 
backbone.conv1.weight - torch.Size([64, 3, 7, 7]): 
PretrainedInit: load from torchvision://resnet101 
 
2023-11-09 17:10:08,513 - mmcv - INFO - 
backbone.bn1.weight - torch.Size([64]): 
PretrainedInit: load from torchvision://resnet101 
 
2023-11-09 17:10:08,513 - mmcv - INFO - 
backbone.bn1.bias - torch.Size([64]): 
PretrainedInit: load from torchvision://resnet101 
 
2023-11-09 17:10:08,519 - mmcv - INFO - 
backbone.layer1.0.conv1.weight - torch.Size([64,

In [None]:
# pip install yapf==0.40.1