<a href="https://colab.research.google.com/github/botatooo/pp-detection-fracture-recherche/blob/dev/src/fracatlas_efficientdet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'


Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-g8zqne31
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-g8zqne31
  Resolved https://github.com/facebookresearch/detectron2.git to commit 864913f0e57e87a75c8cc0c7d79ecbd774fc669b
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [2]:
import torch, detectron2

!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]

print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)


nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
torch:  2.1 ; cuda:  cu121
detectron2: 0.6


In [3]:
import os
import json
from torchvision.datasets.utils import download_and_extract_archive

root = "dataset/"
url = "https://figshare.com/ndownloader/files/41725659"
filename = "fracatlas.zip"

# if download:
if not os.path.isdir(os.path.join(root, "FracAtlas")):
    os.makedirs(root, exist_ok=True)
    download_and_extract_archive(
        url,
        os.path.dirname(root),
        filename=filename,
        remove_finished=True,
    )
if not os.path.isdir(root):
    raise RuntimeError(
        "Dataset not found or corrupted. You can use download=True to download it"
    )

with open("dataset/FracAtlas/Annotations/COCO JSON/COCO_fracture_masks.json") as f:
  fracture_masks_data = json.load(f)

fractured_images = [i["file_name"] for i in fracture_masks_data["images"]]
fractured_image_count = len(fractured_images)

training_images = fractured_images[: int(0.9 * fractured_image_count)]
testing_images = fractured_images[int(0.9 * fractured_image_count) :]


os.mkdir("data")
os.mkdir("data/fracatlas")


os.mkdir("data/fracatlas/images")

os.mkdir("data/fracatlas/images/train")
for i in training_images:
  full_path = os.path.abspath(os.path.join("dataset/FracAtlas/images/Fractured", i))
  new_path = os.path.abspath(os.path.join("data/fracatlas/images/train", i))
  os.rename(full_path, new_path)

os.mkdir("data/fracatlas/images/val")
for i in testing_images:
  full_path = os.path.abspath(os.path.join("dataset/FracAtlas/images/Fractured", i))
  new_path = os.path.abspath(os.path.join("data/fracatlas/images/val", i))
  os.rename(full_path, new_path)


os.mkdir("data/fracatlas/labels")

os.mkdir("data/fracatlas/labels/train")
for i in training_images:
  i = i.replace(".jpg", ".txt")
  full_path = os.path.abspath(os.path.join("dataset/FracAtlas/Annotations/YOLO", i))
  new_path = os.path.abspath(os.path.join("data/fracatlas/labels/train", i))
  os.rename(full_path, new_path)

os.mkdir("data/fracatlas/labels/val")
for i in testing_images:
  i = i.replace(".jpg", ".txt")
  full_path = os.path.abspath(os.path.join("dataset/FracAtlas/Annotations/YOLO", i))
  new_path = os.path.abspath(os.path.join("data/fracatlas/labels/val", i))
  os.rename(full_path, new_path)


FileExistsError: [Errno 17] File exists: 'data'

In [4]:
from torchvision.transforms import functional as F
from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

from detectron2.structures import BoxMode

from PIL import Image

import collections
import os
from xml.etree.ElementTree import Element as ET_Element

try:
    from defusedxml.ElementTree import parse as ET_parse
except ImportError:
    from xml.etree.ElementTree import parse as ET_parse
from typing import Any, Dict

def parse_voc_xml(node: ET_Element) -> Dict[str, Any]:
    voc_dict: Dict[str, Any] = {}
    children = list(node)
    if children:
        def_dic: Dict[str, Any] = collections.defaultdict(list)
        for dc in map(parse_voc_xml, children):
            for ind, v in dc.items():
                def_dic[ind].append(v)
        if node.tag == "annotation":
            def_dic["object"] = [def_dic["object"]]
        voc_dict = {
            node.tag: {
                ind: v[0] if len(v) == 1 else v for ind, v in def_dic.items()
            }
        }
    if node.text:
        text = node.text.strip()
        if not children:
            voc_dict[node.tag] = text
    return voc_dict

def get_fracture_dicts(
    root: str,
    image_set: str = "train",
):
    valid_image_sets = ["train", "test"]
    image_set = verify_str_arg(image_set, "image_set", valid_image_sets)

    url = "https://figshare.com/ndownloader/files/41725659"
    filename = "fracatlas.zip"

    # if download:
    if not os.path.isdir("data/FracAtlas"):
        os.makedirs("data", exist_ok=True)
        download_and_extract_archive(
            url,
            os.path.dirname(root),
            filename=filename,
            remove_finished=True,
        )
        for subdir in ["Fractured", "Non_fractured"]:
            dirpath = os.path.join(root, "images")
            subdirpath = os.path.join(dirpath, subdir)
            for f in os.listdir(subdirpath):
                if not f.lower().endswith(".jpg"):
                    continue
                os.rename(os.path.join(subdirpath, f), os.path.join(dirpath, f))
            os.rmdir(subdirpath)
        print(os.listdir("data"))
    if not os.path.isdir(root):
        raise RuntimeError(
            "Dataset not found or corrupted. You can use download=True to download it"
        )

    image_dir = os.path.join(root, "images")
    target_dir = os.path.join(root, "Annotations", "PASCAL VOC")
    all_images = [os.path.splitext(x)[0] for x in os.listdir(image_dir)]

    # remove images without a fracture because we need bounding boxes to train
    all_images = [x for x in all_images if len(parse_voc_xml(ET_parse(os.path.join(target_dir, x + ".xml")).getroot())["annotation"]["object"]) != 0]

    # 90% of images in train, and the last 10% in test
    file_names = []
    if image_set == "train":
        file_names = all_images[: int(0.9 * len(all_images))]
    else:
        file_names = all_images[int(0.9 * len(all_images)) :]

    images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
    targets = [os.path.join(target_dir, x + ".xml") for x in file_names]
    assert len(images) == len(targets)

    dataset_dicts = []
    for index, image in enumerate(images):
        img = Image.open(image).convert("RGB")
        img = F.to_tensor(img)
        item = parse_voc_xml(ET_parse(targets[index]).getroot())

        objects = [
            {
                "bbox": [
                    int(obj["bndbox"]["xmin"]),
                    int(obj["bndbox"]["ymin"]),
                    int(obj["bndbox"]["xmax"]),
                    int(obj["bndbox"]["ymax"]),
                ],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": 0,
            }
            for obj in item["annotation"]["object"]
        ]

        target = {}
        target["file_name"] = images[index]
        target["image_id"] = index
        target["width"] = int(item["annotation"]["size"]["width"])
        target["height"] = int(item["annotation"]["size"]["height"])
        target["annotations"] = objects
        dataset_dicts.append(target)
    return dataset_dicts


In [None]:
!DETECTRON2_DATASETS=../data/ python3 train.py --config-file configs/Base-EfficientDet.yaml "DATASETS.TRAIN" "('fracture_train',)" "DATASETS.TEST" "('fracture_test',)"

In [5]:
!git clone https://github.com/mtroym/EfficientDet.detectron2
%cd "EfficientDet.detectron2"

import logging
import os
from collections import OrderedDict

import detectron2.utils.comm as comm
import torch
from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer
from detectron2.config import get_cfg
from detectron2.data import (
    MetadataCatalog,
    build_detection_test_loader,
    build_detection_train_loader,
)
from detectron2.engine import default_argument_parser, default_setup, launch
from detectron2.evaluation import (

    # CityscapesEvaluator,
    COCOEvaluator,
    COCOPanopticEvaluator,
    DatasetEvaluators,
    LVISEvaluator,
    PascalVOCDetectionEvaluator,
    SemSegEvaluator,
    inference_on_dataset,
    print_csv_format,
)
from detectron2.modeling import build_model
from detectron2.solver import build_lr_scheduler, build_optimizer
from detectron2.utils.events import (
    CommonMetricPrinter,
    EventStorage,
    JSONWriter,
    TensorboardXWriter,
)
from torch.nn.parallel import DistributedDataParallel

from src.config import add_efficientdet_config
from src.data import register_all_df2
from src.modeling.efficientdet_heads import EfficientDetHead
from src.data import DetDatasetMapper

logger = logging.getLogger("detectron2")


# register_all_df2("/mnt/cephfs_new_wj/lab_ad_idea/maoyiming/data")


def get_evaluator(cfg, dataset_name, output_folder=None):
    """
    Create evaluator(s) for a given dataset.
    This uses the special metadata "evaluator_type" associated with each builtin dataset.
    For your own dataset, you can simply create an evaluator manually in your
    script and do not have to worry about the hacky if-else logic here.
    """
    if output_folder is None:
        output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
    evaluator_list = []
    evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
    if evaluator_type in ["sem_seg", "coco_panoptic_seg"]:
        evaluator_list.append(
            SemSegEvaluator(
                dataset_name,
                distributed=True,
                num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES,
                ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
                output_dir=output_folder,
            )
        )
    if evaluator_type in ["coco", "coco_panoptic_seg"]:
        evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder))
    if evaluator_type == "coco_panoptic_seg":
        evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
    # if evaluator_type == "cityscapes":
    # assert (
    #         torch.cuda.device_count() >= comm.get_rank()
    # ), "CityscapesEvaluator currently do not work with multiple machines."
    # return CityscapesEvaluator(dataset_name)
    if evaluator_type == "pascal_voc":
        return PascalVOCDetectionEvaluator(dataset_name)
    if evaluator_type == "lvis":
        return LVISEvaluator(dataset_name, cfg, True, output_folder)
    if len(evaluator_list) == 0:
        raise NotImplementedError(
            "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type)
        )
    if len(evaluator_list) == 1:
        return evaluator_list[0]
    return DatasetEvaluators(evaluator_list)


def do_test(cfg, model):
    results = OrderedDict()
    for dataset_name in cfg.DATASETS.TEST:
        data_loader = build_detection_test_loader(cfg, dataset_name,
                                                  mapper=DetDatasetMapper(cfg, is_train=False))
        evaluator = get_evaluator(
            cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
        )
        results_i = inference_on_dataset(model, data_loader, evaluator)
        results[dataset_name] = results_i
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(dataset_name))
            print_csv_format(results_i)
    if len(results) == 1:
        results = list(results.values())[0]
    return results


def do_train(cfg, model, resume=False):
    model.train()
    optimizer = build_optimizer(cfg, model)
    scheduler = build_lr_scheduler(cfg, optimizer)

    checkpointer = DetectionCheckpointer(
        model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler
    )
    start_iter = (
            checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1
    )
    max_iter = cfg.SOLVER.MAX_ITER

    periodic_checkpointer = PeriodicCheckpointer(
        checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter
    )

    writers = (
        [
            CommonMetricPrinter(max_iter),
            JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")),
            TensorboardXWriter(cfg.OUTPUT_DIR),
        ]
        if comm.is_main_process()
        else []
    )

    # compared to "train_net.py", we do not support accurate timing and
    # precise BN here, because they are not trivial to implement
    dataset_mapper = DetDatasetMapper(cfg, is_train=True)
    data_loader = build_detection_train_loader(cfg, mapper=dataset_mapper)

    logger.info("builded detection train loader by DatasetMapper")
    logger.info("Starting training from iteration {}".format(start_iter))
    with EventStorage(start_iter) as storage:
        for data, iteration in zip(data_loader, range(start_iter, max_iter)):
            iteration = iteration + 1
            storage.step()

            loss_dict = model(data)
            losses = sum(loss_dict.values())
            assert torch.isfinite(losses).all(), loss_dict

            loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()}
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            if comm.is_main_process():
                storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False)
            scheduler.step(None)

            if (
                    cfg.TEST.EVAL_PERIOD > 0
                    and iteration % cfg.TEST.EVAL_PERIOD == 0
                    and iteration != max_iter
            ):
                do_test(cfg, model)
                # Compared to "train_net.py", the test results are not dumped to EventStorage
                comm.synchronize()

            if iteration - start_iter > 5 and (iteration % 20 == 0 or iteration == max_iter):
                for writer in writers:
                    writer.write()
            periodic_checkpointer.step(iteration)


def setup():
    """
    Create configs and perform basic setups.
    """
    for d in ["train", "test"]:
        DatasetCatalog.register("fracture_" + d, lambda: get_fracture_dicts("data/FracAtlas", d))
        MetadataCatalog.get("fracture_" + d).set(thing_classes=["fracture"])
    fracture_metadata = MetadataCatalog.get("fracture_train")

    cfg = get_cfg()
    cfg = add_efficientdet_config(cfg)
    # cfg.merge_from_file(args.config_file)
    cfg.DATASETS.TRAIN = ("fracture_train",)
    cfg.DATASETS.TEST = ("fracture_test",)
    cfg.freeze()
    # default_setup(
    #     cfg # , args
    # )  # if you don't like any of the default setup, write your own setup code
    return cfg


def main():
    cfg = setup()

    model = build_model(cfg)
    # logger.info("Model:\n{}".format(model))
    # if args.eval_only:
    #     DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
    #         cfg.MODEL.WEIGHTS, resume=args.resume
    #     )
    #     return do_test(cfg, model)

    distributed = comm.get_world_size() > 1
    if distributed:
        model = DistributedDataParallel(
            model, device_ids=[comm.get_local_rank()], broadcast_buffers=False,
            find_unused_parameters=True
        )
    do_train(cfg, model)
    return do_test(cfg, model)


launch(
    main,
    1,
    num_machines=1,
    machine_rank=0,
    dist_url="tcp://127.0.0.1:{}".format(2**15 + 2**14 + hash(os.getuid()) % 2**14),
    # args=(args,),
)

fatal: destination path 'EfficientDet.detectron2' already exists and is not an empty directory.
/content/EfficientDet.detectron2


FileNotFoundError: [Errno 2] No such file or directory: 'output/metrics.json'