# Import libraries and packages

In [None]:
pip install -q 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
import os
import sys
import random
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.transforms import functional as TF

from PIL import Image, ImageFilter
import numpy as np
from sklearn.metrics import f1_score
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
import matplotlib.pyplot as plt

import os
import numpy as np
from PIL import Image
from detectron2.structures import BoxMode

# Pick the torch device once: CUDA when torch reports it as available,
# otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset

In [None]:
import os
from sklearn.model_selection import train_test_split

# Kaggle input folders holding the training images and their mask images.
IM_DIR  = "/kaggle/input/gd-go-c-hcmus-aic-fragment-segmentation-track/train/images"
MSK_DIR = "/kaggle/input/gd-go-c-hcmus-aic-fragment-segmentation-track/train/masks"

# Sort the directory listing so the split below does not depend on the
# arbitrary order in which os.listdir returns entries.
all_fnames = sorted(os.listdir(IM_DIR))
# Hold out 20% of the files for validation; random_state pins the shuffle.
train_fnames, val_fnames = train_test_split(all_fnames, test_size=0.2, random_state=42)


## Convert to detectron2's standard input

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog

def register_lazy(split, fnames):
    """Register the dataset ``fragments_<split>`` with detectron2's catalogs.

    The registered loader only builds lightweight records (image path, mask
    path and a running ``image_id``); nothing is read from disk until a
    mapper processes a record.

    Args:
        split: Suffix used to build the dataset name, e.g. ``"train"``.
        fnames: Image file names, relative to ``IM_DIR``. The matching mask
            is looked up in ``MSK_DIR`` under the same stem with a ``.png``
            extension.
    """
    name = f"fragments_{split}"
    # Snapshot the names so later mutation of the caller's list cannot change
    # what the registered loader yields.
    fnames = list(fnames)

    # DatasetCatalog.register refuses a name that is already taken, which
    # makes re-running this notebook cell fail. Drop the stale entry first.
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)

    def load_records():
        return [
            {
                "file_name": os.path.join(IM_DIR, fn),
                "mask_file": os.path.join(MSK_DIR, os.path.splitext(fn)[0] + ".png"),
                "image_id": idx,
            }
            for idx, fn in enumerate(fnames)
        ]

    DatasetCatalog.register(name, load_records)
    MetadataCatalog.get(name).set(thing_classes=["fragment"])

register_lazy("train", train_fnames)
register_lazy("val",   val_fnames)


In [None]:
import numpy as np
import torch
from PIL import Image
from detectron2.data import detection_utils as utils, transforms as T
from detectron2.structures import BoxMode, Instances

def _instance_annotations_from_mask(mask_rgb):
    """Split a colour-coded H x W x 3 mask into one annotation dict per colour.

    Every distinct non-black colour is treated as one instance of category 0.
    """
    # Pack the three channels into one integer per pixel so each instance is
    # found and compared on a single channel instead of three.
    channels = mask_rgb.astype(np.uint32)
    packed = (channels[..., 0] << 16) | (channels[..., 1] << 8) | channels[..., 2]

    annos = []
    for code in np.unique(packed):
        if code == 0:  # pure black (0, 0, 0) is background
            continue
        binary_mask = (packed == code).astype(np.uint8)
        ys, xs = np.nonzero(binary_mask)
        annos.append({
            # XYXY boxes are half-open: the far edge lies one past the last
            # occupied pixel. Without the +1 a one-pixel-wide fragment would
            # produce a zero-width box.
            "bbox": [int(xs.min()), int(ys.min()), int(xs.max()) + 1, int(ys.max()) + 1],
            "bbox_mode": BoxMode.XYXY_ABS,
            "segmentation": binary_mask,
            "category_id": 0,
        })
    return annos


def lazy_mapper(dataset_dict):
    """Turn a lightweight dataset record into a detectron2 training sample.

    Loads the image and its colour-coded instance mask, derives one
    box + bitmask annotation per mask colour, applies a random resize and a
    random horizontal flip to image and annotations alike, and packs the
    result in the format detectron2 models consume.

    Args:
        dataset_dict: Record with at least ``"file_name"`` (image path) and
            ``"mask_file"`` (mask path). The input dict is not modified.

    Returns:
        A shallow copy of ``dataset_dict`` with these keys added:
        ``"image"`` (C x H x W tensor of the augmented BGR image),
        ``"instances"`` (ground-truth ``Instances`` with bitmask masks), and
        ``"height"`` / ``"width"`` (size of the augmented image).
    """
    dataset_dict = dataset_dict.copy()
    img = utils.read_image(dataset_dict["file_name"], format="BGR")

    # Close the file handle deterministically; this runs in long-lived
    # data-loader workers.
    with Image.open(dataset_dict["mask_file"]) as mask_file:
        mask_rgb = np.array(mask_file.convert("RGB"))
    annos = _instance_annotations_from_mask(mask_rgb)

    # simple augmentation
    aug_list = [
        T.ResizeShortestEdge(short_edge_length=(400, 600), max_size=600),
        T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
    ]
    aug_input = T.AugInput(img)
    transforms = T.AugmentationList(aug_list)(aug_input)
    img = aug_input.image
    out_size = img.shape[:2]

    # Transform annotations accordingly
    annos_transformed = []
    for obj in annos:
        obj = obj.copy()
        # transform_instance_annotations does not support bitmask segmentation
        # so we remove it, transform bbox etc., and later re-attach it
        segm = obj.pop("segmentation")
        obj = utils.transform_instance_annotations(obj, transforms, out_size)
        # apply the same geometric transform to the 0/1 mask directly
        obj["segmentation"] = transforms.apply_segmentation(segm).astype(np.uint8)
        annos_transformed.append(obj)

    # prepare instances
    instances = utils.annotations_to_instances(
        annos_transformed,
        out_size,
        mask_format="bitmask"
    )
    # Down-scaling can leave a tiny fragment with an empty mask or a
    # degenerate box; drop those so they never reach the model as targets.
    instances = utils.filter_empty_instances(instances)

    dataset_dict["image"] = torch.as_tensor(img.transpose(2, 0, 1).copy())
    dataset_dict["instances"] = instances
    dataset_dict["height"] = img.shape[0]
    dataset_dict["width"] = img.shape[1]

    return dataset_dict


# Model config

In [None]:
from detectron2.config import get_cfg
from detectron2 import model_zoo
import os

import torch

# Start from detectron2's defaults and layer the Mask R-CNN R50-FPN recipe on top.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
))
# NOTE(review): MODEL.WEIGHTS is not overridden, so training starts from
# whatever the base config specifies. If COCO-pretrained Mask R-CNN weights
# are intended, set it via model_zoo.get_checkpoint_url(<same yaml>).

# Run on the GPU when there is one, otherwise on the CPU (same rule as the
# prediction cell).
use_cuda = torch.cuda.is_available()
cfg.MODEL.DEVICE              = "cuda" if use_cuda else "cpu"

cfg.DATASETS.TRAIN            = ("fragments_train",)
cfg.DATASETS.TEST             = ("fragments_val",)
cfg.DATALOADER.NUM_WORKERS    = 4

cfg.MODEL.ROI_HEADS.NUM_CLASSES       = 1   # single "fragment" class
cfg.MODEL.BACKBONE.FREEZE_AT          = 2   # freeze first ResNet block
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128

cfg.INPUT.MIN_SIZE_TRAIN      = (400,)
cfg.INPUT.MIN_SIZE_TEST       = 400

cfg.OUTPUT_DIR                = "./detectron2_output"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

cfg.SOLVER.IMS_PER_BATCH      = 8
cfg.SOLVER.BASE_LR            = 0.002
cfg.SOLVER.MAX_ITER           = 5000
cfg.SOLVER.STEPS              = []       # no LR decay
cfg.SOLVER.CHECKPOINT_PERIOD  = 500
cfg.SOLVER.LOGGING_PERIOD     = 10

# Mixed precision: detectron2's AMP trainer requires CUDA, so only enable it
# when a GPU is actually present.
cfg.SOLVER.AMP.ENABLED        = use_cuda

# RPN proposals
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN  = 1200
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 1200
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST   = 700
cfg.MODEL.RPN.POST_NMS_TOPK_TEST  = 700

# turn off val‐set eval during training
cfg.TEST.EVAL_PERIOD         = 0

cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4
cfg.TEST.DETECTIONS_PER_IMAGE        = 100

## Save checkpoints

In [None]:
from detectron2.engine.hooks import HookBase

class BestCheckpointer(HookBase):
    """Hook that saves a checkpoint whenever ``segm/AP`` reaches a new best.

    Every ``eval_period`` iterations the hook reads the most recent
    ``segm/AP`` scalar from the trainer's event storage. If it beats the best
    value seen so far, a checkpoint named ``model_best_<AP>`` is written
    through the trainer's checkpointer.

    Args:
        eval_period: Check the metric every this many iterations. A value of
            zero or less (or ``None``) disables the hook.
        trainer: Trainer whose ``iter``, ``storage`` and ``checkpointer`` are
            used.
    """

    def __init__(self, eval_period, trainer):
        self.eval_period = eval_period
        self.trainer = trainer
        self.best_metric = -1

    def after_step(self):
        # A non-positive period means "never"; it would also make the modulo
        # below divide by zero.
        if not self.eval_period or self.eval_period <= 0:
            return
        if (self.trainer.iter + 1) % self.eval_period != 0:
            return

        latest = self.trainer.storage.latest().get("segm/AP")
        if latest is None:
            # No evaluation result has been logged yet, so there is nothing
            # to compare and no reason to write a "best" checkpoint.
            return

        # Current detectron2 stores each scalar as (value, iteration); accept
        # a bare number too in case an older release is installed.
        ap = latest[0] if isinstance(latest, (tuple, list)) else latest
        ap = float(ap)

        # A NaN AP compares False here and is therefore skipped.
        if ap > self.best_metric:
            self.best_metric = ap
            self.trainer.checkpointer.save(f"model_best_{ap:.4f}")

## Start training

In [None]:
import warnings
# Hide FutureWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=FutureWarning)

from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2.data import build_detection_train_loader
from detectron2.engine import DefaultTrainer

class MyTrainer(DefaultTrainer):
    """DefaultTrainer whose training loader maps records with ``lazy_mapper``."""

    @classmethod
    def build_train_loader(cls, cfg):
        # Use lazy_mapper so each record's image and mask are loaded and
        # augmented by the mapper when the loader processes the record.
        return build_detection_train_loader(cfg, mapper=lazy_mapper)

# Then later, instead of DefaultTrainer(cfg):
trainer = MyTrainer(cfg)
# resume=False: presumably loads cfg.MODEL.WEIGHTS rather than resuming from
# the last checkpoint in OUTPUT_DIR -- confirm against the detectron2 docs.
trainer.resume_or_load(resume=False)
trainer.train()

# with that set up, estimated training time on Kaggle, with GPU T4 x2, is 5 hours


## Save model configs

In [None]:
# Write the fully resolved config next to the checkpoints. The path is built
# from cfg.OUTPUT_DIR so it keeps following the output folder if that changes.
cfg_dump_path = os.path.join(cfg.OUTPUT_DIR, "mask_rcnn_R_50_FPN_3x.yaml")
with open(cfg_dump_path, "w", encoding="utf-8") as f:
    f.write(cfg.dump())
print("Wrote:", cfg_dump_path)

# Predict

In [None]:
import cv2
import torch
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor

# initialize model
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.WEIGHTS = "./detectron2_output/model_final.pth"
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# This config is rebuilt from scratch, so repeat the test-time settings the
# training config chose. Otherwise inference silently falls back to the
# library defaults for input size, proposal count and detections per image.
cfg.INPUT.MIN_SIZE_TEST = 400
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 700
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 700
cfg.TEST.DETECTIONS_PER_IMAGE = 100

predictor = DefaultPredictor(cfg)

# predict
img_path = "/kaggle/input/gd-go-c-hcmus-aic-fragment-segmentation-track/train/images/002.jpg"
msk_path = "/kaggle/input/gd-go-c-hcmus-aic-fragment-segmentation-track/train/masks/002.png"

# Show the input image next to its ground-truth mask for visual reference.
imge = Image.open(img_path).convert('RGB')
mske = Image.open(msk_path).convert('RGB')
fig, axs = plt.subplots(1, 2, figsize=(12, 6))   # 1 row, 2 columns
axs[0].imshow(np.array(imge))
axs[0].set_title("Image")
axs[0].axis('off')
axs[1].imshow(np.array(mske))
axs[1].set_title("Mask")
axs[1].axis('off')
plt.tight_layout()
plt.show()

# The image is read with OpenCV, which yields BGR channel order.
img_bgr = cv2.imread(img_path)
if img_bgr is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {img_path}")
outputs = predictor(img_bgr)
instances = outputs["instances"].to("cpu")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Predicted instance masks as one array, stacked as (N, H, W).
masks = instances.pred_masks.cpu().numpy()
N, H, W = masks.shape

# One RGB colour per instance, sampled along the 'tab20' colormap at i / N.
cmap = plt.get_cmap('tab20')
colors = np.array([cmap(idx / N)[:3] for idx in range(N)])
colors = (colors * 255).astype(np.uint8)  # shape (N, 3)

# Paint each mask onto a black canvas; where masks overlap, later instances
# overwrite earlier ones.
mosaic = np.zeros((H, W, 3), dtype=np.uint8)
for i, mask in enumerate(masks):
    mosaic[mask] = colors[i]

plt.figure(figsize=(12, 8))
plt.imshow(mosaic)
plt.axis('off')
plt.show()

# CDF Plot

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

def draw_cdf(masks, pixel_size_mm=3):
    """Plot the empirical CDF of fragment sizes and return summary statistics.

    The "size" (called diameter in the returned dict) of a fragment is the
    square root of its mask area, i.e. the side length of a square with the
    same area, in centimetres.

    Args:
        masks: Array-like of per-fragment masks, summed over axes 1 and 2 to
            obtain pixel areas (so shape ``(N, H, W)`` is expected).
        pixel_size_mm: Edge length of one pixel in millimetres.

    Returns:
        dict with keys ``'N'``, ``'Dmin'``, ``'D10'``, ``'D50'``, ``'D90'``,
        ``'Average'``, ``'Dmax'`` (all in cm) and ``'diameters_cm'`` (the
        unsorted per-fragment sizes). When ``masks`` holds no fragments,
        nothing is plotted, ``'N'`` is 0, every statistic is NaN and
        ``'diameters_cm'`` is empty.
    """
    masks = np.asarray(masks)

    # No detections: there is no distribution to plot, and the statistics
    # below would fail on an empty array.
    if len(masks) == 0:
        print("draw_cdf: no fragments to plot")
        nan = float('nan')
        return {
            'N': 0,
            'Dmin': nan,
            'D10': nan,
            'D50': nan,
            'D90': nan,
            'Average': nan,
            'Dmax': nan,
            'diameters_cm': np.empty(0, dtype=float)
        }

    # 1) compute sizes in cm
    pixel_size_cm = pixel_size_mm / 10.0
    areas_px      = masks.sum(axis=(1,2))             # pixel² per fragment
    areas_cm2     = areas_px * (pixel_size_cm**2)     # cm²
    diam_cm       = np.sqrt(areas_cm2)                # cm
    N             = len(diam_cm)

    # 2) sort & empirical CDF values
    d_sorted = np.sort(diam_cm)
    y_full   = np.arange(1, N+1) / N * 100            # 1/N, 2/N, …, 1.0 → in %

    # 2b) prepend the (0, 0) anchor so CDF starts at zero
    d_plot = np.concatenate([[0.0], d_sorted])
    y_plot = np.concatenate([[0.0], y_full])

    # 3) key statistics
    Dmin, Dmax, Dmean = d_sorted[0], d_sorted[-1], d_sorted.mean()
    D10, D50, D90     = np.percentile(d_sorted, [10, 50, 90])

    # Keep the familiar 0-100 cm axis, but widen it when the largest fragment
    # would otherwise fall outside the plot.
    fits_default_axis = Dmax <= 100
    x_max = 100 if fits_default_axis else float(Dmax) * 1.05

    # 4) start plotting
    fig, ax = plt.subplots(figsize=(10,6))
    ax.plot(d_plot, y_plot, '-o', color='blue', label='CDF')
    # If you prefer a step‐plot, comment the line above and uncomment below:
    # ax.step(d_plot, y_plot, where='post', color='blue', label='CDF')

    # 5) horizontal guides @10,50,90%, labelled at the right edge of the axis
    for pct, col in zip([10,50,90], ['cyan','magenta','blue']):
        ax.axhline(pct, color=col, linestyle=':', lw=1)
        ax.text(x_max, pct, f'{pct}%', color=col,
                va=('bottom' if pct==10 else 'top' if pct==90 else 'center'),
                ha='right')

    # 6) vertical percentile lines @D10,D50,D90
    for dval, pct, col in zip([D10,D50,D90], [10,50,90], ['cyan','magenta','blue']):
        ax.axvline(dval, color=col, linestyle=':', lw=2)
        ax.text(dval, pct, f'D{pct}: {dval:.2f}',
                color=col, fontsize=9,
                va=('bottom' if pct==10 else 'top' if pct==90 else 'center'),
                ha='left', backgroundcolor='white')

    # 7) vertical lines @Dmin, mean, Dmax
    ax.axvline(Dmin,  color='green',  linestyle='--', lw=2)
    ax.axvline(Dmean, color='orange', linestyle='--', lw=2)
    ax.axvline(Dmax,  color='red',    linestyle='--', lw=2)

    ax.text(Dmin,  0,   f'Dmin: {Dmin:.2f}',  color='green',
            va='bottom', ha='left', fontsize=9)
    ax.text(Dmean, 50,  f'Average: {Dmean:.2f}', color='orange',
            va='center', ha='left', fontsize=9)
    ax.text(Dmax,  100, f'Dmax: {Dmax:.2f}',  color='red',
            va='top',    ha='right', fontsize=9)

    # 8) axes formatting
    ax.set_xlim(0, x_max)
    if fits_default_axis:
        ax.set_xticks([0,20,40,60,80,100])
    ax.set_ylim(0, 105)
    ax.set_xlabel('Fragment Size (cm)')
    ax.set_ylabel('Cumulative Percentage (%)')
    ax.set_title(
        'Cumulative Distribution Function (CDF) of Fragment Sizes\n'
        f'Total Fragments: {N}'
    )
    ax.grid(linestyle='--', linewidth=0.5)

    # 9) custom legend
    handles = [
        Line2D([0],[0], color='blue',   marker='o', linestyle='-',  label='CDF'),
        Line2D([0],[0], color='green',  linestyle='--', lw=2,       label=f'Dmin: {Dmin:.2f}'),
        Line2D([0],[0], color='orange', linestyle='--', lw=2,       label=f'Average: {Dmean:.2f}'),
        Line2D([0],[0], color='red',    linestyle='--', lw=2,       label=f'Dmax: {Dmax:.2f}')
    ]
    ax.legend(handles=handles, loc='lower right')

    plt.tight_layout()
    plt.show()

    return {
        'N': N,
        'Dmin': Dmin,
        'D10': D10,
        'D50': D50,
        'D90': D90,
        'Average': Dmean,
        'Dmax': Dmax,
        'diameters_cm': diam_cm
    }

# Example call: plot the size distribution of the `masks` array, using a scale
# of 7 mm per pixel.
# NOTE(review): confirm 7 mm/pixel matches the imaging setup; the function's
# own default is 3.
stats = draw_cdf(masks, pixel_size_mm=7)