In [1]:
import cv2
import os


In [3]:
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import load_coco_json
from pycocotools.coco import COCO
from copy import deepcopy

# Image directory that is passed to the COCO loaders as the image root.
img_root = "/workspace/datasets/seg_by_patient/preprocessed/pos_cropped_patch_all_r1_r2"

# Directory holding the COCO-format annotation files, and the merged
# annotation json used for the dataset registered in this notebook.
coco_anno_root = "/workspace/datasets/seg_by_patient/preprocessed/anno/coco_format/"
all_anno = os.path.join(coco_anno_root, "pre_all_complete_merge.json")

def load_coco(json_file, img_root, dataset_name=None):
    """Load a COCO json as detectron2 records and tag each with its patient id.

    Args:
        json_file: Path to the COCO-format annotation file.
        img_root: Image directory handed to ``load_coco_json``.
        dataset_name: Optional dataset name forwarded to ``load_coco_json``.
            Per the detectron2 documentation, supplying it lets the loader
            fill in ``thing_classes`` on that dataset's metadata and remap
            category ids to a contiguous range. The default ``None`` keeps
            the previous behaviour of this function.

    Returns:
        A list of record dicts. Each one is a deep copy of a record returned
        by ``load_coco_json`` with an extra ``"patient_id"`` key; the value
        is ``None`` when the image entry has no ``"patient_id"`` field.
    """
    ds_dicts = load_coco_json(json_file, img_root, dataset_name)

    # Look up each image's "patient_id" from the COCO image index.
    coco = COCO(json_file)
    ids = {img_id: coco.imgs[img_id].get("patient_id") for img_id in coco.imgs}

    out = []
    for d in ds_dicts:
        d = deepcopy(d)
        d["patient_id"] = ids[d["image_id"]]
        out.append(d)

    return out

DATASET_NAME = "all_ds"

# Drop any stale registration first so this cell can be re-run in the same
# kernel without hitting "already registered" errors.
if DATASET_NAME in DatasetCatalog.list():
    DatasetCatalog.remove(DATASET_NAME)

if DATASET_NAME in MetadataCatalog.list():
    MetadataCatalog.remove(DATASET_NAME)

# Register the patient-aware loader as the data source for this dataset.
DatasetCatalog.register(DATASET_NAME, lambda: load_coco(all_anno, img_root))

# register_coco_instances() would register DATASET_NAME in DatasetCatalog a
# second time (and fail, since the name is taken by the loader above), so
# attach the metadata it would have set directly instead.
MetadataCatalog.get(DATASET_NAME).set(
    json_file=all_anno, image_root=img_root, evaluator_type="coco"
)
# NOTE(review): load_coco calls load_coco_json without a dataset name, so
# class names / contiguous category ids are presumably not populated on the
# metadata here -- confirm whether downstream training needs them.

all_dicts = DatasetCatalog.get(DATASET_NAME)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [10]:
import re
import numpy as np

# Fetch the records registered under DATASET_NAME from the dataset catalog.
records = DatasetCatalog.get(DATASET_NAME)

def get_ptn_id(r):
    """Return the patient identifier stored on record ``r`` as a string.

    The value under ``r["patient_id"]`` is converted with ``str``; a record
    without that key raises ``KeyError``.
    """
    patient_id = r["patient_id"]
    return str(patient_id)

    # Disabled fallback, kept for reference: derive the id from the file name.
    # m = re.search(r"(patient_|P)(\d+)", r["file_name"], re.IGNORECASE)
    # return m.group(2) if m else r["image_id"]  # last resort: unique per image

# Patient id of every record (same order as `records`, one per image),
# together with the matching positional indices.
groups = np.array(list(map(get_ptn_id, records)))
idx_all = np.arange(groups.shape[0])

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [12]:
# Display the per-image patient ids computed above.
groups

array(['raw1_01', 'raw1_01', 'raw1_01', 'raw1_01', 'raw1_03', 'raw1_04',
       'raw1_04', 'raw1_05', 'raw1_05', 'raw1_05', 'raw1_06', 'raw1_06',
       'raw1_06', 'raw1_07', 'raw1_07', 'raw1_07', 'raw1_07', 'raw1_08',
       'raw1_08', 'raw1_08', 'raw1_08', 'raw1_09', 'raw1_10', 'raw1_11',
       'raw1_11', 'raw1_11', 'raw1_11', 'raw1_12', 'raw1_12', 'raw1_12',
       'raw1_12', 'raw1_13', 'raw1_13', 'raw1_13', 'raw1_14', 'raw1_15',
       'raw1_15', 'raw1_16', 'raw1_16', 'raw1_16', 'raw1_17', 'raw1_17',
       'raw1_17', 'raw1_17', 'raw1_18', 'raw1_19', 'raw1_19', 'raw1_20',
       'raw1_21', 'raw1_21', 'raw1_21', 'raw1_21', 'raw1_22', 'raw1_22',
       'raw1_22', 'raw1_23', 'raw1_23', 'raw1_23', 'raw1_23', 'raw1_24',
       'raw1_24', 'raw1_24', 'raw1_24', 'raw1_25', 'raw1_25', 'raw1_26',
       'raw1_26', 'raw1_26', 'raw1_27', 'raw1_27', 'raw1_27', 'raw1_27',
       'raw1_28', 'raw1_29', 'raw1_29', 'raw1_29', 'raw1_29', 'raw1_30',
       'raw1_30', 'raw1_30', 'raw1_30', 'raw1_31', 

In [1]:
from sklearn.model_selection import GroupKFold

def group_kfold_indices(groups, n_plsit=5, rng_seed=42, *, n_splits=None):
    """Yield ``(train_idx, test_idx)`` pairs for a grouped K-fold split.

    Every sample that shares a group label ends up on the same side of each
    split. Group labels are first replaced by integer codes drawn from a
    seeded random permutation, and those codes are what ``GroupKFold``
    receives. ``GroupKFold`` is given no seed here, so this relabelling is
    the only place ``rng_seed`` can affect the split; the earlier version
    computed the codes but passed the raw labels, leaving the seed unused.

    Args:
        groups: One group label per sample (e.g. a patient id).
        n_plsit: Number of folds. The misspelled name is kept so existing
            positional and keyword callers keep working.
        rng_seed: Seed for the permutation of the unique group labels.
        n_splits: Correctly spelled, keyword-only alias for ``n_plsit``;
            takes precedence when given.

    Yields:
        ``(train_indices, test_indices)`` as produced by ``GroupKFold.split``.
    """
    # The body previously read an undefined name `n_splits`; resolve the
    # fold count from whichever spelling the caller used.
    n_folds = n_plsit if n_splits is None else n_splits

    rng = np.random.default_rng(rng_seed)
    uniq = np.array(sorted(set(groups)))
    rng.shuffle(uniq)
    # Map each sample to its group's position in the shuffled order.
    order = {g: i for i, g in enumerate(uniq)}
    order_idx = np.array([order[g] for g in groups])

    gkf = GroupKFold(n_splits=n_folds)
    # NOTE: gkf ignores y, uses groups only.
    yield from gkf.split(np.zeros_like(order_idx), groups=order_idx)

def register_split(name, base_records, indices):
    """Register the records selected by ``indices`` in ``DatasetCatalog``.

    Args:
        name: Dataset name to register the subset under. An existing
            registration with the same name is replaced.
        base_records: Indexable collection of record dicts.
        indices: Iterable of positions into ``base_records``.
    """
    subset = [base_records[i] for i in indices]
    # Was `DataCatalog`, an undefined name; the catalog is DatasetCatalog.
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)

    # Bind `subset` as a default argument so the loader keeps this exact list,
    # and return a deep copy on each call so the stored records are not
    # shared with whoever consumes the dataset.
    DatasetCatalog.register(name, lambda s=subset: deepcopy(s))



In [4]:
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import load_coco_json
from pycocotools.coco import COCO
from copy import deepcopy
from pathlib import Path

# Image directory and the merged COCO annotation file.
img_root = "/workspace/datasets/seg_by_patient/preprocessed/pos_cropped_patch_all_r1_r2"

coco_anno_root = Path("/workspace/datasets/seg_by_patient/preprocessed/anno/coco_format/")
all_anno = coco_anno_root.joinpath("pre_all_complete_merge.json")

# Parse the annotation file with pycocotools.
anno = COCO(all_anno)

# Bare name so the notebook echoes the loaded object.
anno

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


<pycocotools.coco.COCO at 0x7f57b92fca70>

In [7]:
# Image id -> patient id; the value is None when an image entry has no
# "patient_id" field.
ids = {
    image_id: image_info.get("patient_id")
    for image_id, image_info in anno.imgs.items()
}

ids

{0: 'raw1_01',
 1: 'raw1_01',
 2: 'raw1_01',
 3: 'raw1_01',
 4: 'raw1_03',
 5: 'raw1_04',
 6: 'raw1_04',
 7: 'raw1_05',
 8: 'raw1_05',
 9: 'raw1_05',
 10: 'raw1_06',
 11: 'raw1_06',
 12: 'raw1_06',
 13: 'raw1_07',
 14: 'raw1_07',
 15: 'raw1_07',
 16: 'raw1_07',
 17: 'raw1_08',
 18: 'raw1_08',
 19: 'raw1_08',
 20: 'raw1_08',
 21: 'raw1_09',
 22: 'raw1_10',
 23: 'raw1_11',
 24: 'raw1_11',
 25: 'raw1_11',
 26: 'raw1_11',
 27: 'raw1_12',
 28: 'raw1_12',
 29: 'raw1_12',
 30: 'raw1_12',
 31: 'raw1_13',
 32: 'raw1_13',
 33: 'raw1_13',
 34: 'raw1_14',
 35: 'raw1_15',
 36: 'raw1_15',
 37: 'raw1_16',
 38: 'raw1_16',
 39: 'raw1_16',
 40: 'raw1_17',
 41: 'raw1_17',
 42: 'raw1_17',
 43: 'raw1_17',
 44: 'raw1_18',
 45: 'raw1_19',
 46: 'raw1_19',
 47: 'raw1_20',
 48: 'raw1_21',
 49: 'raw1_21',
 50: 'raw1_21',
 51: 'raw1_21',
 52: 'raw1_22',
 53: 'raw1_22',
 54: 'raw1_22',
 55: 'raw1_23',
 56: 'raw1_23',
 57: 'raw1_23',
 58: 'raw1_23',
 59: 'raw1_24',
 60: 'raw1_24',
 61: 'raw1_24',
 62: 'raw1_24',
 6