In [1]:
suffix = "122523" # Experiment ID for this run; it is appended to the output directory name

In [2]:
import detectron2
from pathlib import Path
import random, cv2, os
import matplotlib.pyplot as plt
import numpy as np
import pycocotools.mask as mask_util
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
from detectron2.evaluation.evaluator import DatasetEvaluator
from detectron2.structures import polygons_to_bitmask
from detectron2.evaluation import inference_on_dataset, print_csv_format
from detectron2.utils import comm
# Initialise the detectron2 logger for this session.
setup_logger()

# Root directory of the Sartorius competition data. The original location is
# kept as the default; set the SARTORIUS_DATA_DIR environment variable to point
# the notebook at another copy of the data without editing the code.
data_dir = os.environ.get('SARTORIUS_DATA_DIR', '/home/xm/workspace/sartorius-cell-instance-segmentation')
# Image directory of the LIVECell dataset, used as the image root when the
# annotation files are registered.
live_data_dir = f"{data_dir}/LIVECell_dataset_2021/images"

### Load the LIVECell data

In [None]:
cfg = get_cfg() # Create a detectron2 config populated with default values

# Register the LIVECell train / val / test splits as COCO-format datasets.
# Registering a name that is already in the catalog raises, so names that are
# already present are skipped; this lets the cell be re-run in a live kernel.
for split in ('train', 'val', 'test'):
    dataset_name = f'sartorius_{split}'
    if dataset_name not in DatasetCatalog.list():
        register_coco_instances(dataset_name, {}, f'{data_dir}/livecell_annotations_{split}.json', live_data_dir)

metadata = MetadataCatalog.get('sartorius_train')
train_ds = DatasetCatalog.get('sartorius_train')

### Define evaluator 

In [4]:
def polygon_to_rle(polygon, shape=(520, 704)):
    '''
    Rasterise a single polygon and return it in run-length encoding (RLE).

    Args:
        polygon: polygon coordinates in any form np.asarray accepts
            (presumably a flat [x0, y0, x1, y1, ...] sequence; confirm
            against the annotation files).
        shape: (height, width) of the bitmask the polygon is drawn into.

    Returns:
        The result of pycocotools' mask encode for the rasterised bitmask.
    '''
    # Every coordinate is offset by 0.25 before rasterisation.
    shifted = np.asarray(polygon) + 0.25
    height, width = shape[0], shape[1]
    bitmask = polygons_to_bitmask([shifted], height, width)
    # The encoder is handed a Fortran-ordered copy of the mask.
    return mask_util.encode(np.asfortranarray(bitmask))

# Taken from https://www.kaggle.com/theoviel/competition-metric-map-iou
def precision_at(threshold, iou):
    '''
    Count true positives, false positives and false negatives at one IoU threshold.

    Args:
        threshold: a (prediction, target) pair counts as a match when its IoU
            is strictly greater than this value.
        iou: 2-D array laid out as (predictions, targets), which is how
            score() builds it (predictions are passed first to the IoU call).

    Returns:
        (tp, fp, fn):
            tp - predictions that match exactly one target,
            fp - predictions that match no target (extra objects),
            fn - targets that no prediction matches (missed objects).
    '''
    matches = iou > threshold
    targets_per_pred = np.sum(matches, axis=1)  # one entry per prediction (row)
    preds_per_target = np.sum(matches, axis=0)  # one entry per target (column)
    true_positives = targets_per_pred == 1   # Correct objects
    false_positives = targets_per_pred == 0  # Extra objects: predictions without a target
    false_negatives = preds_per_target == 0  # Missed objects: targets without a prediction
    return np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)

def score(pred, targ):
    '''
    Compute the mean precision over IoU thresholds 0.5, 0.55, ..., 0.95 for one image.

    Args:
        pred: model output dict; pred['instances'].pred_masks is read, moved
            to CPU and converted to a numpy array of per-instance masks.
        targ: list of annotation dicts; the first entry of each
            'segmentation' field is rasterised as the target polygon.

    Returns:
        Mean over the thresholds of tp / (tp + fp + fn). Returns 0.0 when
        there are no targets or no predicted masks.
    '''
    # Without ground truth nothing can match, so the precision is 0.
    if len(targ) == 0:
        return 0.0

    pred_masks = pred['instances'].pred_masks.cpu().numpy()
    # Without predictions nothing can match either. Both guards also keep an
    # empty IoU result (returned by pycocotools when either list is empty)
    # from reaching the threshold comparison.
    if len(pred_masks) == 0:
        return 0.0

    # Rasterise targets at the size of the predicted masks instead of relying
    # on polygon_to_rle's default shape, so other image sizes also work.
    mask_shape = pred_masks.shape[-2:]
    enc_preds = [mask_util.encode(np.asarray(p, order='F')) for p in pred_masks]
    # NOTE(review): only the first polygon of each annotation is used; an
    # object stored as several polygons would lose its other parts. Confirm
    # the annotation files never do that.
    enc_targs = [polygon_to_rle(ann['segmentation'][0], shape=mask_shape) for ann in targ]
    # Rows are predictions, columns are targets; no target is flagged as crowd.
    ious = mask_util.iou(enc_preds, enc_targs, [0] * len(enc_targs))

    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, ious)
        total = tp + fp + fn
        # total is 0 when nothing is counted in any bucket (e.g. every
        # prediction matches several targets); score that as 0 rather than NaN.
        prec.append(tp / total if total > 0 else 0.0)
    return np.mean(prec)

class MAPIOUEvaluator(DatasetEvaluator):
    '''
    Evaluator that reports the mean of the per-image score() values
    (mean precision over IoU thresholds) during validation and testing.
    '''
    def __init__(self, dataset_name):
        '''
        Load the dataset registered under `dataset_name` and index its
        annotations by image id.
        '''
        dataset_dicts = DatasetCatalog.get(dataset_name)
        self.annotations_cache = {item['image_id']: item['annotations'] for item in dataset_dicts}
        # Create the score list here as well, so process() and evaluate() do
        # not raise AttributeError when reset() has not been called yet.
        self.scores = []

    def reset(self):
        '''Discard all scores collected so far.'''
        self.scores = []

    def process(self, inputs, outputs):
        '''
        Score each (input, output) pair and store the result.

        An image for which the model produced no instances is scored 0;
        otherwise the output is scored against the cached annotations for
        the input's image id.
        '''
        for inp, out in zip(inputs, outputs):
            if len(out['instances']) == 0:
                self.scores.append(0)
            else:
                targ = self.annotations_cache[inp['image_id']]
                self.scores.append(score(out, targ))

    def evaluate(self):
        '''Return the mean of the collected scores under the key "MaP IoU".'''
        return {"MaP IoU": np.mean(self.scores)}

# Trainer that evaluates with the custom MaP IoU metric
class Trainer(DefaultTrainer):
    '''DefaultTrainer subclass whose evaluator is a MAPIOUEvaluator.'''

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        '''
        Build the evaluator for `dataset_name`.

        `cfg` and `output_folder` are accepted but not used.
        '''
        evaluator = MAPIOUEvaluator(dataset_name)
        return evaluator

### Train

In [5]:
# Model zoo entry used for both the architecture config and the initial weights:
# Mask R-CNN with the R_50_FPN backbone config, 3x schedule.
model_zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg.merge_from_file(model_zoo.get_config_file(model_zoo_entry))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_zoo_entry) # initial weights: the model zoo checkpoint for this config

cfg.DATASETS.TRAIN = ("sartorius_train", "sartorius_test") # training set (the test split is merged into it)
cfg.DATASETS.TEST = ("sartorius_val",) # evaluation runs on the validation split
cfg.DATALOADER.NUM_WORKERS = 10 # number of data-loading workers

In [None]:
cfg.SOLVER.IMS_PER_BATCH = 1 # number of images per batch
cfg.SOLVER.BASE_LR = 0.001 # initial learning rate
cfg.SOLVER.MAX_ITER = 260000 # total number of training iterations
cfg.SOLVER.STEPS = [] # iterations at which the learning rate is decayed; empty, so no step decay

# Number of iterations in one pass over the training data. Every
# DatasetCatalog.get call loads the dataset again, so the images are counted
# once here and the result is reused for both periods below. The datasets are
# taken from cfg.DATASETS.TRAIN so the count follows the configured training set.
num_train_images = sum(len(DatasetCatalog.get(name)) for name in cfg.DATASETS.TRAIN)
iters_per_epoch = num_train_images // cfg.SOLVER.IMS_PER_BATCH

cfg.SOLVER.CHECKPOINT_PERIOD = iters_per_epoch  # save a checkpoint once per epoch
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # RoIs sampled per image when training the ROI heads (not the RPN)
# NOTE(review): NUM_CLASSES should equal the number of categories in the
# registered annotation files; confirm that 10 is correct.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10 # number of object classes
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .4 # minimum detection score kept at test time; trades recall against many low-confidence detections that slow down post-processing
cfg.TEST.EVAL_PERIOD = iters_per_epoch  # run validation once per epoch

cfg.OUTPUT_DIR = f"./output/livecell_{suffix}" # where checkpoints and logs are written
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
print(cfg.OUTPUT_DIR)

cfg.SEED = 42 # random seed

cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.2, 0.5, 1.0, 2.0, 5.0]] # anchor aspect ratios
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[4], [8],[16],[64],[256]] # anchor sizes, one list per feature map

cfg.INPUT.MIN_SIZE_TRAIN = (480, 520,  560, 640, 672, 736, 800, 864, 928)  # candidate shorter-side sizes during training (multi-scale)
cfg.INPUT.MIN_SIZE_TEST = 928 # shorter-side size at test time
cfg.INPUT.MAX_SIZE_TEST = 1333 # upper bound on the longer side at test time
cfg.INPUT.MAX_SIZE_TRAIN = 1333 # upper bound on the longer side during training



In [None]:
# Start training
trainer = Trainer(cfg) # Trainer is the DefaultTrainer subclass defined in this notebook
# resume=False: per the detectron2 docs this loads cfg.MODEL.WEIGHTS instead of
# resuming from a checkpoint in cfg.OUTPUT_DIR (confirm for the installed version).
trainer.resume_or_load(resume=False)
trainer.train()