In [1]:
import os
import copy
import torch
import detectron2
from detectron2.data import detection_utils as utils
from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.evaluation import COCOEvaluator
from detectron2.data import build_detection_test_loader, build_detection_train_loader

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Register the COCO-format train/test datasets.
#
# Registering a name that already exists raises AssertionError, which makes this
# cell fail when it is re-run in the same kernel. Check the catalog explicitly
# instead of swallowing AssertionError, so that any *other* assertion raised by
# register_coco_instances (e.g. a bad argument type) is still surfaced.
if 'coco_trash_train' not in DatasetCatalog.list():
    register_coco_instances('coco_trash_train', {}, '../../dataset/train.json', '../../dataset/')

if 'coco_trash_test' not in DatasetCatalog.list():
    register_coco_instances('coco_trash_test', {}, '../../dataset/test.json', '../../dataset/')

# Attach the class names to the training dataset's metadata.
# The list has 10 entries, matching cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10 set in the config cell.
MetadataCatalog.get('coco_trash_train').thing_classes = ["General trash", "Paper", "Paper pack", "Metal", 
                                                         "Glass", "Plastic", "Styrofoam", "Plastic bag", "Battery", "Clothing"]

In [3]:
# Load the base config.
#
# 1. get_cfg() returns detectron2's default configuration.
# 2. model_zoo.get_config_file() resolves a model-zoo YAML file, which is merged on
#    top of the defaults. The file used here is Cascade Mask R-CNN, R50-FPN, 1x
#    schedule (the earlier note saying "Faster R-CNN R101 FPN 3x" was stale).
#    The path can be swapped for another model-zoo config; keep it in sync with the
#    checkpoint URL assigned to cfg.MODEL.WEIGHTS in the config-override cell.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file('Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml'))

In [4]:
# Override the base config for this experiment.
#
# 1. Point TRAIN/TEST at the registered datasets and set the dataloader worker count.
# 2. cfg.MODEL.WEIGHTS: start from the model-zoo checkpoint of the same architecture.
# 3. Solver: batch size, base LR, max iterations, LR-decay steps and gamma,
#    checkpoint period.
# 4. cfg.OUTPUT_DIR: directory for checkpoints and logs.
# 5. ROI heads: RoIs sampled per image and number of foreground classes.
# 6. cfg.TEST.EVAL_PERIOD: how often (in iterations) evaluation runs during training.

cfg.DATASETS.TRAIN = ('coco_trash_train',)
cfg.DATASETS.TEST = ('coco_trash_test',)

# Was misspelled `NUM_WOREKRS`: on an unfrozen CfgNode that silently creates a new,
# unused key and leaves the real NUM_WORKERS at its default.
cfg.DATALOADER.NUM_WORKERS = 2

# NOTE(review): this checkpoint/config is Cascade *Mask* R-CNN. Whether the mask head
# should stay enabled depends on the dataset having segmentation annotations — confirm.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url('Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml')

cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = 15000
cfg.SOLVER.STEPS = (8000,12000)
# NOTE(review): 0.005 is a much sharper decay than the commonly used 0.1 — confirm it is intentional.
cfg.SOLVER.GAMMA = 0.005
cfg.SOLVER.CHECKPOINT_PERIOD = 3000

cfg.OUTPUT_DIR = './output'

cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
# 10 classes, matching the thing_classes list attached to 'coco_trash_train'.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10

cfg.TEST.EVAL_PERIOD = 3000

In [5]:
# mapper - input data를 어떤 형식으로 return할지 (따라서 augmentation 등 데이터 전처리 포함 됨)
'''
데이터 매퍼 (전처리) 설정:

MyMapper 함수는 입력 데이터에 대한 전처리 방법을 정의

이미지에 랜덤으로 수직 뒤집기, 밝기 및 대비 변환을 적용

변환된 이미지를 텐서로 변환하고 어노테이션을 조정하여 dataset_dict에 추가

'''
import detectron2.data.transforms as T

def MyMapper(dataset_dict):
    """Turn one dataset record into the dict consumed by the training loop.

    Works on a deep copy of ``dataset_dict`` so the caller's record is not mutated.
    The image at ``dataset_dict['file_name']`` is read in BGR format, then passed
    through a random vertical flip (p=0.5), random brightness (0.8-1.8) and random
    contrast (0.6-1.3).

    Args:
        dataset_dict: a record with at least the keys ``'file_name'`` and
            ``'annotations'`` (both are accessed unconditionally).

    Returns:
        The copied record with ``'annotations'`` removed and two keys added:
        ``'image'`` (float32 tensor, axes transposed with ``(2, 0, 1)`` —
        presumably HWC -> CHW) and ``'instances'`` (built from the transformed,
        non-crowd annotations, with empty instances filtered out).
    """
    record = copy.deepcopy(dataset_dict)
    raw_image = utils.read_image(record['file_name'], format='BGR')

    augmentations = [
        T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
        T.RandomBrightness(0.8, 1.8),
        T.RandomContrast(0.6, 1.3),
    ]
    augmented, applied = T.apply_transform_gens(augmentations, raw_image)

    # Size of the augmented image; used for both annotation transforms and Instances.
    image_size = augmented.shape[:2]

    channels_first = augmented.transpose(2, 0, 1).astype('float32')
    record['image'] = torch.as_tensor(channels_first)

    # Apply the same geometric transforms to every annotation, skipping crowd regions.
    kept_annos = []
    for obj in record.pop('annotations'):
        if obj.get('iscrowd', 0) != 0:
            continue
        kept_annos.append(
            utils.transform_instance_annotations(obj, applied, image_size)
        )

    instances = utils.annotations_to_instances(kept_annos, image_size)
    record['instances'] = utils.filter_empty_instances(instances)

    return record

In [6]:
# trainer - DefaultTrainer를 상속
'''
커스텀 트레이너 정의:

MyTrainer 클래스는 DefaultTrainer를 상속하고 build_train_loader 및 build_evaluator 메서드를 override

build_train_loader: 데이터 로더를 생성할 때 커스텀 매퍼를 사용하도록 함

build_evaluator: 평가를 위해 COCO 평가기를 생성

'''

class MyTrainer(DefaultTrainer):
    """DefaultTrainer subclass that plugs in the custom mapper and a COCO evaluator."""

    @classmethod
    def build_train_loader(cls, cfg, sampler=None):
        """Build the training data loader with ``MyMapper`` as the mapper.

        Args:
            cfg: the experiment config, forwarded to ``build_detection_train_loader``.
            sampler: optional sampler, forwarded unchanged.
        """
        loader = build_detection_train_loader(cfg, mapper=MyMapper, sampler=sampler)
        return loader

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Create a ``COCOEvaluator`` for ``dataset_name``.

        When ``output_folder`` is None, ``./output_eval`` is created (if missing)
        and used as the evaluator's output directory.
        """
        eval_dir = output_folder
        if eval_dir is None:
            eval_dir = './output_eval'
            os.makedirs(eval_dir, exist_ok=True)

        # Positional arguments are kept exactly as before (dataset name, cfg, False, dir).
        return COCOEvaluator(dataset_name, cfg, False, eval_dir)

In [7]:
# Train
# Make sure the output directory named in the config exists before the trainer is built.
os.makedirs(cfg.OUTPUT_DIR, exist_ok = True)

# MyTrainer is the DefaultTrainer subclass defined above (custom mapper + COCO evaluator).
trainer = MyTrainer(cfg)
# resume=False: do not resume from a previous run's checkpoint.
# NOTE(review): per detectron2's DefaultTrainer docs this loads cfg.MODEL.WEIGHTS instead — confirm for the installed version.
trainer.resume_or_load(resume=False)
trainer.train()

[32m[10/06 00:03:10 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

model_final_f6e8b1.pkl: 243MB [00:03, 77.5MB/s]                              
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (11, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (11,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (40, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (40,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mroi_h

[32m[10/06 00:03:13 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[10/06 00:03:26 d2.utils.events]: [0m eta: 1:58:49  iter: 19  total_loss: 3.278  loss_cls: 2.479  loss_box_reg: 0.625  loss_rpn_cls: 0.07086  loss_rpn_loc: 0.03456    time: 0.4776  last_time: 0.4821  data_time: 0.0249  last_data_time: 0.0187   lr: 1.9981e-05  max_mem: 6696M
[32m[10/06 00:03:35 d2.utils.events]: [0m eta: 1:58:13  iter: 39  total_loss: 2.953  loss_cls: 2.048  loss_box_reg: 0.7529  loss_rpn_cls: 0.1068  loss_rpn_loc: 0.05746    time: 0.4758  last_time: 0.4745  data_time: 0.0137  last_data_time: 0.0132   lr: 3.9961e-05  max_mem: 6696M
[32m[10/06 00:03:45 d2.utils.events]: [0m eta: 1:58:07  iter: 59  total_loss: 2.224  loss_cls: 1.3  loss_box_reg: 0.726  loss_rpn_cls: 0.05599  loss_rpn_loc: 0.03283    time: 0.4761  last_time: 0.4728  data_time: 0.0146  last_data_time: 0.0130   lr: 5.9941e-05  max_mem: 6696M
[32m[10/06 00:03:54 d2.utils.events]: [0m eta: 1:58:14  iter: 79  total_loss: 1.656  loss_cls: 0.8164  loss_box_reg: 0.6938  loss_rpn_cls: 0.07511  loss_rpn_

KeyboardInterrupt: 