In [7]:
import os
import copy
import torch
import cv2
from tqdm import tqdm
import pandas as pd
import detectron2
from detectron2.data import detection_utils as utils
from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.utils.visualizer import Visualizer

import matplotlib.pyplot as plt


In [2]:
# Register Dataset
try:
    register_coco_instances('coco_trash_test', {}, '../../../dataset/test.json', '../../../dataset/')
except AssertionError:
    pass

In [3]:
# config 불러오기
cfg = get_cfg()
# cfg.merge_from_file(model_zoo.get_config_file('COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml'))
cfg.merge_from_file('configs/COCO-Detection-NoisyAnchor/retinanet_R_101_FPN_3x_noisyanchor.yaml')


Loading config configs/COCO-Detection-NoisyAnchor/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


In [10]:
# config 수정하기
cfg.DATASETS.TEST = ('coco_trash_test',)

cfg.DATALOADER.NUM_WOREKRS = 2

cfg.OUTPUT_DIR = './output'

cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth') 

cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3

In [11]:
# model
predictor = DefaultPredictor(cfg)

# mapper - input data를 어떤 형식으로 return할지
def MyMapper(dataset_dict):
    
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict['file_name'], format='BGR')
    
    dataset_dict['image'] = image
    
    return dataset_dict

# test loader
test_loader = build_detection_test_loader(cfg, 'coco_trash_test', MyMapper)

[32m[01/18 17:01:02 d2.data.datasets.coco]: [0mLoaded 4871 images in COCO format from ../../../dataset/test.json
[32m[01/18 17:01:02 d2.data.common]: [0mSerializing 4871 elements to byte tensors and concatenating them all ...
[32m[01/18 17:01:02 d2.data.common]: [0mSerialized dataset takes 0.53 MiB


In [12]:
# output 뽑은 후 sumbmission 양식에 맞게 후처리 
prediction_strings = []
file_names = []

class_num = 10

for data in tqdm(test_loader):
    
    prediction_string = ''
    
    data = data[0]
    
    # 출력용 이미지
    image = data['image']
    # 출력용 이미지 이름
    file_name = data['file_name'].replace('../../../dataset/', '')
    # 모델의 이미지에대한 예측 아웃풋
    outputs = predictor(image)['instances']
    outputs_v = outputs.to('cpu')
    # Convert CUDA tensor to CPU tensor and then to numpy array
    # boxes = outputs.pred_boxes.tensor.detach().cpu().numpy()

    targets = outputs.pred_classes.cpu().tolist()
    boxes = [i.cpu().detach().numpy() for i in outputs.pred_boxes]
    scores = outputs.scores.cpu().tolist()
    
    # 출력용 이미지 파일 읽어와서 bbox 그리기
    img = cv2.imread(data['file_name'])
    v = Visualizer(img[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TEST[0]), scale=1.2)
    v = v.draw_instance_predictions(outputs_v)
    
    # Save or display the image
    visualized_img = v.get_image()[:, :, ::-1]
    cv2.imwrite(f"./output/img/{file_name}", visualized_img)
    # 저장된 이미지 파일의 경로 출력
    print(f"Saved output image to: output/img/{file_name}")

    # 이미지 출력
    # plt.imshow(visualized_img)
    # plt.axis('off')
    # plt.show()

    for target, box, score in zip(targets,boxes,scores):
        prediction_string += (str(target) + ' ' + str(score) + ' ' + str(box[0]) + ' ' 
        + str(box[1]) + ' ' + str(box[2]) + ' ' + str(box[3]) + ' ')
     
    prediction_strings.append(prediction_string)
    file_names.append(data['file_name'].replace('../../../dataset/',''))

submission = pd.DataFrame()
submission['PredictionString'] = prediction_strings
submission['image_id'] = file_names
submission.to_csv(os.path.join(cfg.OUTPUT_DIR, f'submission_det2.csv'), index=None)

  0%|          | 1/4871 [00:00<1:10:31,  1.15it/s]

Saved output image to: output/img/test/0000.jpg


  0%|          | 2/4871 [00:01<1:04:53,  1.25it/s]

Saved output image to: output/img/test/0001.jpg


  0%|          | 3/4871 [00:02<56:27,  1.44it/s]  

Saved output image to: output/img/test/0002.jpg


  0%|          | 4/4871 [00:02<52:16,  1.55it/s]

Saved output image to: output/img/test/0003.jpg


  0%|          | 5/4871 [00:03<49:47,  1.63it/s]

Saved output image to: output/img/test/0004.jpg


  0%|          | 6/4871 [00:03<48:18,  1.68it/s]

Saved output image to: output/img/test/0005.jpg


  0%|          | 7/4871 [00:04<51:36,  1.57it/s]

Saved output image to: output/img/test/0006.jpg


  0%|          | 8/4871 [00:05<49:31,  1.64it/s]

Saved output image to: output/img/test/0007.jpg


  0%|          | 9/4871 [00:05<48:00,  1.69it/s]

Saved output image to: output/img/test/0008.jpg


  0%|          | 10/4871 [00:06<46:50,  1.73it/s]

Saved output image to: output/img/test/0009.jpg


  0%|          | 11/4871 [00:06<51:09,  1.58it/s]

Saved output image to: output/img/test/0010.jpg





KeyboardInterrupt: 

In [50]:
# output 뽑은 후 sumbmission 양식에 맞게 후처리 
prediction_strings = []
file_names = []

class_num = 10

# 클래스별 고유한 색상 정의 (10개 클래스에 대한 색상)
class_colors = [(0, 255, 0), (0, 0, 255), (255, 0, 0), (0, 255, 255), (255, 255, 0),
                (255, 0, 255), (128, 0, 0), (0, 128, 0), (0, 0, 128), (128, 128, 128)]

for data in tqdm(test_loader):
    
    prediction_string = ''
    
    data = data[0]
    
    # 출력용 이미지
    image = data['image']
    # 출력용 이미지 이름
    file_name = data['file_name'].replace('../../../dataset/', '')
    # 모델의 이미지에대한 예측 아웃풋
    outputs = predictor(image)['instances']

    targets = outputs.pred_classes.cpu().tolist()
    boxes = [i.cpu().detach().numpy() for i in outputs.pred_boxes]
    scores = outputs.scores.cpu().tolist()

    # 선택할 신뢰도의 임계값 설정 (예: 0.5)
    confidence_threshold = 0.2
    
    # 가장 높은 신뢰도의 bbox 선택
    selected_boxes = []
    for target, box, score in zip(targets, boxes, scores):
        if score >= confidence_threshold:
            selected_boxes.append((target, round(score, 2), box))

    # 신뢰도를 기준으로 내림차순으로 정렬
    selected_boxes.sort(key=lambda x: x[1], reverse=True)

    img = cv2.imread(data['file_name'])

    for target, score, box in selected_boxes:
        prediction_string += (str(target) + ' ' + str(score) + ' ' + str(box[0]) + ' ' 
        + str(box[1]) + ' ' + str(box[2]) + ' ' + str(box[3]) + ' ')

        # Draw bounding box
        x1, y1, x2, y2 = map(int, box)
        class_color = class_colors[target % len(class_colors)]
        thickness = 2
        img = cv2.rectangle(img, (x1, y1), (x2, y2), class_color, thickness)
        
        # Label text
        class_name = str(target)
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.5
        font_thickness = 1
        text_size = cv2.getTextSize(class_name, font, font_scale, font_thickness)[0]
        img = cv2.putText(img, class_name, (x1, y1 - 5), font, font_scale, class_color, font_thickness)
    
    cv2.imwrite(f"./output/img/{file_name}", img)
    # 저장된 이미지 파일의 경로 출력
    # print(f"Saved output image to: output/img/{file_name}")

    # # 이미지 출력
    # plt.imshow(img)
    # plt.axis('off')
    # plt.show()
    # break
    prediction_strings.append(prediction_string)
    file_names.append(data['file_name'].replace('../../../dataset/',''))

submission = pd.DataFrame()
submission['PredictionString'] = prediction_strings
submission['image_id'] = file_names
submission.to_csv(os.path.join(cfg.OUTPUT_DIR, f'submission_det2.csv'), index=None)

100%|██████████| 4871/4871 [09:31<00:00,  8.52it/s]
