In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import torch
import numpy as np
import pandas as pd

from torch import optim
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

from transform_util import Compose, RandomHorizontalFlip, PILToTensor, ToDtype, RandomPhotometricDistort
from coco_dataset import COCODataset
from torch.utils.data import DataLoader

from eval_utils.metric import get_inference_metrics_from_df, summarise_inference_metrics
from eval_utils.coco_metric import get_coco_from_dfs
# from eval_utils.seg_metric import SegmentationMetrics

from utils import *
from make_args import Args
from tqdm import tqdm

- args

In [2]:
args = Args('./config/01_ResNet50FPN_Backbone.json')

- Dataloader

In [3]:
def collator(batch):
    return tuple(zip(*batch))

In [4]:
transform = Compose(
    [
        PILToTensor(),
        ToDtype(scale=True, dtype=torch.float)
    ]
)
dataset = COCODataset(args.data_path, train=False, transform=transform)
dataloader = DataLoader(
    dataset, batch_size=1, shuffle=False, drop_last=True, collate_fn=collator, num_workers=args.num_workers
)

loading annotations into memory...
Done (t=0.77s)
creating index...
index created!


- 모델 로드

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
num_classes = len(dataset.new_categories)

weights_path = args.model_save_path
weights = torch.load(weights_path)

if args.backbone == 'resnet50fpn':
    model = maskrcnn_resnet50_fpn(pretrained_backbone=True) # imagenet pretrained
    model.roi_heads.box_predictor = FastRCNNPredictor(
        in_channels=model.roi_heads.box_predictor.cls_score.in_features,
        num_classes=num_classes
    )
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_channels=model.roi_heads.mask_predictor.conv5_mask.in_channels,
        dim_reduced=args.hidden_layer,
        num_classes=num_classes
    )

model.load_state_dict(weights)
model.to(device)
model.eval()



MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu):

In [6]:
_cate_dict = dataset.new_categories
cate_dict = {}
for key, value in _cate_dict.items():
    cate_dict[value] = key

- evaluation
- 2655에서 갑자기 커널 꺼짐

In [7]:
gt_lists = []
pred_lists = []
stop_flag = 0

# pred_mask_list = []
# gt_mask_list = []

with torch.no_grad():
    model.eval()
    fileidx = 1
    for images, targets in tqdm(dataloader, total=len(dataloader)):
        images = [img.to(device) for img in images]
        outputs = model(images)

        # filename (arbitraily)
        filename = str(fileidx).zfill(6) + '.jpg'
        fileidx += 1

        boxes = outputs[0]["boxes"].to("cpu").numpy()
        masks = outputs[0]["masks"].squeeze(1).to("cpu").numpy()
        labels = outputs[0]["labels"].to("cpu").numpy()
        scores = outputs[0]["scores"].to("cpu").numpy()
        # pred_mask_list.append(masks)

        # prediction 작업
        for pred_box, pred_label, pred_score in zip(boxes, labels, scores):
            x, y, w, h = pred_box
            x_min, y_min, x_max, y_max = x, y, x+w, y+h
            pred_list = [x_min, y_min, x_max, y_max, cate_dict[pred_label], pred_score, filename]
            pred_lists.append(pred_list)
    
        # boxes = boxes[scores >= threshold].astype(np.int32)
        # masks = masks[scores >= threshold]
        # labels = labels[scores >= threshold]
        # scores = scores[scores >= threshold]
    
        # # 마스크 처리
        # masks[masks >= threshold] = 1.0
        # masks[masks < threshold] = 0.0
        
        gtboxes = targets[0]["boxes"].numpy()
        gtmasks = targets[0]['masks'].numpy()
        gtlabels = targets[0]["labels"].numpy()
        # gt_mask_list.append(gtmasks)

        # gt 작업
        for gt_box, gt_label in zip(gtboxes, gtlabels):
            x, y, w, h = gt_box
            x_min, y_min, x_max, y_max = x, y, x+w, y+h
            gt_list = [x_min, y_min, x_max, y_max, cate_dict[gt_label], filename]
            gt_lists.append(gt_list)

  "masks": torch.FloatTensor(masks),
100%|██████████| 4541/4541 [23:07<00:00,  3.27it/s]


- Box Score

In [8]:
# make dataframe
gt_lists = np.array(gt_lists)
pred_lists = np.array(pred_lists)

preds_df = pd.DataFrame()
preds_df['xmin'] = np.array(pred_lists[:,0], dtype=np.float32)
preds_df['ymin'] = np.array(pred_lists[:,1], dtype=np.float32)
preds_df['xmax'] = np.array(pred_lists[:,2], dtype=np.float32)
preds_df['ymax'] = np.array(pred_lists[:,3], dtype=np.float32)
preds_df['label'] = pred_lists[:,4]
preds_df['score'] = np.array(pred_lists[:,5], dtype=np.float32)
preds_df['image_name'] = pred_lists[:,6]

labels_df = pd.DataFrame()
labels_df['xmin'] = np.array(gt_lists[:,0], dtype=np.float32)
labels_df['ymin'] = np.array(gt_lists[:,1], dtype=np.float32)
labels_df['xmax'] = np.array(gt_lists[:,2], dtype=np.float32)
labels_df['ymax'] = np.array(gt_lists[:,3], dtype=np.float32)
labels_df['label'] = gt_lists[:,4]
labels_df['image_name'] = gt_lists[:,5]

In [9]:
infer_df = get_inference_metrics_from_df(preds_df, labels_df)
class_summary_df = summarise_inference_metrics(infer_df)

In [10]:
res = get_coco_from_dfs(preds_df, labels_df, False)

creating index...
index created!
Loading and preparing results...
DONE (t=0.50s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=20.16s).
Accumulating evaluation results...
DONE (t=4.52s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.505
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.643
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.552
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.207
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.343
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.509
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.419
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.697
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.727
 Average Recall     (AR) @[ IoU=0.

- seg score : 구현 예정