In [4]:
# !pip install effdet

In [9]:
# 라이브러리 및 모듈 import
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np
import cv2
import os
import torch
from torch.utils.data import DataLoader, Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
from effdet.efficientdet import HeadNet
import pandas as pd
from tqdm import tqdm

In [10]:
# CustomDataset class 선언

class CustomDataset(Dataset):
    '''
      data_dir: data가 존재하는 폴더 경로
      transforms: data transform (resize, crop, Totensor, etc,,,)
    '''

    def __init__(self, annotation, data_dir, transforms=None):
        super().__init__()
        self.data_dir = data_dir
        
        # coco annotation 불러오기 (by. coco API)
        self.coco = COCO(annotation)
        self.predictions = {
            "images": self.coco.dataset["images"].copy(),
            "categories": self.coco.dataset["categories"].copy(),
            "annotations": None
        }
        self.transforms = transforms

    def __getitem__(self, index: int):
        image_id = self.coco.getImgIds(imgIds=index)

        image_info = self.coco.loadImgs(image_id)[0]
        
        image = cv2.imread(os.path.join(self.data_dir, image_info['file_name']))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        ann_ids = self.coco.getAnnIds(imgIds=image_info['id'])
        anns = self.coco.loadAnns(ann_ids)

        # boxes (x, y, w, h)
        boxes = np.array([x['bbox'] for x in anns])

        # boxex (x_min, y_min, x_max, y_max)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        
        # box별 label
        labels = np.array([x['category_id'] for x in anns])
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        areas = np.array([x['area'] for x in anns])
        areas = torch.as_tensor(areas, dtype=torch.float32)
        
        is_crowds = np.array([x['iscrowd'] for x in anns])
        is_crowds = torch.as_tensor(is_crowds, dtype=torch.int64)

        target = {'boxes': boxes, 'labels': labels, 'image_id': torch.tensor([index]), 'area': areas,
                  'iscrowd': is_crowds}

        # transform
        if self.transforms:
            while True:
                sample = self.transforms(**{
                    'image': image,
                    'bboxes': target['boxes'],
                    'labels': labels
                })
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
                    target['boxes'][:,[0,1,2,3]] = target['boxes'][:,[1,0,3,2]]  #yxyx: be warning
                    target['labels'] = torch.tensor(sample['labels'])
                    break
            
        return image, target, image_id, image_info['file_name']
    
    def __len__(self) -> int:
        return len(self.coco.getImgIds())
def collate_fn(batch):
    return tuple(zip(*batch))

In [11]:
# Albumentation을 이용, augmentation 선언
def get_train_transform():
    return A.Compose([
        A.Resize(512, 512),
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


def get_valid_transform():
    return A.Compose([
        A.Resize(512, 512),
        ToTensorV2(p=1.0)
    ])

In [12]:
from effdet import DetBenchPredict
import gc

# Effdet config를 통해 모델 불러오기 + ckpt load
def load_net(checkpoint_path=None,):
    config = get_efficientdet_config('tf_efficientdet_d1')
    config.num_classes = 10
    config.image_size = (512,512)
    
    config.soft_nms = False
    config.max_det_per_image = 25
    
    net = EfficientDet(config, pretrained_backbone=False)
    net.class_net = HeadNet(config, num_outputs=config.num_classes)
    
    net = DetBenchPredict(net)
    return net

In [13]:
def get_net(checkpoint_path=None):
    
    config = get_efficientdet_config('tf_efficientdet_d1')
    config.num_classes = 10
    config.image_size = (512,512)
    
    config.soft_nms = False
    config.max_det_per_image = 25
    
    net = EfficientDet(config, pretrained_backbone=True)
    net.class_net = HeadNet(config, num_outputs=config.num_classes)

    return DetBenchTrain(net)

In [15]:
a = load_net()
b = get_net()

In [19]:
print(type(a))
print(type(b)

<class 'effdet.bench.DetBenchTrain'>
<class 'effdet.bench.DetBenchTrain'>


In [5]:
# valid function
def valid_fn(val_data_loader, model, device):
    outputs = []
    for images, image_ids in tqdm(val_data_loader):
        # gpu 계산을 위해 image.to(device)       
        images = torch.stack(images) # bs, ch, w, h 
        images = images.to(device).float()
        output = model(images)
        for out in output:
            outputs.append({'boxes': out.detach().cpu().numpy()[:,:4], 
                            'scores': out.detach().cpu().numpy()[:,4], 
                            'labels': out.detach().cpu().numpy()[:,-1]})
    return outputs

In [33]:
from map_boxes import mean_average_precision_for_boxes

In [34]:
def main():
    annotation = '/opt/ml/detection/dataset/train.json'
    data_dir = '/opt/ml/detection/dataset'
    val_dataset = CustomDataset(annotation, data_dir, get_valid_transform())
    # epoch = 
    checkpoint_path = f'epoch_26_tf_efficientdet_d1.pth'
    score_threshold = 0.1
    val_data_loader = DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        collate_fn=collate_fn
    )
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    model = load_net(checkpoint_path, device)
    
    outputs = []
    for images, targets, image_ids, filename in tqdm(val_data_loader):
        # gpu 계산을 위해 image.to(device)       
        images = torch.stack(images) # bs, ch, w, h 
        images = images.to(device).float()
        output = model(images)
        for out in output:
            outputs.append({'boxes': out.detach().cpu().numpy()[:,:4], 
                            'scores': out.detach().cpu().numpy()[:,4], 
                            'labels': out.detach().cpu().numpy()[:,-1]})  
        new_pred = []
        for i, output in enumerate(outputs):
            for box, score, label in zip(output['boxes'], output['scores'], output['labels']):
                if score > score_threshold:
                    new_pred.append([filename[i], int(label), score, box[0]*2, box[2]*2, box[1]*2, box[3]*2])
        gt = []
        for i, target in enumerate(targets):
            bbox = target["boxes"][i]
            print(bbox)
            gt.append([filename[i], int(target["labels"][i]), bbox[1].item(), bbox[3].item(), bbox[0].item(), bbox[2].item()])
        mean_ap, _ = mean_average_precision_for_boxes(gt, new_pred, iou_threshold=0.5)
        
    submission = pd.DataFrame()
    submission['PredictionString'] = prediction_strings
    submission['image_id'] = file_names
    submission.to_csv(f'submission_effcient_det_d3_50_train_shffle_True.csv', index=None)
    print(submission.head())

In [35]:
if __name__ == '__main__':
    main()

loading annotations into memory...
Done (t=0.08s)
creating index...
index created!
cuda


  0%|          | 0/4883 [00:00<?, ?it/s]

[['train/0000.jpg', 1, 0.30246696, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.5313720703125]]
[['train/0000.jpg', 1, 0.30246696, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.5313720703125], ['train/0000.jpg', 2, 0.264304, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.5313720703125]]
[['train/0000.jpg', 1, 0.30246696, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.5313720703125], ['train/0000.jpg', 2, 0.264304, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.5313720703125], ['train/0000.jpg', 6, 0.2618692, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.5313720703125]]
[['train/0000.jpg', 1, 0.30246696, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.5313720703125], ['train/0000.jpg', 2, 0.264304, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.5313720703125], ['train/0000.jpg', 6, 0.2618692, 206.68402099609375, 743.9473266601562, 201.6585693359375, 717.531372070312




SystemExit: 