In [8]:
!pip install effdet



In [1]:
# 라이브러리 및 모듈 import
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np
import cv2
import os
import torch
from torch.utils.data import DataLoader, Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
from effdet.efficientdet import HeadNet
import pandas as pd
from tqdm import tqdm

In [2]:
# CustomDataset class 선언

class CustomDataset(Dataset):
    '''
      data_dir: data가 존재하는 폴더 경로
      transforms: data transform (resize, crop, Totensor, etc,,,)
    '''

    def __init__(self, annotation, data_dir, transforms=None):
        super().__init__()
        self.data_dir = data_dir
        
        # coco annotation 불러오기 (by. coco API)
        self.coco = COCO(annotation)
        self.predictions = {
            "images": self.coco.dataset["images"].copy(),
            "categories": self.coco.dataset["categories"].copy(),
            "annotations": None
        }
        self.transforms = transforms

    def __getitem__(self, index: int):
        image_id = self.coco.getImgIds(imgIds=index)

        image_info = self.coco.loadImgs(image_id)[0]
        
        image = cv2.imread(os.path.join(self.data_dir, image_info['file_name']))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        ann_ids = self.coco.getAnnIds(imgIds=image_info['id'])
        anns = self.coco.loadAnns(ann_ids)

        # boxes (x, y, w, h)
        boxes = np.array([x['bbox'] for x in anns])

        # boxex (x_min, y_min, x_max, y_max)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        
        # box별 label
        labels = np.array([x['category_id'] for x in anns])
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        areas = np.array([x['area'] for x in anns])
        areas = torch.as_tensor(areas, dtype=torch.float32)
        
        is_crowds = np.array([x['iscrowd'] for x in anns])
        is_crowds = torch.as_tensor(is_crowds, dtype=torch.int64)

        target = {'boxes': boxes, 'labels': labels, 'image_id': torch.tensor([index]), 'area': areas,
                  'iscrowd': is_crowds}

        # transform
        if self.transforms:
            while True:
                sample = self.transforms(**{
                    'image': image,
                    'bboxes': target['boxes'],
                    'labels': labels
                })
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
                    target['boxes'][:,[0,1,2,3]] = target['boxes'][:,[1,0,3,2]]  #yxyx: be warning
                    target['labels'] = torch.tensor(sample['labels'])
                    break
            
        return image, target, image_id
    
    def __len__(self) -> int:
        return len(self.coco.getImgIds())

In [3]:
# Albumentation을 이용, augmentation 선언
def get_train_transform():
    return A.Compose([
        A.Resize(512, 512),
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


def get_valid_transform():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [4]:
# loss 추적
class Averager:
    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        if self.iterations == 0:
            return 0
        else:
            return 1.0 * self.current_total / self.iterations

    def reset(self):
        self.current_total = 0.0
        self.iterations = 0.0

def collate_fn(batch):
    return tuple(zip(*batch))

In [5]:
# Effdet config
# https://github.com/rwightman/efficientdet-pytorch/blob/master/effdet/config/model_config.py

# Effdet config를 통해 모델 불러오기
def get_net(checkpoint_path=None):
    
    config = get_efficientdet_config('tf_efficientdet_d1')
    config.num_classes = 10
    config.image_size = (512,512)
    
    config.soft_nms = False
    config.max_det_per_image = 25
    
    net = EfficientDet(config, pretrained_backbone=True)
    net.class_net = HeadNet(config, num_outputs=config.num_classes)
    
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        net.load_state_dict(checkpoint['model_state_dict'])
        
    return DetBenchTrain(net)
    
# train function
def train_fn(num_epochs, train_data_loader, optimizer, model, device, clip=35):
    loss_hist = Averager()
    model.train()
    
    for epoch in range(num_epochs):
        loss_hist.reset()
        
        for images, targets, image_ids in tqdm(train_data_loader):
            print(images)
            print(images.shape)
            print(image_ids)
            import sys
            sys.exit()
            images = torch.stack(images) # bs, ch, w, h - 16, 3, 512, 512
            images = images.to(device).float()
            boxes = [target['boxes'].to(device).float() for target in targets]
            labels = [target['labels'].to(device).float() for target in targets]
            target = {"bbox": boxes, "cls": labels}

            # calculate loss
            loss, cls_loss, box_loss = model(images, target).values()
            loss_value = loss.detach().item()
            
            loss_hist.send(loss_value)
            
            # backward
            optimizer.zero_grad()
            loss.backward()
            # grad clip
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            
            optimizer.step()

        print(f"Epoch #{epoch+1} loss: {loss_hist.value}")
        torch.save(model.state_dict(), f'epoch_{epoch+1}.pth')

In [6]:
model = get_net()
# model.to(device)
print(model)

DetBenchTrain(
  (model): EfficientDet(
    (backbone): EfficientNetFeatures(
      (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (act1): SiLU(inplace=True)
      (blocks): Sequential(
        (0): Sequential(
          (0): DepthwiseSeparableConv(
            (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
            (act1): SiLU(inplace=True)
            (se): SqueezeExcite(
              (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (act1): SiLU(inplace=True)
              (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (gate): Sigmoid()
            )
            (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
         

In [7]:
def main():
    annotation = '/opt/ml/detection/dataset/train.json'
    data_dir = '/opt/ml/detection/dataset'
    train_dataset = CustomDataset(annotation, data_dir, get_train_transform())

    train_data_loader = DataLoader(
        train_dataset,
        batch_size=1,
        shuffle=True,
        num_workers=1,
        collate_fn=collate_fn
    )
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    model = get_net()
    model.to(device)
    
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    num_epochs = 50

    loss = train_fn(num_epochs, train_data_loader, optimizer, model, device)

In [8]:
annotation = '/opt/ml/detection/dataset/train.json'
data_dir = '/opt/ml/detection/dataset'
train_dataset = CustomDataset(annotation, data_dir, get_train_transform())

loading annotations into memory...
Done (t=0.08s)
creating index...
index created!


In [9]:
train_dataset[1]

(tensor([[[0.9343, 0.9667, 0.9922,  ..., 0.4618, 0.3843, 0.4010],
          [0.9951, 1.0000, 0.9990,  ..., 0.4480, 0.3618, 0.3422],
          [0.9990, 1.0000, 0.9980,  ..., 0.4186, 0.4235, 0.4137],
          ...,
          [0.9833, 0.9882, 0.9951,  ..., 0.8882, 0.8882, 0.8882],
          [0.9784, 0.9814, 0.9882,  ..., 0.8843, 0.8843, 0.8843],
          [0.9765, 0.9814, 0.9873,  ..., 0.8824, 0.8824, 0.8824]],
 
         [[0.9343, 0.9667, 0.9922,  ..., 0.4225, 0.3451, 0.3618],
          [0.9951, 1.0000, 0.9990,  ..., 0.4088, 0.3225, 0.3029],
          [0.9990, 1.0000, 0.9980,  ..., 0.3794, 0.3843, 0.3745],
          ...,
          [0.9637, 0.9686, 0.9755,  ..., 0.8804, 0.8804, 0.8804],
          [0.9588, 0.9618, 0.9686,  ..., 0.8765, 0.8765, 0.8765],
          [0.9490, 0.9539, 0.9598,  ..., 0.8745, 0.8745, 0.8745]],
 
         [[0.9343, 0.9667, 0.9922,  ..., 0.4265, 0.3490, 0.3657],
          [0.9951, 1.0000, 0.9990,  ..., 0.4127, 0.3265, 0.3069],
          [0.9990, 1.0000, 0.9980,  ...,

In [10]:
if __name__ == '__main__':
    main()

loading annotations into memory...
Done (t=0.08s)
creating index...
index created!
cuda


  0%|          | 0/4883 [00:00<?, ?it/s]

(tensor([[[0.1382, 0.3118, 0.4039,  ..., 0.2853, 0.4157, 0.5245],
         [0.1569, 0.2657, 0.4225,  ..., 0.3618, 0.3931, 0.4098],
         [0.1765, 0.3490, 0.4049,  ..., 0.3216, 0.2853, 0.2882],
         ...,
         [0.6157, 0.5882, 0.6686,  ..., 0.7137, 0.6922, 0.6010],
         [0.6922, 0.6755, 0.7020,  ..., 0.7304, 0.7235, 0.6941],
         [0.6412, 0.7275, 0.6902,  ..., 0.7333, 0.7029, 0.7098]],

        [[0.1186, 0.2922, 0.3843,  ..., 0.2931, 0.4235, 0.5324],
         [0.1373, 0.2461, 0.4029,  ..., 0.3696, 0.4010, 0.4176],
         [0.1569, 0.3294, 0.3853,  ..., 0.3294, 0.2931, 0.2961],
         ...,
         [0.5647, 0.5373, 0.6294,  ..., 0.7294, 0.7078, 0.6167],
         [0.6412, 0.6245, 0.6510,  ..., 0.7461, 0.7392, 0.7098],
         [0.5863, 0.6765, 0.6392,  ..., 0.7490, 0.7186, 0.7255]],

        [[0.1029, 0.2765, 0.3686,  ..., 0.2500, 0.3725, 0.4814],
         [0.1216, 0.2304, 0.3873,  ..., 0.3265, 0.3500, 0.3667],
         [0.1412, 0.3137, 0.3696,  ..., 0.2863, 0.2422, 0




SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
