# EfficientDet Training On A Custom Dataset

In [1]:
!pip install pycocotools numpy opencv-python tqdm tensorboard tensorboardX pyyaml webcolors matplotlib

Collecting tensorboardX
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorboardX
Successfully installed tensorboardX-2.6.2.2


### 1. Prepare Custom Dataset/Pretrained Weights

In [2]:
# Mount Google Drive inside the Colab VM; the project directory used by the
# following cells lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
import os
import sys


# Show where the notebook starts out.
print("현재 작업 디렉토리:", os.getcwd())

# Project checkout inside the mounted Google Drive.
target_dir = "/content/drive/MyDrive/2025Spring-DL/Yet-Another-EfficientDet-Pytorch"

# Switch only when the target really is a directory: os.chdir() raises on a
# regular file, which a plain existence check would let through.
if os.path.isdir(target_dir):
    os.chdir(target_dir)
    # Guard the append so re-running this cell does not pile up duplicate entries.
    if target_dir not in sys.path:
        sys.path.append(target_dir)
    print("디렉토리 이동 완료:", os.getcwd())
else:
    print("디렉토리가 존재하지 않습니다:", target_dir)

현재 작업 디렉토리: /content
디렉토리 이동 완료: /content/drive/MyDrive/2025Spring-DL/Yet-Another-EfficientDet-Pytorch


In [4]:
current_dir = target_dir

# ------------------------------
# prepare dataset (하위 폴더만)
# ------------------------------
datasets_dir = os.path.join(current_dir, "datasets")

if os.path.exists(datasets_dir) and os.path.isdir(datasets_dir):
    folders = [item for item in os.listdir(datasets_dir)
               if os.path.isdir(os.path.join(datasets_dir, item))]
    print(f"\n# datasets 디렉토리 내 폴더 목록:")
    for folder in folders:
        print("  -", folder)
else:
    print(f"# Fail")


# ------------------------------
# prepare pretrained weights (하위 파일만)
# ------------------------------
weights_dir = os.path.join(current_dir, "weights")

if os.path.exists(weights_dir) and os.path.isdir(weights_dir):
    files = [item for item in os.listdir(weights_dir)
             if os.path.isfile(os.path.join(weights_dir, item))]
    print(f"\n# weights 디렉토리 내 파일 목록:")
    for file in files:
        print("  -", file)
else:
    print(f"# Fail")


# prepare project file projects/household.yml
# showing its contents here
print("\n# household.yml 파일 내용:")
! cat projects/household.yml


# datasets 디렉토리 내 폴더 목록:
  - household

# weights 디렉토리 내 파일 목록:
  - efficientdet-d0.pth

# household.yml 파일 내용:
project_name: household  # also the folder name of the dataset that under data_path folder
train_set: train
val_set: val
num_gpus: 1

# mean and std in RGB order, actually this part should remain unchanged as long as your dataset is similar to coco.
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]

# this anchor is adapted to the dataset
anchors_scales: '[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]'
anchors_ratios: '[(0.7, 1.4), (1.0, 1.0), (1.5, 0.7)]'

obj_list: [
    'HousePlant', 'Bed', 'CellPhone', 'Pan', 'SideTable', 'Pencil', 'TissueBox', 'Book', 'Drawer',
    'Television', 'BaseballBat', 'Painting', 'GarbageBag', 'DiningTable', 'AlarmClock', 'Cabinet',
    'Shelf', 'Lettuce', 'Desk', 'SprayBottle', 'Sofa', 'Pen', 'GarbageCan', 'Chair', 'ArmChair',
    'BasketBall', 'Box', 'Toilet', 'Watch', 'TennisRacket', 'Sink', 'Laptop', 'RemoteControl',
    'Dresser', '

### 2. Training

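아래 셀들은 모델을 학습시키는 명령어입니다. 첫 번째 셀은 COCO 사전학습 가중치(`weights/efficientdet-d0.pth`)에서 학습을 시작하고, 두 번째 셀은 `--load_weights last` 옵션으로 이어서 학습합니다.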

In [None]:
# with a coco-pretrained, you can freeze the backbone and train heads only
# to speed up training and help convergence.

! python train.py -c 0 -p household \
    --batch_size 32 --lr 1e-3 --num_epochs 10 \
    --load_weights ./weights/efficientdet-d0.pth \
    --head_only False

loading annotations into memory...
Done (t=1.14s)
creating index...
index created!
loading annotations into memory...
Done (t=0.57s)
creating index...
index created!
	size mismatch for classifier.header.pointwise_conv.conv.weight: copying a param with shape torch.Size([810, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([837, 64, 1, 1]).
	size mismatch for classifier.header.pointwise_conv.conv.bias: copying a param with shape torch.Size([810]) from checkpoint, the shape in current model is torch.Size([837]).
[Info] loaded weights: efficientdet-d0.pth, resuming checkpoint from step: 0
[Info] freezed backbone
Step: 12. Epoch: 0/10. Iteration: 13/347. Cls loss: 211004.03125. Reg loss: 2.87665. Total loss: 211006.90625:   4% 13/347 [01:08<08:52,  1.60s/it]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7e21f8474fe0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618,

In [None]:
# 이어서 학습하기: --load_weights last

! python train.py -c 0 -p household \
    --lr 1e-3 --batch_size 16 \
    --load_weights last \
    --num_epochs 16 --save_interval 100 \
    --head_only True

### 3. Evaluation

In [None]:
# get latest weight file
%cd logs/household
weight_file = !ls -Art | grep efficientdet
%cd ../..

# uncomment the next line to specify a weight file
#weight_file[-1] = 'efficientdet-d0_49_1400.pth'

! python coco_eval.py -c 0 -p household \
    -w "logs/household/{weight_file[-1]}"

In [None]:
# Or just:
! python coco_eval.py -c 0 -p household \
    -w #./path/to/your/weights

### 4. Visualize

In [None]:

import torch
from torch.backends import cudnn

from backbone import EfficientDetBackbone
import cv2
import matplotlib.pyplot as plt
import numpy as np

from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.utils import preprocess, invert_affine, postprocess


# ---------------------------------------------------------------------------
# Single-image inference and visualisation with the trained detector.
# ---------------------------------------------------------------------------

# Inference settings.
compound_coef = 0
force_input_size = None  # set None to use default size
img_path = 'datasets/household/val/4_teleport_42.jpg'

# Both values are passed to postprocess() below.
threshold = 0.2
iou_threshold = 0.2

use_cuda = True
use_float16 = False
cudnn.fastest = True
cudnn.benchmark = True

# Class names; the predicted class id is used as an index into this list, and
# its length sets num_classes of the model. It should match obj_list in
# projects/household.yml.
obj_list = [
    'HousePlant', 'Bed', 'CellPhone', 'Pan', 'SideTable', 'Pencil', 'TissueBox', 'Book', 'Drawer',
    'Television', 'BaseballBat', 'Painting', 'GarbageBag', 'DiningTable', 'AlarmClock', 'Cabinet',
    'Shelf', 'Lettuce', 'Desk', 'SprayBottle', 'Sofa', 'Pen', 'GarbageCan', 'Chair', 'ArmChair',
    'BasketBall', 'Box', 'Toilet', 'Watch', 'TennisRacket', 'Sink', 'Laptop', 'RemoteControl',
    'Dresser', 'Statue', 'Candle', 'TVStand', 'Stool', 'Pillow', 'DeskLamp', 'SinkBasin', 'Fridge',
    'Cart', 'Egg', 'DishSponge', 'SaltShaker', 'Plunger', 'CreditCard', 'Mug', 'CounterTop',
    'SoapBottle', 'Spatula', 'TeddyBear', 'PaperTowelRoll', 'PepperShaker', 'Bottle', 'Vase',
    'Plate', 'Pot', 'Fork', 'Tomato', 'Faucet', 'FloorLamp', 'Apple', 'Knife', 'Newspaper',
    'LaundryHamper', 'Ladle', 'Kettle', 'KeyChain', 'ToiletPaper', 'Bowl', 'DogBed', 'ClothesDryer',
    'Potato', 'Safe', 'Microwave', 'ButterKnife', 'Bread', 'Toaster', 'CoffeeMachine', 'RoomDecor',
    'CoffeeTable', 'WineBottle', 'Cup', 'VacuumCleaner', 'Cloth', 'Ottoman', 'Spoon', 'SoapBar',
    'Boots', 'TableTopDecor', 'Dumbbell'
]


# tf bilinear interpolation is different from any other's, just make do
# One input size per compound coefficient (indexed by compound_coef).
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

# Stack the preprocessed frames into one batch tensor (on the GPU if requested).
if use_cuda:
    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
else:
    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

# Cast to the working precision and move the last axis to position 1.
x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),

                             # replace this part with your project's anchor config
                             # (these values mirror anchors_ratios / anchors_scales
                             # in projects/household.yml)
                             ratios=[(0.7, 1.4), (1.0, 1.0), (1.5, 0.7)],
                             scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

# Need to modify the weight file
# NOTE: weight_file is created by the first evaluation cell; run that cell first,
# or assign e.g. weight_file = ['efficientdet-d0_49_1400.pth'] before this line.
weights_path = 'logs/household/' + weight_file[-1]
# map_location='cpu' lets a checkpoint saved on a GPU load even when
# use_cuda is False; the model is moved to the GPU below when requested.
model.load_state_dict(torch.load(weights_path, map_location='cpu'))
model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()

with torch.no_grad():
    features, regression, classification, anchors = model(x)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    out = postprocess(x,
                      anchors, regression, classification,
                      regressBoxes, clipBoxes,
                      threshold, iou_threshold)

# Map the boxes back onto the original images using the metadata from preprocess().
out = invert_affine(framed_metas, out)

for i in range(len(ori_imgs)):
    # Images without any detection are skipped (nothing is drawn or shown).
    if len(out[i]['rois']) == 0:
        continue
    ori_imgs[i] = ori_imgs[i].copy()
    for j in range(len(out[i]['rois'])):
        # np.int was removed in NumPy 1.24 (AttributeError); the builtin int is
        # the replacement. tolist() yields plain Python ints for OpenCV.
        (x1, y1, x2, y2) = out[i]['rois'][j].astype(int).tolist()
        cv2.rectangle(ori_imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
        obj = obj_list[out[i]['class_ids'][j]]
        score = float(out[i]['scores'][j])

        cv2.putText(ori_imgs[i], '{}, {:.3f}'.format(obj, score),
                    (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (255, 255, 0), 1)

    # Render once per image, after all of its boxes have been drawn, and in its
    # own figure so that several images do not overwrite each other.
    # NOTE(review): if preprocess() loads images with cv2.imread, the array is
    # BGR and matplotlib will show swapped colours - confirm, and convert with
    # cv2.cvtColor(..., cv2.COLOR_BGR2RGB) if so.
    plt.figure()
    plt.imshow(ori_imgs[i])

plt.show()

