In [1]:
# Load IPython's autoreload extension so the %autoreload magic used in the
# next cell is available in this kernel.
%load_ext autoreload

In [2]:
# Called without a mode argument: per the IPython autoreload docs this triggers
# a one-off reload of already-imported modules at the moment this cell runs.
# NOTE(review): if continuous reloading on every cell execution is wanted,
# `%autoreload 2` is the usual setting — confirm intent before changing.
%autoreload 
import logging
import os
import pathlib
import torch
from ssd.engine.inference import do_evaluation
from ssd.config.defaults import cfg
from ssd.utils.logger import setup_logger
from train import start_train

In [3]:
# YAML experiment file that is merged on top of the project defaults in `cfg`.
config_file = "configs/train_rdd2020_server.yaml"

# Apply the overrides from the file, then freeze the config for the rest of
# the notebook (presumably makes it read-only — confirm in ssd.config).
cfg.merge_from_file(config_file)
cfg.freeze()

# Ensure the output directory named by the config exists, including parents.
output_dir = pathlib.Path(cfg.OUTPUT_DIR)
output_dir.mkdir(parents=True, exist_ok=True)

# Logger named "SSD"; the output directory is handed to the project helper.
logger = setup_logger("SSD", output_dir)

# Record which file was loaded, its raw text, and the resulting merged config.
logger.info("Loaded configuration file {}".format(config_file))

with open(config_file, "r") as cf:
    raw_text = cf.read()
config_str = "\n" + raw_text
del raw_text  # keep the notebook namespace identical to the original cell

logger.info(config_str)
logger.info("Running with config:\n{}".format(cfg))


2021-04-14 07:08:51,288 SSD INFO: Loaded configuration file configs/train_rdd2020_server.yaml
2021-04-14 07:08:51,291 SSD INFO: 
MODEL:
    NUM_CLASSES: 5
    BACKBONE:
        NAME: 'ResNet152'
        PRETRAINED: True
        OUT_CHANNELS: [512, 1024, 2048, 1024, 512, 512]
        INPUT_CHANNELS: 3
INPUT:
    IMAGE_SIZE: [300, 300]
DATASETS:
    TRAIN: ("rdd2020_train",)
    TEST: ("rdd2020_val", )
SOLVER:
    MAX_ITER: 120000
    GAMMA: 0.1
    BATCH_SIZE: 16
    LR: 1e-3
OUTPUT_DIR: 'outputs/rdd2020'
DATASET_DIR: "/work/datasets"

2021-04-14 07:08:51,293 SSD INFO: Running with config:
DATASETS:
  TEST: ('rdd2020_val',)
  TRAIN: ('rdd2020_train',)
DATASET_DIR: /work/datasets
DATA_LOADER:
  NUM_WORKERS: 4
  PIN_MEMORY: True
EVAL_STEP: 500
INPUT:
  IMAGE_SIZE: [300, 300]
  PIXEL_MEAN: [123.675, 116.28, 103.53]
  PIXEL_STD: [1, 1, 1]
LOG_STEP: 10
MODEL:
  BACKBONE:
    INPUT_CHANNELS: 3
    NAME: ResNet152
    OUT_CHANNELS: (512, 1024, 2048, 1024, 512, 512)
    PRETRAINED: True
  CENTE

In [4]:
# Run training via the project-local `start_train` (imported from `train`)
# with the merged config, and keep its return value as `model` for the
# evaluation cell below.
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt. When
# the call is interrupted the assignment never happens, so `model` is not
# (re)bound by this cell and the evaluation cell depends on an earlier value.
model = start_train(cfg)

Downloading: "https://download.pytorch.org/models/resnet152-b121ed2d.pth" to /home/fredralm/.cache/torch/hub/checkpoints/resnet152-b121ed2d.pth


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=241530880.0), HTML(value='')))


Detector initialized. Total Number of params:  109.88M
Backbone number of parameters: 107.39M
SSD Head number of parameters: 2.49M
2021-04-14 07:09:02,365 SSD.trainer INFO: Loading checkpoint from outputs/rdd2020/model_007000.pth
2021-04-14 07:09:03,768 SSD.trainer INFO: Loading optimizer from outputs/rdd2020/model_007000.pth
Dataset loaded. Subset: train, number of images: 8536
2021-04-14 07:09:03,882 SSD.trainer INFO: Start training ...
2021-04-14 07:09:16,225 SSD.trainer INFO: iter: 007010, lr: 0.00100, total_loss: 4.691 (4.691), reg_loss: 1.842 (1.842), cls_loss: 2.849 (2.849), time: 1.004 (1.004), eta: 1 day, 7:30:15, mem: 9628M
2021-04-14 07:09:18,988 SSD.trainer INFO: iter: 007020, lr: 0.00100, total_loss: 4.818 (4.754), reg_loss: 1.929 (1.885), cls_loss: 2.889 (2.869), time: 0.276 (0.640), eta: 20:05:15, mem: 9628M
2021-04-14 07:09:21,759 SSD.trainer INFO: iter: 007030, lr: 0.00100, total_loss: 4.793 (4.767), reg_loss: 1.885 (1.885), cls_loss: 2.908 (2.882), time: 0.277 (0.519

100%|██████████| 366/366 [04:28<00:00,  1.37it/s]


2021-04-14 07:16:03,981 SSD.inference INFO: mAP: 0.0942
D00             : 0.0384
D10             : 0.0195
D20             : 0.2369
D40             : 0.0820

2021-04-14 07:16:06,802 SSD.trainer INFO: iter: 007510, lr: 0.00100, total_loss: 4.620 (4.769), reg_loss: 1.815 (1.892), cls_loss: 2.805 (2.877), time: 27.344 (0.825), eta: 1 day, 1:46:14, mem: 9721M
2021-04-14 07:16:09,593 SSD.trainer INFO: iter: 007520, lr: 0.00100, total_loss: 4.601 (4.766), reg_loss: 1.819 (1.890), cls_loss: 2.781 (2.875), time: 0.279 (0.814), eta: 1 day, 1:26:26, mem: 9721M


KeyboardInterrupt: 

In [5]:
logger.info("Start evaluating...")

# Release cached, currently unused GPU memory held by PyTorch's caching
# allocator before evaluation starts (frees memory; it is not a speed-up).
torch.cuda.empty_cache()

# Kept as the cell's final expression so the notebook displays whatever
# do_evaluation returns.
do_evaluation(cfg, model)

2021-03-23 15:00:19,172 SSD INFO: Start evaluating...
2021-03-23 15:00:19,326 SSD.inference INFO: Evaluating mnist_detection_val dataset(1000 images):


100%|██████████| 100/100 [00:12<00:00,  7.69it/s]


2021-03-23 15:00:33,086 SSD.inference INFO: mAP: 0.8753
0               : 0.8940
1               : 0.8095
2               : 0.8760
3               : 0.8971
4               : 0.8807
5               : 0.8830
6               : 0.8859
7               : 0.8569
8               : 0.8900
9               : 0.8794



[{'metrics': {'mAP': 0.8752565431542013,
   '0': 0.8939699650469805,
   '1': 0.8094709526574602,
   '2': 0.8760044641851805,
   '3': 0.897146832405082,
   '4': 0.8807326189016644,
   '5': 0.883031870897962,
   '6': 0.8859388314999235,
   '7': 0.8569403048356649,
   '8': 0.8899598842157962,
   '9': 0.8793697068962991}}]