In [1]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload

In [2]:
%autoreload 
import logging
import os
import pathlib
import torch
from ssd.engine.inference import do_evaluation
from ssd.config.defaults import cfg
from ssd.utils.logger import setup_logger
from train import start_train

In [3]:
# YAML experiment file whose values are merged into the imported default cfg.
config_file = "configs/train_rdd2020_server.yaml"

# Merge the YAML overrides into cfg, then freeze it.
cfg.merge_from_file(config_file)
cfg.freeze()

# Create the output directory named by the config (including missing parents).
output_dir = pathlib.Path(cfg.OUTPUT_DIR)
output_dir.mkdir(parents=True, exist_ok=True)

logger = setup_logger("SSD", output_dir)

# Log, in order: the config file name, its raw text, and the merged config.
loaded_msg = "Loaded configuration file {}".format(config_file)
logger.info(loaded_msg)

config_str = "\n" + pathlib.Path(config_file).read_text()
logger.info(config_str)

merged_msg = "Running with config:\n{}".format(cfg)
logger.info(merged_msg)


2021-04-15 20:39:23,414 SSD INFO: Loaded configuration file configs/train_rdd2020_server.yaml
2021-04-15 20:39:23,415 SSD INFO: 
MODEL:
    NUM_CLASSES: 5
    BACKBONE:
        NAME: 'resnet152'
        PRETRAINED: True
        OUT_CHANNELS: [512, 1024, 2048, 1024, 1024, 512]
        INPUT_CHANNELS: 3
INPUT:
    IMAGE_SIZE: [300, 300]
DATASETS:
    TRAIN: ("rdd2020_train",)
    TEST: ("rdd2020_val", )
SOLVER:
    MAX_ITER: 120000
    GAMMA: 0.1
    BATCH_SIZE: 16
    LR: 1e-3
OUTPUT_DIR: 'outputs/rdd2020'
DATASET_DIR: "/work/datasets"
EVAL_STEP: 2000 # Evaluate dataset every eval_step, disabled when eval_step < 0
MODEL_SAVE_STEP: 2000 # Save checkpoint every save_step

2021-04-15 20:39:23,416 SSD INFO: Running with config:
DATASETS:
  TEST: ('rdd2020_val',)
  TRAIN: ('rdd2020_train',)
DATASET_DIR: /work/datasets
DATA_LOADER:
  NUM_WORKERS: 4
  PIN_MEMORY: True
EVAL_STEP: 2000
INPUT:
  IMAGE_SIZE: [300, 300]
  PIXEL_MEAN: [123.675, 116.28, 103.53]
  PIXEL_STD: [1, 1, 1]
LOG_STEP: 10
MOD

In [None]:
# Launch training with the merged config; the returned model is kept so the
# evaluation cell can pass it to do_evaluation.
model = start_train(cfg)

Detector initialized. Total Number of params:  145.44M
Backbone number of parameters: 142.79M
SSD Head number of parameters: 2.65M
2021-04-15 20:39:47,111 SSD.trainer INFO: No checkpoint found.
Dataset loaded. Subset: train, number of images: 8536
2021-04-15 20:39:47,128 SSD.trainer INFO: Start training ...
2021-04-15 20:40:01,802 SSD.trainer INFO: iter: 000010, lr: 0.00100, total_loss: 13.469 (13.469), reg_loss: 3.287 (3.287), cls_loss: 10.182 (10.182), time: 1.155 (1.155), eta: 1 day, 14:30:42, mem: 10158M
2021-04-15 20:40:04,996 SSD.trainer INFO: iter: 000020, lr: 0.00100, total_loss: 10.351 (11.910), reg_loss: 3.180 (3.234), cls_loss: 7.171 (8.676), time: 0.319 (0.737), eta: 1 day, 0:34:38, mem: 10158M
2021-04-15 20:40:08,202 SSD.trainer INFO: iter: 000030, lr: 0.00100, total_loss: 8.784 (10.868), reg_loss: 3.092 (3.187), cls_loss: 5.692 (7.681), time: 0.321 (0.598), eta: 19:56:41, mem: 10158M
2021-04-15 20:40:11,410 SSD.trainer INFO: iter: 000040, lr: 0.00100, total_loss: 7.953 (1

100%|██████████| 366/366 [05:27<00:00,  1.12it/s]


2021-04-15 20:56:22,609 SSD.inference INFO: mAP: 0.0637
D00             : 0.0250
D10             : 0.0165
D20             : 0.1919
D40             : 0.0215

2021-04-15 20:56:25,879 SSD.trainer INFO: iter: 002010, lr: 0.00100, total_loss: 5.467 (5.995), reg_loss: 2.312 (2.540), cls_loss: 3.155 (3.455), time: 33.753 (0.495), eta: 16:14:04, mem: 10158M
2021-04-15 20:56:29,128 SSD.trainer INFO: iter: 002020, lr: 0.00100, total_loss: 5.454 (5.992), reg_loss: 2.270 (2.539), cls_loss: 3.184 (3.453), time: 0.325 (0.494), eta: 16:12:20, mem: 10158M
2021-04-15 20:56:32,376 SSD.trainer INFO: iter: 002030, lr: 0.00100, total_loss: 5.225 (5.988), reg_loss: 2.201 (2.537), cls_loss: 3.024 (3.451), time: 0.325 (0.494), eta: 16:10:36, mem: 10158M
2021-04-15 20:56:35,621 SSD.trainer INFO: iter: 002040, lr: 0.00100, total_loss: 5.354 (5.985), reg_loss: 2.245 (2.536), cls_loss: 3.108 (3.450), time: 0.324 (0.493), eta: 16:08:54, mem: 10158M
2021-04-15 20:56:38,881 SSD.trainer INFO: iter: 002050, lr: 0.0010

100%|██████████| 366/366 [05:21<00:00,  1.14it/s]


2021-04-15 21:12:37,840 SSD.inference INFO: mAP: 0.0816
D00             : 0.0250
D10             : 0.0218
D20             : 0.2265
D40             : 0.0530

2021-04-15 21:12:41,063 SSD.trainer INFO: iter: 004010, lr: 0.00100, total_loss: 4.997 (5.604), reg_loss: 2.012 (2.346), cls_loss: 2.986 (3.258), time: 32.838 (0.491), eta: 15:50:06, mem: 10158M
2021-04-15 21:12:44,298 SSD.trainer INFO: iter: 004020, lr: 0.00100, total_loss: 5.114 (5.602), reg_loss: 2.116 (2.345), cls_loss: 2.998 (3.257), time: 0.324 (0.491), eta: 15:49:12, mem: 10158M
2021-04-15 21:12:47,544 SSD.trainer INFO: iter: 004030, lr: 0.00100, total_loss: 5.046 (5.601), reg_loss: 1.984 (2.344), cls_loss: 3.062 (3.257), time: 0.325 (0.491), eta: 15:48:19, mem: 10158M
2021-04-15 21:12:50,787 SSD.trainer INFO: iter: 004040, lr: 0.00100, total_loss: 4.951 (5.600), reg_loss: 2.035 (2.343), cls_loss: 2.917 (3.256), time: 0.324 (0.490), eta: 15:47:27, mem: 10158M
2021-04-15 21:12:54,034 SSD.trainer INFO: iter: 004050, lr: 0.0010

100%|██████████| 366/366 [05:10<00:00,  1.18it/s]


2021-04-15 21:28:41,837 SSD.inference INFO: mAP: 0.0934
D00             : 0.0477
D10             : 0.0275
D20             : 0.2202
D40             : 0.0782

2021-04-15 21:28:45,065 SSD.trainer INFO: iter: 006010, lr: 0.00100, total_loss: 4.867 (5.388), reg_loss: 1.927 (2.230), cls_loss: 2.940 (3.158), time: 31.774 (0.488), eta: 15:27:43, mem: 10158M
2021-04-15 21:28:48,290 SSD.trainer INFO: iter: 006020, lr: 0.00100, total_loss: 4.702 (5.387), reg_loss: 1.886 (2.230), cls_loss: 2.816 (3.157), time: 0.323 (0.488), eta: 15:27:07, mem: 10158M
2021-04-15 21:28:51,536 SSD.trainer INFO: iter: 006030, lr: 0.00100, total_loss: 4.867 (5.386), reg_loss: 1.984 (2.229), cls_loss: 2.883 (3.156), time: 0.325 (0.488), eta: 15:26:31, mem: 10158M
2021-04-15 21:28:54,782 SSD.trainer INFO: iter: 006040, lr: 0.00100, total_loss: 4.772 (5.385), reg_loss: 1.919 (2.229), cls_loss: 2.853 (3.156), time: 0.325 (0.488), eta: 15:25:56, mem: 10158M
2021-04-15 21:28:58,028 SSD.trainer INFO: iter: 006050, lr: 0.0010

100%|██████████| 366/366 [04:53<00:00,  1.25it/s]


2021-04-15 21:44:28,137 SSD.inference INFO: mAP: 0.0923
D00             : 0.0566
D10             : 0.0230
D20             : 0.2247
D40             : 0.0650

2021-04-15 21:44:31,364 SSD.trainer INFO: iter: 008010, lr: 0.00100, total_loss: 4.728 (5.240), reg_loss: 1.903 (2.150), cls_loss: 2.824 (3.090), time: 30.011 (0.485), eta: 15:04:22, mem: 10158M
2021-04-15 21:44:34,610 SSD.trainer INFO: iter: 008020, lr: 0.00100, total_loss: 4.584 (5.239), reg_loss: 1.820 (2.149), cls_loss: 2.765 (3.090), time: 0.325 (0.484), eta: 15:03:55, mem: 10158M
2021-04-15 21:44:37,855 SSD.trainer INFO: iter: 008030, lr: 0.00100, total_loss: 4.758 (5.239), reg_loss: 1.933 (2.149), cls_loss: 2.825 (3.090), time: 0.324 (0.484), eta: 15:03:28, mem: 10158M
2021-04-15 21:44:41,098 SSD.trainer INFO: iter: 008040, lr: 0.00100, total_loss: 4.801 (5.238), reg_loss: 1.923 (2.149), cls_loss: 2.878 (3.089), time: 0.324 (0.484), eta: 15:03:01, mem: 10158M
2021-04-15 21:44:44,346 SSD.trainer INFO: iter: 008050, lr: 0.0010

In [5]:
logger.info('Start evaluating...')

# Release unused cached GPU memory held by PyTorch's caching allocator
# before running evaluation.
torch.cuda.empty_cache()

# Evaluate the trained model; the result is left as the cell's last
# expression so the notebook displays it.
eval_results = do_evaluation(cfg, model)
eval_results

2021-03-23 15:00:19,172 SSD INFO: Start evaluating...
2021-03-23 15:00:19,326 SSD.inference INFO: Evaluating mnist_detection_val dataset(1000 images):


100%|██████████| 100/100 [00:12<00:00,  7.69it/s]


2021-03-23 15:00:33,086 SSD.inference INFO: mAP: 0.8753
0               : 0.8940
1               : 0.8095
2               : 0.8760
3               : 0.8971
4               : 0.8807
5               : 0.8830
6               : 0.8859
7               : 0.8569
8               : 0.8900
9               : 0.8794



[{'metrics': {'mAP': 0.8752565431542013,
   '0': 0.8939699650469805,
   '1': 0.8094709526574602,
   '2': 0.8760044641851805,
   '3': 0.897146832405082,
   '4': 0.8807326189016644,
   '5': 0.883031870897962,
   '6': 0.8859388314999235,
   '7': 0.8569403048356649,
   '8': 0.8899598842157962,
   '9': 0.8793697068962991}}]