In [1]:
# Load IPython's autoreload extension; it provides the %autoreload magic
# that the next cell invokes before importing the project modules.
%load_ext autoreload

In [2]:
%autoreload 
import logging
import os
import pathlib
import torch
from ssd.engine.inference import do_evaluation
from ssd.config.defaults import cfg
from ssd.utils.logger import setup_logger
from train import start_train

In [3]:
# YAML file holding the experiment-specific overrides.
config_file = "configs/mnist.yaml"

# Overlay the YAML values on the project defaults, then lock the config.
cfg.merge_from_file(config_file)
cfg.freeze()

# Make sure the output directory (including missing parents) exists
# before the logger is pointed at it.
output_dir = pathlib.Path(cfg.OUTPUT_DIR)
output_dir.mkdir(parents=True, exist_ok=True)

logger = setup_logger("SSD", output_dir)

# Log three things in order: which file was loaded, its raw text, and the
# fully merged configuration that the run will actually use.
logger.info("Loaded configuration file {}".format(config_file))
config_str = "\n" + pathlib.Path(config_file).read_text()
logger.info(config_str)
logger.info("Running with config:\n{}".format(cfg))


2021-03-20 16:28:49,109 SSD INFO: Loaded configuration file configs/mnist.yaml
2021-03-20 16:28:49,111 SSD INFO: 
MODEL:
    NUM_CLASSES: 11
    BACKBONE:
        NAME: 'basic'
        PRETRAINED: False
        OUT_CHANNELS: [512, 1024, 512, 512, 256, 256]
        INPUT_CHANNELS: 3
INPUT:
    IMAGE_SIZE: [300, 300]
DATASETS:
    TRAIN: ("mnist_detection_train", "mnist_detection_val")
    TEST: ("mnist_detection_val", )
SOLVER:
    MAX_ITER: 10000
    GAMMA: 0.1
    BATCH_SIZE: 16
    LR: 2e-3
OUTPUT_DIR: 'outputs/basic'
DATASET_DIR: "datasets"

2021-03-20 16:28:49,113 SSD INFO: Running with config:
DATASETS:
  TEST: ('mnist_detection_val',)
  TRAIN: ('mnist_detection_train', 'mnist_detection_val')
DATASET_DIR: datasets
DATA_LOADER:
  NUM_WORKERS: 4
  PIN_MEMORY: True
EVAL_STEP: 500
INPUT:
  IMAGE_SIZE: [300, 300]
  PIXEL_MEAN: [123.675, 116.28, 103.53]
  PIXEL_STD: [1, 1, 1]
LOG_STEP: 10
MODEL:
  BACKBONE:
    INPUT_CHANNELS: 3
    NAME: basic
    OUT_CHANNELS: (512, 1024, 512, 512, 25

In [4]:
# Train with the frozen config and keep the returned model for the
# evaluation cell that follows.
# In the recorded run this resumed from outputs/basic/model_005000.pth
# instead of starting at iteration 0 -- presumably start_train picks up the
# latest checkpoint under cfg.OUTPUT_DIR; confirm in train.py.
# NOTE(review): the final "0.2290 s / it" in the log looks like the 38:10
# session time divided by all 10000 iterations, although only ~5000 ran in
# this session (the per-step time logged is ~0.46 s) -- verify before
# quoting that number.
model = start_train(cfg)

Detector initialized. Total Number of params:  36.15M
Backbone number of parameters: 33.94M
SSD Head number of parameters: 2.21M
2021-03-20 16:28:54,037 SSD.trainer INFO: Loading checkpoint from outputs/basic/model_005000.pth
2021-03-20 16:28:54,627 SSD.trainer INFO: Loading optimizer from outputs/basic/model_005000.pth
2021-03-20 16:28:57,930 SSD.trainer INFO: Start training ...
2021-03-20 16:29:14,311 SSD.trainer INFO: iter: 005010, lr: 0.00200, total_loss: 1.738 (1.738), reg_loss: 0.444 (0.444), cls_loss: 1.294 (1.294), time: 1.424 (1.424), eta: 1:58:28, mem: 12450M
2021-03-20 16:29:18,499 SSD.trainer INFO: iter: 005020, lr: 0.00200, total_loss: 1.693 (1.716), reg_loss: 0.434 (0.439), cls_loss: 1.259 (1.277), time: 0.419 (0.922), eta: 1:16:29, mem: 12450M
2021-03-20 16:29:22,697 SSD.trainer INFO: iter: 005030, lr: 0.00200, total_loss: 1.743 (1.725), reg_loss: 0.456 (0.445), cls_loss: 1.288 (1.280), time: 0.420 (0.754), eta: 1:02:29, mem: 12450M
2021-03-20 16:29:26,896 SSD.trainer IN

100%|██████████| 100/100 [00:12<00:00,  8.25it/s]


2021-03-20 16:32:58,008 SSD.inference INFO: mAP: 0.8463
0               : 0.8811
1               : 0.7722
2               : 0.8514
3               : 0.8673
4               : 0.8761
5               : 0.8711
6               : 0.8566
7               : 0.7652
8               : 0.8693
9               : 0.8528

2021-03-20 16:33:02,068 SSD.trainer INFO: iter: 005510, lr: 0.00200, total_loss: 1.752 (1.746), reg_loss: 0.466 (0.458), cls_loss: 1.286 (1.288), time: 1.770 (0.475), eta: 0:35:30, mem: 12450M
2021-03-20 16:33:06,391 SSD.trainer INFO: iter: 005520, lr: 0.00200, total_loss: 1.729 (1.746), reg_loss: 0.458 (0.458), cls_loss: 1.271 (1.287), time: 0.432 (0.474), eta: 0:35:22, mem: 12450M
2021-03-20 16:33:10,718 SSD.trainer INFO: iter: 005530, lr: 0.00200, total_loss: 1.704 (1.745), reg_loss: 0.445 (0.458), cls_loss: 1.259 (1.287), time: 0.433 (0.473), eta: 0:35:13, mem: 12450M
2021-03-20 16:33:15,048 SSD.trainer INFO: iter: 005540, lr: 0.00200, total_loss: 1.694 (1.744), reg_loss: 0.443 (0

100%|██████████| 100/100 [00:09<00:00, 10.04it/s]


2021-03-20 16:36:45,916 SSD.inference INFO: mAP: 0.8555
0               : 0.8817
1               : 0.7767
2               : 0.8335
3               : 0.8671
4               : 0.8766
5               : 0.8594
6               : 0.8649
7               : 0.8452
8               : 0.8867
9               : 0.8635

2021-03-20 16:36:49,976 SSD.trainer INFO: iter: 006010, lr: 0.00200, total_loss: 1.716 (1.729), reg_loss: 0.461 (0.454), cls_loss: 1.255 (1.275), time: 1.543 (0.465), eta: 0:30:56, mem: 12450M
2021-03-20 16:36:54,306 SSD.trainer INFO: iter: 006020, lr: 0.00200, total_loss: 1.655 (1.728), reg_loss: 0.445 (0.453), cls_loss: 1.210 (1.275), time: 0.433 (0.465), eta: 0:30:50, mem: 12450M
2021-03-20 16:36:58,640 SSD.trainer INFO: iter: 006030, lr: 0.00200, total_loss: 1.697 (1.728), reg_loss: 0.450 (0.453), cls_loss: 1.247 (1.275), time: 0.433 (0.465), eta: 0:30:44, mem: 12450M
2021-03-20 16:37:02,977 SSD.trainer INFO: iter: 006040, lr: 0.00200, total_loss: 1.750 (1.728), reg_loss: 0.459 (0

100%|██████████| 100/100 [00:09<00:00, 10.07it/s]


2021-03-20 16:40:33,621 SSD.inference INFO: mAP: 0.8574
0               : 0.8898
1               : 0.8210
2               : 0.8545
3               : 0.8638
4               : 0.8589
5               : 0.8467
6               : 0.8810
7               : 0.8321
8               : 0.8771
9               : 0.8493

2021-03-20 16:40:37,673 SSD.trainer INFO: iter: 006510, lr: 0.00200, total_loss: 1.629 (1.712), reg_loss: 0.414 (0.449), cls_loss: 1.215 (1.263), time: 1.539 (0.462), eta: 0:26:52, mem: 12450M
2021-03-20 16:40:42,009 SSD.trainer INFO: iter: 006520, lr: 0.00200, total_loss: 1.613 (1.712), reg_loss: 0.430 (0.449), cls_loss: 1.183 (1.262), time: 0.434 (0.462), eta: 0:26:47, mem: 12450M
2021-03-20 16:40:46,333 SSD.trainer INFO: iter: 006530, lr: 0.00200, total_loss: 1.688 (1.711), reg_loss: 0.439 (0.449), cls_loss: 1.249 (1.262), time: 0.432 (0.462), eta: 0:26:41, mem: 12450M
2021-03-20 16:40:50,665 SSD.trainer INFO: iter: 006540, lr: 0.00200, total_loss: 1.690 (1.711), reg_loss: 0.449 (0

100%|██████████| 100/100 [00:09<00:00, 10.03it/s]


2021-03-20 16:44:21,464 SSD.inference INFO: mAP: 0.8422
0               : 0.8880
1               : 0.8021
2               : 0.8108
3               : 0.8745
4               : 0.8710
5               : 0.8708
6               : 0.8860
7               : 0.8631
8               : 0.6870
9               : 0.8690

2021-03-20 16:44:25,518 SSD.trainer INFO: iter: 007010, lr: 0.00200, total_loss: 1.663 (1.697), reg_loss: 0.436 (0.446), cls_loss: 1.227 (1.251), time: 1.544 (0.460), eta: 0:22:56, mem: 12450M
2021-03-20 16:44:29,847 SSD.trainer INFO: iter: 007020, lr: 0.00200, total_loss: 1.625 (1.696), reg_loss: 0.431 (0.446), cls_loss: 1.194 (1.250), time: 0.433 (0.460), eta: 0:22:51, mem: 12450M
2021-03-20 16:44:34,173 SSD.trainer INFO: iter: 007030, lr: 0.00200, total_loss: 1.643 (1.696), reg_loss: 0.418 (0.446), cls_loss: 1.225 (1.250), time: 0.433 (0.460), eta: 0:22:46, mem: 12450M
2021-03-20 16:44:38,507 SSD.trainer INFO: iter: 007040, lr: 0.00200, total_loss: 1.681 (1.696), reg_loss: 0.437 (0

100%|██████████| 100/100 [00:09<00:00, 10.10it/s]


2021-03-20 16:48:08,961 SSD.inference INFO: mAP: 0.8634
0               : 0.8854
1               : 0.8102
2               : 0.8647
3               : 0.8746
4               : 0.8686
5               : 0.8634
6               : 0.8778
7               : 0.8445
8               : 0.8841
9               : 0.8611

2021-03-20 16:48:13,001 SSD.trainer INFO: iter: 007510, lr: 0.00200, total_loss: 1.580 (1.681), reg_loss: 0.430 (0.442), cls_loss: 1.150 (1.239), time: 1.531 (0.459), eta: 0:19:03, mem: 12450M
2021-03-20 16:48:17,329 SSD.trainer INFO: iter: 007520, lr: 0.00200, total_loss: 1.672 (1.681), reg_loss: 0.449 (0.442), cls_loss: 1.222 (1.239), time: 0.433 (0.459), eta: 0:18:58, mem: 12450M
2021-03-20 16:48:21,656 SSD.trainer INFO: iter: 007530, lr: 0.00200, total_loss: 1.618 (1.680), reg_loss: 0.442 (0.442), cls_loss: 1.177 (1.238), time: 0.433 (0.459), eta: 0:18:54, mem: 12450M
2021-03-20 16:48:25,979 SSD.trainer INFO: iter: 007540, lr: 0.00200, total_loss: 1.616 (1.680), reg_loss: 0.415 (0

100%|██████████| 100/100 [00:09<00:00, 10.01it/s]


2021-03-20 16:51:56,724 SSD.inference INFO: mAP: 0.8627
0               : 0.8934
1               : 0.7490
2               : 0.8630
3               : 0.8755
4               : 0.8857
5               : 0.8744
6               : 0.8778
7               : 0.8554
8               : 0.8868
9               : 0.8662

2021-03-20 16:52:00,792 SSD.trainer INFO: iter: 008010, lr: 0.00200, total_loss: 1.581 (1.667), reg_loss: 0.428 (0.439), cls_loss: 1.153 (1.228), time: 1.548 (0.459), eta: 0:15:12, mem: 12450M
2021-03-20 16:52:05,133 SSD.trainer INFO: iter: 008020, lr: 0.00200, total_loss: 1.602 (1.667), reg_loss: 0.422 (0.439), cls_loss: 1.180 (1.228), time: 0.434 (0.459), eta: 0:15:08, mem: 12450M
2021-03-20 16:52:09,473 SSD.trainer INFO: iter: 008030, lr: 0.00200, total_loss: 1.529 (1.666), reg_loss: 0.393 (0.439), cls_loss: 1.137 (1.228), time: 0.434 (0.459), eta: 0:15:03, mem: 12450M
2021-03-20 16:52:13,805 SSD.trainer INFO: iter: 008040, lr: 0.00200, total_loss: 1.623 (1.666), reg_loss: 0.425 (0

100%|██████████| 100/100 [00:10<00:00,  9.96it/s]


2021-03-20 16:55:44,913 SSD.inference INFO: mAP: 0.8630
0               : 0.8980
1               : 0.7672
2               : 0.8793
3               : 0.8883
4               : 0.8889
5               : 0.8276
6               : 0.8867
7               : 0.8191
8               : 0.9004
9               : 0.8743

2021-03-20 16:55:48,989 SSD.trainer INFO: iter: 008510, lr: 0.00200, total_loss: 1.566 (1.654), reg_loss: 0.419 (0.436), cls_loss: 1.147 (1.218), time: 1.559 (0.458), eta: 0:11:22, mem: 12450M
2021-03-20 16:55:53,325 SSD.trainer INFO: iter: 008520, lr: 0.00200, total_loss: 1.556 (1.653), reg_loss: 0.407 (0.435), cls_loss: 1.149 (1.218), time: 0.434 (0.458), eta: 0:11:18, mem: 12450M
2021-03-20 16:55:57,656 SSD.trainer INFO: iter: 008530, lr: 0.00200, total_loss: 1.565 (1.653), reg_loss: 0.416 (0.435), cls_loss: 1.149 (1.218), time: 0.433 (0.458), eta: 0:11:13, mem: 12450M
2021-03-20 16:56:02,006 SSD.trainer INFO: iter: 008540, lr: 0.00200, total_loss: 1.601 (1.653), reg_loss: 0.444 (0

100%|██████████| 100/100 [00:11<00:00,  8.36it/s]


2021-03-20 16:59:35,068 SSD.inference INFO: mAP: 0.8234
0               : 0.8998
1               : 0.2848
2               : 0.8651
3               : 0.8846
4               : 0.9007
5               : 0.8878
6               : 0.8707
7               : 0.8615
8               : 0.8988
9               : 0.8805

2021-03-20 16:59:39,126 SSD.trainer INFO: iter: 009010, lr: 0.00200, total_loss: 1.550 (1.643), reg_loss: 0.415 (0.433), cls_loss: 1.136 (1.210), time: 1.752 (0.459), eta: 0:07:34, mem: 12450M
2021-03-20 16:59:43,461 SSD.trainer INFO: iter: 009020, lr: 0.00200, total_loss: 1.577 (1.643), reg_loss: 0.416 (0.433), cls_loss: 1.161 (1.210), time: 0.434 (0.459), eta: 0:07:29, mem: 12450M
2021-03-20 16:59:47,797 SSD.trainer INFO: iter: 009030, lr: 0.00200, total_loss: 1.494 (1.643), reg_loss: 0.391 (0.433), cls_loss: 1.103 (1.210), time: 0.434 (0.458), eta: 0:07:24, mem: 12450M
2021-03-20 16:59:52,138 SSD.trainer INFO: iter: 009040, lr: 0.00200, total_loss: 1.616 (1.642), reg_loss: 0.423 (0

100%|██████████| 100/100 [00:10<00:00,  9.97it/s]


2021-03-20 17:03:22,932 SSD.inference INFO: mAP: 0.8710
0               : 0.8946
1               : 0.8234
2               : 0.8575
3               : 0.8847
4               : 0.8847
5               : 0.8777
6               : 0.8803
7               : 0.8583
8               : 0.8755
9               : 0.8731

2021-03-20 17:03:26,986 SSD.trainer INFO: iter: 009510, lr: 0.00200, total_loss: 1.506 (1.632), reg_loss: 0.406 (0.430), cls_loss: 1.100 (1.201), time: 1.555 (0.458), eta: 0:03:44, mem: 12450M
2021-03-20 17:03:31,313 SSD.trainer INFO: iter: 009520, lr: 0.00200, total_loss: 1.524 (1.631), reg_loss: 0.403 (0.430), cls_loss: 1.121 (1.201), time: 0.433 (0.458), eta: 0:03:39, mem: 12450M
2021-03-20 17:03:35,646 SSD.trainer INFO: iter: 009530, lr: 0.00200, total_loss: 1.504 (1.631), reg_loss: 0.397 (0.430), cls_loss: 1.107 (1.201), time: 0.433 (0.458), eta: 0:03:35, mem: 12450M
2021-03-20 17:03:39,970 SSD.trainer INFO: iter: 009540, lr: 0.00200, total_loss: 1.540 (1.631), reg_loss: 0.408 (0

100%|██████████| 100/100 [00:09<00:00, 10.08it/s]


2021-03-20 17:07:10,380 SSD.inference INFO: mAP: 0.8753
0               : 0.8940
1               : 0.8096
2               : 0.8760
3               : 0.8971
4               : 0.8807
5               : 0.8830
6               : 0.8859
7               : 0.8569
8               : 0.8900
9               : 0.8794

2021-03-20 17:07:10,429 SSD.trainer INFO: Saving checkpoint to outputs/basic/model_final.pth
2021-03-20 17:07:10,822 SSD.trainer INFO: Total training time: 0:38:10 (0.2290 s / it)


In [5]:
logger.info('Start evaluating...')
# Release cached, currently unused GPU memory held by PyTorch's caching
# allocator now that training has finished.
# NOTE(review): the earlier comment said this speeds up evaluation;
# empty_cache() only frees cached blocks, so confirm any speed benefit.
torch.cuda.empty_cache()
# Kept as the cell's last expression so the notebook displays the returned
# value (in the recorded run, a list holding one dict of mAP and per-class
# scores for the mnist_detection_val dataset).
do_evaluation(cfg, model)

2021-03-20 17:07:19,476 SSD INFO: Start evaluating...
2021-03-20 17:07:19,625 SSD.inference INFO: Evaluating mnist_detection_val dataset(1000 images):


100%|██████████| 100/100 [00:09<00:00, 10.18it/s]


2021-03-20 17:07:30,035 SSD.inference INFO: mAP: 0.8753
0               : 0.8940
1               : 0.8096
2               : 0.8760
3               : 0.8971
4               : 0.8807
5               : 0.8830
6               : 0.8859
7               : 0.8569
8               : 0.8900
9               : 0.8794



[{'metrics': {'mAP': 0.8752613436177272,
   '0': 0.8939699650469805,
   '1': 0.8095949781476657,
   '2': 0.8760044641851805,
   '3': 0.897146832405082,
   '4': 0.880738322792082,
   '5': 0.8830380497363594,
   '6': 0.8859388314999235,
   '7': 0.8568509513659689,
   '8': 0.8899598842157962,
   '9': 0.8793711567822344}}]