In [1]:
# Load IPython's autoreload extension so the %autoreload magic used in the
# next cell is available.
%load_ext autoreload

In [2]:
# Reload already-imported modules now, so re-running this cell picks up edits
# made to the project sources (e.g. train.py) since the last import.
# NOTE(review): a bare `%autoreload` only reloads once, when this cell runs;
# consider `%autoreload 2` if automatic reloading before every cell is wanted.
%autoreload 
import logging
import os
import pathlib
import torch
from ssd.engine.inference import do_evaluation
from ssd.config.defaults import cfg
from ssd.utils.logger import setup_logger
from train import start_train

In [3]:
# Experiment configuration (YAML) layered on top of the project defaults.
config_file = "configs/resnet_waymo.yaml"

# Apply the overrides, then lock the config against further modification.
cfg.merge_from_file(config_file)
cfg.freeze()

# Ensure the run's output directory exists and attach the "SSD" logger to it.
output_dir = pathlib.Path(cfg.OUTPUT_DIR)
output_dir.mkdir(parents=True, exist_ok=True)
logger = setup_logger("SSD", output_dir)

# Record which file was loaded, its raw text, and the resulting merged config.
logger.info("Loaded configuration file {}".format(config_file))
with open(config_file, mode="r") as cf:
    config_str = "\n" + cf.read()
logger.info(config_str)
logger.info("Running with config:\n{}".format(cfg))


2020-04-03 09:27:24,353 SSD INFO: Loaded configuration file configs/resnet_waymo.yaml
2020-04-03 09:27:24,355 SSD INFO: 
MODEL:
    NUM_CLASSES: 5
    BACKBONE:
        NAME: 'resnet'
        PRETRAINED: True
        #TODO: check whether output fetures are correct for resnet 34
        OUT_CHANNELS: [64, 128, 256, 512, 512]
        INPUT_CHANNELS: 3
    PRIORS:
        FEATURE_MAPS: [75, 38, 19, 10, 1]
INPUT:
    IMAGE_SIZE: 300
DATASETS:
    TRAIN: ("waymo_train",)
    TEST: ("waymo_val", )
SOLVER:
    MAX_ITER: 120000
    LR_STEPS: [80000, 100000]
    GAMMA: 0.1
    BATCH_SIZE: 16
    LR: 1e-3
OUTPUT_DIR: 'outputs/resnet'
DATASET_DIR: "datasets"
2020-04-03 09:27:24,356 SSD INFO: Running with config:
DATASETS:
  TEST: ('waymo_val',)
  TRAIN: ('waymo_train',)
DATASET_DIR: datasets
DATA_LOADER:
  NUM_WORKERS: 4
  PIN_MEMORY: True
EVAL_STEP: 500
INPUT:
  IMAGE_SIZE: 300
  PIXEL_MEAN: [123, 117, 104]
LOG_STEP: 10
MODEL:
  BACKBONE:
    INPUT_CHANNELS: 3
    NAME: resnet
    OUT_CHANNELS: 

In [None]:
# Launch training through the project's `train.start_train` entry point using
# the frozen config from the setup cell. The returned object is kept as
# `model` because the evaluation cell at the end of the notebook consumes it.
# NOTE(review): with SOLVER.MAX_ITER = 120000 this cell is long-running; the
# evaluation cell will fail with a NameError if this one is interrupted
# before `start_train` returns.
model = start_train(cfg)

resnet
Detector initialized. Total Number of params:  22.42M
Backbone number of parameters: 21.80M
SSD Head number of parameters: 622.3K
2020-04-03 09:27:27,998 SSD.trainer INFO: No checkpoint found.
17936
Dataset loaded. Subset: train, number of images: 14348
2020-04-03 09:27:29,568 SSD.trainer INFO: Start training ...
2020-04-03 09:27:35,337 SSD.trainer INFO: iter: 000010, lr: 0.00035, total_loss: 22.294 (22.294), reg_loss: 7.742 (7.742), cls_loss: 14.552 (14.552), time: 0.576 (0.576), eta: 19:12:43, mem: 2198M
2020-04-03 09:27:38,761 SSD.trainer INFO: iter: 000020, lr: 0.00036, total_loss: 17.202 (19.748), reg_loss: 7.472 (7.607), cls_loss: 9.730 (12.141), time: 0.343 (0.459), eta: 15:18:47, mem: 2198M
2020-04-03 09:27:42,024 SSD.trainer INFO: iter: 000030, lr: 0.00037, total_loss: 13.804 (17.767), reg_loss: 5.863 (7.026), cls_loss: 7.941 (10.741), time: 0.326 (0.415), eta: 13:49:56, mem: 2198M
2020-04-03 09:27:45,734 SSD.trainer INFO: iter: 000040, lr: 0.00039, total_loss: 12.389 (

100%|██████████| 359/359 [19:35<00:00,  3.11s/it]


2020-04-03 09:50:02,904 SSD.inference INFO: mAP: 0.0028
vehicle         : 0.0083
person          : 0.0000
sign            : nan
cyclist         : 0.0000

2020-04-03 09:50:04,814 SSD.trainer INFO: iter: 000510, lr: 0.00100, total_loss: 8.028 (9.216), reg_loss: 5.118 (5.372), cls_loss: 2.909 (3.845), time: 118.059 (2.657), eta: 3 days, 16:12:05, mem: 2198M
2020-04-03 09:50:08,005 SSD.trainer INFO: iter: 000520, lr: 0.00100, total_loss: 7.854 (9.190), reg_loss: 5.010 (5.365), cls_loss: 2.844 (3.825), time: 0.319 (2.612), eta: 3 days, 14:42:05, mem: 2198M
2020-04-03 09:50:11,838 SSD.trainer INFO: iter: 000530, lr: 0.00100, total_loss: 7.899 (9.166), reg_loss: 5.063 (5.359), cls_loss: 2.837 (3.807), time: 0.383 (2.570), eta: 3 days, 13:17:55, mem: 2198M
2020-04-03 09:50:15,050 SSD.trainer INFO: iter: 000540, lr: 0.00100, total_loss: 7.735 (9.139), reg_loss: 4.891 (5.351), cls_loss: 2.844 (3.789), time: 0.321 (2.529), eta: 3 days, 11:54:33, mem: 2198M
2020-04-03 09:50:18,850 SSD.trainer INFO

 69%|██████▊   | 246/359 [11:01<05:04,  2.69s/it]

In [None]:
# Final evaluation of the `model` returned by `start_train` in the training cell.
logger.info('Start evaluating...')
# Release cached-but-unused GPU memory held by PyTorch's caching allocator
# after training. NOTE(review): the original comment said this is meant to
# speed up evaluation; empty_cache() frees memory rather than accelerating
# compute, so any speed-up would come from reduced memory pressure — confirm.
torch.cuda.empty_cache()
do_evaluation(cfg, model)