In [1]:
import os, sys
import glob

import cv2

import register_widerface

In [2]:
!python register_widerface.py

In [3]:
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

In [4]:
# Fine-tuning configuration: start from the model-zoo RetinaNet R50-FPN (1x)
# recipe. The same zoo entry provides both the base config and the initial
# checkpoint, so it is named once.
RETINANET_CONFIG = "COCO-Detection/retinanet_R_50_FPN_1x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(RETINANET_CONFIG))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(RETINANET_CONFIG)
cfg.MODEL.RETINANET.NUM_CLASSES = 1  # one foreground class

# Dataset names, presumably registered by `register_widerface`.
cfg.DATASETS.TRAIN = ("widerface_train",)
cfg.DATASETS.TEST = ("widerface_val",)
cfg.DATALOADER.NUM_WORKERS = 4

# Optimisation schedule.
# NOTE(review): WARMUP_ITERS equals the first entry of STEPS, so the learning
# rate is multiplied by GAMMA as soon as warm-up finishes (the training log in
# this notebook shows lr ~9.6e-4 at iter 959 and 5e-05 at iter 1019), and the
# second step coincides with MAX_ITER. Confirm this schedule is intended.
solver = cfg.SOLVER
solver.IMS_PER_BATCH = 4
solver.BASE_LR = 0.001
solver.WARMUP_ITERS = 1000
solver.MAX_ITER = 1500
solver.STEPS = (1000, 1500)
solver.GAMMA = 0.05

# Evaluate on cfg.DATASETS.TEST periodically while training.
cfg.TEST.EVAL_PERIOD = 1000

Loading config /home/idealabs/Libs/miniconda3/envs/.torchenv/lib/python3.7/site-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


In [5]:
class CocoTrainer(DefaultTrainer):
    """DefaultTrainer variant whose evaluator is a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Create the COCO evaluator for ``dataset_name``.

        Args:
            cfg: config object forwarded to ``COCOEvaluator``.
            dataset_name: name of the dataset to evaluate on.
            output_folder: directory handed to the evaluator; when omitted,
                ``"wider_eval"`` is created (if missing) and used.

        Returns:
            A ``COCOEvaluator`` constructed with ``distributed=False``.
        """
        if output_folder is None:
            output_folder = "wider_eval"
            os.makedirs(output_folder, exist_ok=True)

        return COCOEvaluator(dataset_name, cfg, False, output_folder)

In [6]:
# Training run. The statements below are order-dependent: the output directory
# must exist before the trainer is created and starts writing into it.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# CocoTrainer supplies the COCO-style evaluator used for periodic evaluation.
trainer = CocoTrainer(cfg) 
# resume=False: presumably start from cfg.MODEL.WEIGHTS instead of picking up
# a previous checkpoint in cfg.OUTPUT_DIR — confirm against detectron2 docs.
trainer.resume_or_load(resume=False)
trainer.train()

[32m[12/18 17:11:35 d2.engine.defaults]: [0mModel:
RetinaNet(
  (backbone): FPN(
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelP6P7(
      (p6): Conv2d(2048, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (p7): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    )
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
   

[32m[12/18 17:11:36 d2.data.datasets.coco]: [0mLoaded 12880 images in COCO format from /home/idealabs/data/opensource_dataset/WIDER/widerface_train_coco.json
[32m[12/18 17:11:37 d2.data.build]: [0mRemoved 4 images with no usable annotations. 12876 images left.
[32m[12/18 17:11:37 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|    face    | 159420       |
|            |              |[0m
[32m[12/18 17:11:37 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[12/18 17:11:37 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[12/18 17:11:37 d2.data.common]: [0mSerializing 12876 elements to byte tensors and concatenating them all ...
[32m[12/18 17:11:37 d2.data.common]: [0mSerialized dataset takes 12.31 MiB


Skip loading parameter 'head.cls_score.weight' to the model due to incompatible shapes: (720, 256, 3, 3) in the checkpoint but (9, 256, 3, 3) in the model! You might want to double check if this is expected.
Skip loading parameter 'head.cls_score.bias' to the model due to incompatible shapes: (720,) in the checkpoint but (9,) in the model! You might want to double check if this is expected.


[32m[12/18 17:11:37 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[12/18 17:11:47 d2.utils.events]: [0m eta: 0:11:20  iter: 19  total_loss: 2.326  loss_cls: 1.43  loss_box_reg: 0.8959  time: 0.4691  data_time: 0.0163  lr: 1.9981e-05  max_mem: 4625M
[32m[12/18 17:11:55 d2.utils.events]: [0m eta: 0:10:41  iter: 39  total_loss: 1.293  loss_cls: 0.6878  loss_box_reg: 0.6069  time: 0.4402  data_time: 0.0047  lr: 3.9961e-05  max_mem: 4625M
[32m[12/18 17:12:04 d2.utils.events]: [0m eta: 0:10:46  iter: 59  total_loss: 1.358  loss_cls: 0.7424  loss_box_reg: 0.6313  time: 0.4453  data_time: 0.0047  lr: 5.9941e-05  max_mem: 4990M
[32m[12/18 17:12:14 d2.utils.events]: [0m eta: 0:10:38  iter: 79  total_loss: 0.8775  loss_cls: 0.5054  loss_box_reg: 0.3721  time: 0.4435  data_time: 0.0049  lr: 7.9921e-05  max_mem: 7800M
[32m[12/18 17:12:23 d2.utils.events]: [0m eta: 0:10:18  iter: 99  total_loss: 0.7789  loss_cls: 0.4407  loss_box_reg: 0.3186  time: 0.4386  data_time: 0

[32m[12/18 17:18:19 d2.utils.events]: [0m eta: 0:04:39  iter: 879  total_loss: 0.4912  loss_cls: 0.2502  loss_box_reg: 0.2477  time: 0.4483  data_time: 0.0049  lr: 0.00087912  max_mem: 9169M
[32m[12/18 17:18:28 d2.utils.events]: [0m eta: 0:04:30  iter: 899  total_loss: 0.3715  loss_cls: 0.1562  loss_box_reg: 0.2019  time: 0.4486  data_time: 0.0047  lr: 0.0008991  max_mem: 9169M
[32m[12/18 17:18:37 d2.utils.events]: [0m eta: 0:04:21  iter: 919  total_loss: 0.4424  loss_cls: 0.1487  loss_box_reg: 0.2959  time: 0.4485  data_time: 0.0047  lr: 0.00091908  max_mem: 9169M
[32m[12/18 17:18:46 d2.utils.events]: [0m eta: 0:04:12  iter: 939  total_loss: 0.38  loss_cls: 0.1268  loss_box_reg: 0.2532  time: 0.4487  data_time: 0.0045  lr: 0.00093906  max_mem: 9169M
[32m[12/18 17:18:55 d2.utils.events]: [0m eta: 0:04:03  iter: 959  total_loss: 0.4985  loss_cls: 0.208  loss_box_reg: 0.282  time: 0.4487  data_time: 0.0050  lr: 0.00095904  max_mem: 9169M
[32m[12/18 17:19:04 d2.utils.events]: 

[32m[12/18 17:22:16 d2.utils.events]: [0m eta: 0:03:36  iter: 1019  total_loss: 0.3223  loss_cls: 0.1236  loss_box_reg: 0.1987  time: 0.4484  data_time: 0.0055  lr: 5e-05  max_mem: 9169M
[32m[12/18 17:22:25 d2.utils.events]: [0m eta: 0:03:27  iter: 1039  total_loss: 0.327  loss_cls: 0.1292  loss_box_reg: 0.2137  time: 0.4486  data_time: 0.0047  lr: 5e-05  max_mem: 9169M
[32m[12/18 17:22:34 d2.utils.events]: [0m eta: 0:03:18  iter: 1059  total_loss: 0.318  loss_cls: 0.1193  loss_box_reg: 0.1988  time: 0.4479  data_time: 0.0051  lr: 5e-05  max_mem: 9169M
[32m[12/18 17:22:43 d2.utils.events]: [0m eta: 0:03:09  iter: 1079  total_loss: 0.4284  loss_cls: 0.1256  loss_box_reg: 0.2628  time: 0.4476  data_time: 0.0050  lr: 5e-05  max_mem: 9169M
[32m[12/18 17:22:52 d2.utils.events]: [0m eta: 0:03:00  iter: 1099  total_loss: 0.3765  loss_cls: 0.1437  loss_box_reg: 0.2302  time: 0.4479  data_time: 0.0059  lr: 5e-05  max_mem: 9169M
[32m[12/18 17:23:01 d2.utils.events]: [0m eta: 0:02:51 

[32m[12/18 17:27:58 d2.evaluation.evaluator]: [0mInference done 2329/3222. 0.0510 s / img. ETA=0:00:46
[32m[12/18 17:28:03 d2.evaluation.evaluator]: [0mInference done 2423/3222. 0.0511 s / img. ETA=0:00:41
[32m[12/18 17:28:08 d2.evaluation.evaluator]: [0mInference done 2520/3222. 0.0511 s / img. ETA=0:00:36
[32m[12/18 17:28:13 d2.evaluation.evaluator]: [0mInference done 2618/3222. 0.0511 s / img. ETA=0:00:31
[32m[12/18 17:28:19 d2.evaluation.evaluator]: [0mInference done 2716/3222. 0.0510 s / img. ETA=0:00:26
[32m[12/18 17:28:24 d2.evaluation.evaluator]: [0mInference done 2812/3222. 0.0511 s / img. ETA=0:00:21
[32m[12/18 17:28:29 d2.evaluation.evaluator]: [0mInference done 2908/3222. 0.0511 s / img. ETA=0:00:16
[32m[12/18 17:28:34 d2.evaluation.evaluator]: [0mInference done 3004/3222. 0.0511 s / img. ETA=0:00:11
[32m[12/18 17:28:39 d2.evaluation.evaluator]: [0mInference done 3097/3222. 0.0512 s / img. ETA=0:00:06
[32m[12/18 17:28:44 d2.evaluation.evaluator]: [0mInfe

In [7]:
# List the training artefacts from the directory the config actually points
# at (instead of a hard-coded "output/"), without relying on IPython automagic
# or an `ls` shell alias being available.
sorted(os.listdir(cfg.OUTPUT_DIR))

coco_instances_results.json
events.out.tfevents.1608275887.ubuntu.12790.0
events.out.tfevents.1608275996.ubuntu.12790.1
events.out.tfevents.1608277171.ubuntu.17549.0
events.out.tfevents.1608277211.ubuntu.17714.0
events.out.tfevents.1608280190.ubuntu.17714.1
events.out.tfevents.1608281006.ubuntu.17714.2
events.out.tfevents.1608282354.ubuntu.17714.3
events.out.tfevents.1608282697.ubuntu.7911.0
instances_predictions.pth
last_checkpoint
metrics.json
model_final.pth


In [8]:
# evaluation
# Point the config at the checkpoint written by training, so that any model
# built from `cfg` afterwards loads the fine-tuned weights.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth')

# No SCORE_THRESH_TEST override here: `trainer.model` was built before this
# cell runs and detectron2 models take their test-time score threshold from
# the config at construction, so changing it on `cfg` now would not alter the
# model being evaluated. COCO AP is also meant to be computed over the full
# range of detection scores, not just high-confidence boxes.
evaluator = COCOEvaluator('widerface_val', cfg, False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "widerface_val")

# Keep the metrics in a variable for later cells; the bare last expression
# still renders them as the cell output.
eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)
eval_results

[32m[12/18 17:28:51 d2.data.datasets.coco]: [0mLoaded 3222 images in COCO format from /home/idealabs/data/opensource_dataset/WIDER/widerface_val_coco.json
[32m[12/18 17:28:51 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[12/18 17:28:51 d2.data.common]: [0mSerializing 3222 elements to byte tensors and concatenating them all ...
[32m[12/18 17:28:51 d2.data.common]: [0mSerialized dataset takes 3.06 MiB
[32m[12/18 17:28:51 d2.evaluation.evaluator]: [0mStart inference on 3222 images
[32m[12/18 17:28:52 d2.evaluation.evaluator]: [0mInference done 11/3222. 0.0509 s / img. ETA=0:02:45
[32m[12/18 17:28:57 d2.evaluation.evaluator]: [0mInference done 108/3222. 0.0510 s / img. ETA=0:02:41
[32m[12/18 17:29:02 d2.evaluation.evaluator]: [0mInference done 206/3222. 0.0507 s / img. ETA=0:02:35
[32m[12/18 17:29:07 d2.evaluation.evaluator]: [0mInference done 303/3

OrderedDict([('bbox',
              {'AP': 23.523482945037163,
               'AP50': 46.04143818044157,
               'AP75': 22.13662667306016,
               'APs': 10.922065542102427,
               'APm': 51.48030704738243,
               'APl': 60.46302256891186})])

In [9]:
# test
# Dataset metadata passed to the Visualizer when drawing predictions.
metadata = MetadataCatalog.get("widerface_train")

# Build a predictor from the fine-tuned checkpoint with a 0.7 test-time
# score threshold.
final_weights = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth')
cfg.MODEL.WEIGHTS = final_weights
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.7
predictor = DefaultPredictor(cfg)

In [None]:
# Run the predictor over a folder of test images and show each result for
# about one second in an OpenCV window.
image_path = "/home/idealabs/data/opensource_dataset/WIDER/test/images/0--Parade/"

try:
    # sorted(): glob returns files in arbitrary order; sorting makes the
    # slideshow reproducible. A named loop variable is used rather than `_`,
    # which IPython reserves for the last cell output.
    for image_file in sorted(glob.glob(os.path.join(image_path, "*.jpg"))):
        img = cv2.imread(image_file)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; skip it instead of crashing the predictor.
            print(f"Skipping unreadable image: {image_file}")
            continue

        outputs = predictor(img)
        # OpenCV loads BGR; the channel axis is reversed for the Visualizer
        # and reversed back before handing the drawing to cv2.imshow.
        v = Visualizer(
            img[:, :, ::-1],
            metadata=metadata,
            scale=1,
            instance_mode=ColorMode.IMAGE
        )
        instances = outputs['instances'].to('cpu')
        out = v.draw_instance_predictions(instances)
        cv2.imshow("out", out.get_image()[:, :, ::-1])
        cv2.waitKey(1000)
finally:
    # Always release the HighGUI window, even on error or interrupt, so no
    # orphaned window is left behind by the kernel.
    cv2.destroyAllWindows()