In [None]:
import argparse
import random
import os
import torch
import functools
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data import make_data_loader
from maskrcnn_benchmark.utils.comm import get_rank
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.utils.mlperf_logger import configure_logger
from maskrcnn_benchmark.utils.mlperf_logger import log_start
from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.solver import make_optimizer
from maskrcnn_benchmark.solver import make_lr_scheduler
from maskrcnn_benchmark.engine.trainer import do_train

In [None]:
from mlperf_logging.mllog import constants

In [None]:
import json

In [None]:
# Configure the MLPerf logger using the Mask R-CNN benchmark constant.
configure_logger(constants.MASKRCNN)

# The process count comes from the WORLD_SIZE environment variable; when the
# variable is absent a single process is assumed.
num_gpus = int(os.environ.get("WORLD_SIZE", 1))

# More than one process means the run is treated as distributed.
distributed = num_gpus > 1

In [None]:
# Build the argument parser. It is only constructed in this cell; the call to
# parse_args() lives in the following cell.
parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")

# Path to a config file; defaults to the empty string when not supplied.
parser.add_argument("--config-file", type=str, default="", metavar="FILE", help="path to config file")

# Local rank: falls back to the LOCAL_RANK environment variable, then to 0.
# argparse applies `type=int` to string defaults, so an environment value is
# converted to an integer as well.
parser.add_argument("--local_rank", default=os.getenv('LOCAL_RANK', 0), type=int)

# Every remaining command-line token is collected into `opts`.
parser.add_argument(
    "opts",
    nargs=argparse.REMAINDER,
    default=None,
    help="Modify config options using the command-line",
)

In [None]:
# parse_args() with no arguments reads sys.argv. Inside a Jupyter kernel that
# list holds the kernel launcher's own flags (e.g. "-f <connection-file>"),
# which this parser does not know, so argparse reports "unrecognized
# arguments" and raises SystemExit. Parsing an explicit empty list yields the
# parser defaults instead and keeps Restart & Run All working.
# To pass options from the notebook, replace [] with the desired tokens,
# e.g. ["--config-file", "path/to/config.yaml"].
args = parser.parse_args(args=[])

In [None]:
# Config file merged into the global `cfg` for this notebook run.
CONFIG_FILE = "configs/e2e_mask_rcnn_R_50_FPN_1x_1_node_test.yaml"

cfg.merge_from_file(CONFIG_FILE)
# Overrides from the parsed command line are currently disabled:
# cfg.merge_from_list(args.opts)
cfg.freeze()

In [None]:
# Build the list of fixed shape pairs handed to the data loader. The list is
# only produced when padding-to-max or CUDA graphs are enabled in the config;
# otherwise `shapes` is None.
if cfg.DATALOADER.ALWAYS_PAD_TO_MAX or cfg.USE_CUDA_GRAPH:

    def _first_if_tuple(value):
        """Return the first element of a tuple, or the value itself otherwise."""
        return value[0] if isinstance(value, tuple) else value

    def _round_up(value, multiple):
        """Round `value` up to the nearest multiple of `multiple`."""
        return ((value + multiple - 1) // multiple) * multiple

    # A divisibility below 1 is clamped to 1 so the rounding never divides by zero.
    divisibility = max(1, cfg.DATALOADER.SIZE_DIVISIBILITY)
    shapes_per_orientation = cfg.CUDA_GRAPH_NUM_SHAPES_PER_ORIENTATION

    min_size = _round_up(_first_if_tuple(cfg.INPUT.MIN_SIZE_TRAIN), divisibility)
    max_size = _round_up(_first_if_tuple(cfg.INPUT.MAX_SIZE_TRAIN), divisibility)

    # Number of `divisibility`-sized steps between the two rounded sizes.
    size_range = (max_size - min_size) // divisibility

    shapes = []
    for step in range(1, shapes_per_orientation + 1):
        # Step `shapes_per_orientation` lands exactly on max_size.
        stepped_size = min_size + (step * size_range // shapes_per_orientation) * divisibility
        # Each stepped size is emitted in both orderings.
        shapes.extend([(min_size, stepped_size), (stepped_size, min_size)])
    print(shapes)
else:
    shapes = None

In [None]:
# Draw a fresh 32-bit master seed from the operating system's randomness
# source, then derive a seeded generator from it for the data loader.
master_seed = random.SystemRandom().randint(0, 2 ** 32 - 1)
random_number_generator = random.Random(master_seed)

# Keyword options for the training data loader, gathered in one place.
loader_options = dict(
    is_train=True,
    is_distributed=distributed,
    start_iter=0,
    random_number_generator=random_number_generator,
    seed=master_seed,
    shapes=shapes,
    hybrid_dataloader=None,
)

# make_data_loader returns the loader together with the iterations per epoch.
data_loader, iters_per_epoch = make_data_loader(cfg, **loader_options)

In [None]:
def mlperf_log_epoch_start(iteration, iters_per_epoch):
    """Emit MLPerf block/epoch start events when `iteration` begins an epoch.

    Args:
        iteration: zero-based training iteration about to run.
        iters_per_epoch: number of iterations that make up one epoch.

    Returns:
        None. Nothing is logged for iterations that do not start an epoch.
    """
    if iteration == 0:
        # The very first iteration always opens epoch 1; it is handled before
        # the modulo test so `iters_per_epoch` is not consulted here.
        epoch = 1
    elif iteration % iters_per_epoch == 0:
        # Epoch numbers are 1-based.
        epoch = iteration // iters_per_epoch + 1
    else:
        return

    log_start(key=constants.BLOCK_START, metadata={"first_epoch_num": epoch, "epoch_count": 1})
    log_start(key=constants.EPOCH_START, metadata={"epoch_num": epoch})

In [None]:
# Build the detection model from the frozen config.
model = build_detection_model(cfg)

# The training cell passes `device` to do_train, so it has to be defined here;
# with these lines commented out the notebook failed with a NameError on a
# fresh kernel. The model is moved before the optimizer is created in a later
# cell, so the optimizer is built from the parameters on the target device.
device = torch.device(cfg.MODEL.DEVICE)
model.to(device)

In [None]:
# Per-GPU training batch size is taken directly from the solver config.
images_per_gpu_train = cfg.SOLVER.IMS_PER_BATCH

# This notebook fixes the world size at a single process.
# NOTE(review): `num_gpus` is derived from WORLD_SIZE in an earlier cell but is
# not used here; confirm the hard-coded value is intended.
world_size = 1

# Negative config values are clamped to zero dedicated evaluation ranks.
dedicated_evaluation_ranks = max(0, cfg.DEDICATED_EVALUATION_RANKS)

# Ranks left over for training once the evaluation ranks are set aside.
num_training_ranks = world_size - dedicated_evaluation_ranks

In [None]:
# model.train()
optimizer = make_optimizer(cfg, model)
scheduler = make_lr_scheduler(cfg, optimizer)
checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

In [None]:
# Reject an invalid data-loading configuration before any state is built.
assert not (cfg.DATALOADER.USE_SYNTHETIC_INPUT and cfg.DATALOADER.HYBRID), "USE_SYNTHETIC_INPUT and HYBRID can't both be used together"

# `training_comm` is only needed when evaluation runs on dedicated ranks. No
# cell in this notebook creates it, so the original expression raised a bare
# NameError in that case; raise an explicit, descriptive error instead. If an
# earlier cell did define `training_comm`, it is used unchanged.
if dedicated_evaluation_ranks == 0:
    active_training_comm = None
else:
    try:
        active_training_comm = training_comm
    except NameError:
        raise RuntimeError(
            "dedicated_evaluation_ranks is non-zero, so `training_comm` must be "
            "defined before this cell runs"
        ) from None

# Run-time options handed to do_train. Key order matches the original cell.
arguments = {
    "iteration": 0,
    "nhwc": cfg.NHWC,
    'ims_per_batch': cfg.SOLVER.IMS_PER_BATCH,
    "distributed": distributed,
    "max_annotations_per_image": cfg.DATALOADER.MAX_ANNOTATIONS_PER_IMAGE,
    "dedicated_evaluation_ranks": dedicated_evaluation_ranks,
    "num_training_ranks": num_training_ranks,
    "training_comm": active_training_comm,
    "images_per_gpu_train": images_per_gpu_train,
    "use_synthetic_input": cfg.DATALOADER.USE_SYNTHETIC_INPUT,
    "enable_nsys_profiling": cfg.ENABLE_NSYS_PROFILING,
}
output_dir = cfg.OUTPUT_DIR

# Only rank 0 is flagged to save to disk.
save_to_disk = get_rank() == 0
checkpointer = DetectronCheckpointer(
    cfg, model, optimizer, scheduler, output_dir, save_to_disk
)
arguments["save_checkpoints"] = cfg.SAVE_CHECKPOINTS

# Whatever the checkpoint loader returns is merged into `arguments`, so keys
# it provides replace the initial values set above.
extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT, cfg.NHWC)
arguments.update(extra_checkpoint_data)

In [None]:
# No per-iteration-end or final callbacks are installed for this run.
per_iter_callback_fn = final_callback_fn = None

# Rank passed to do_train; fixed at 0 in this notebook.
rank = 0

In [None]:
# Bind the epoch length so the per-iteration start hook only needs the
# iteration number from the caller.
epoch_start_hook = functools.partial(mlperf_log_epoch_start, iters_per_epoch=iters_per_epoch)

# Launch training. `device` must already be defined by an earlier cell
# (e.g. torch.device(cfg.MODEL.DEVICE)).
success = do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
    cfg.DISABLE_REDUCED_LOGGING,
    cfg.DISABLE_LOSS_LOGGING,
    per_iter_start_callback_fn=epoch_start_hook,
    per_iter_end_callback_fn=per_iter_callback_fn,
    final_callback_fn=final_callback_fn,
    rank=rank,
)