In [1]:
# Some basic setup:
# Setup detectron2 logger
import logging
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
import torch

# import some common libraries
import numpy as np
import os, json, cv2, random
from collections import OrderedDict
# from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from matplotlib import pyplot as plt

import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
from detectron2.evaluation import (
    COCOEvaluator,
    DatasetEvaluators,
    verify_results,
)
from detectron2.modeling import GeneralizedRCNNWithTTA

from detectron2.data.build import build_detection_test_loader
from detectron2.engine import HookBase
from detectron2.engine.hooks import PeriodicWriter

from detectron2.data import detection_utils as utils
from detectron2.data.dataset_mapper import DatasetMapper
from detectron2.data.build import (_test_loader_from_config, build_detection_train_loader)

torch.multiprocessing.set_sharing_strategy('file_system')

In [2]:
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Registered dataset name -> COCO-format annotation file. All splits share one image root.
_STREET_IMAGE_ROOT = "path_to_images/images"
_STREET_SPLITS = {
    "street_train": "path_to_annotations/annotations_train.json",
    "street_val": "path_to_annotations/annotations_val.json",
    "street_test": "path_to_annotations/annotations_test.json",
}

# Registering a name twice makes detectron2 raise, which breaks re-running this
# cell in a live kernel. Skip names that are already registered so the cell is
# idempotent.
for _dataset_name, _annotation_file in _STREET_SPLITS.items():
    if _dataset_name in DatasetCatalog.list():
        continue
    register_coco_instances(_dataset_name, {}, _annotation_file, _STREET_IMAGE_ROOT)

In [None]:
# Visualize a few randomly chosen training samples with their ground-truth annotations.
my_dataset_train_metadata = MetadataCatalog.get("street_train")
dataset_dicts = DatasetCatalog.get("street_train")
import random
from detectron2.utils.visualizer import Visualizer

# Clamp the sample size: random.sample raises ValueError when asked for more
# items than the dataset contains.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    print(d["file_name"])
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread returns None (it does not raise) for a missing or unreadable file.
        print("could not read image, skipping:", d["file_name"])
        continue
    # OpenCV loads BGR; the Visualizer is handed the channel-reversed (RGB) image.
    visualizer = Visualizer(img[:, :, ::-1], metadata=my_dataset_train_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    # The drawn image is already in the RGB order matplotlib expects, so it is
    # shown as-is (the former `[:, :, ::1]` slice was a no-op).
    plt.imshow(vis.get_image())
    plt.show()

In [5]:
def build_evaluator(cfg, dataset_name, output_folder=None):
    """
    Build the evaluator matching a dataset's registered ``evaluator_type``.

    Args:
        cfg: config object; only ``cfg.OUTPUT_DIR`` is read, and only when
            ``output_folder`` is not given.
        dataset_name (str): name used to look up the dataset metadata.
        output_folder (str, optional): directory handed to the evaluator.
            Defaults to ``<cfg.OUTPUT_DIR>/inference``.

    Returns:
        A single evaluator when exactly one applies, otherwise a
        ``DatasetEvaluators`` wrapping all of them.

    Raises:
        NotImplementedError: if no evaluator is available for the dataset's type.
    """
    inference_dir = (
        output_folder if output_folder is not None else os.path.join(cfg.OUTPUT_DIR, "inference")
    )
    kind = MetadataCatalog.get(dataset_name).evaluator_type

    # Only COCO-style evaluation is supported here.
    evaluators = []
    if kind in ["coco"]:
        evaluators.append(COCOEvaluator(dataset_name, output_dir=inference_dir))

    if not evaluators:
        raise NotImplementedError(
            "no Evaluator for the dataset {} with the type {}".format(dataset_name, kind)
        )
    if len(evaluators) == 1:
        return evaluators[0]
    return DatasetEvaluators(evaluators)

In [6]:
from itertools import cycle

class ValLossHook(HookBase):
    """
    Training hook that logs the model's losses on validation batches.

    After every training iteration one batch is drawn from the validation
    loader, passed through ``trainer.model`` without gradient tracking, and the
    resulting losses are put into the trainer's event storage under
    ``val_``-prefixed keys (plus ``val_total_loss``).

    Args:
        cfg: detectron2 config; the hook keeps its own clone.
        dataset_name (str): registered dataset to draw validation batches from.
            Defaults to ``"street_val"``.
    """

    def __init__(self, cfg, dataset_name="street_val"):
        super().__init__()
        self.cfg = cfg.clone()
        # DatasetMapper(cfg, True) is the training-mode mapper — presumably so the
        # batches keep their annotations and the model can compute losses.
        self._data_loader = build_detection_test_loader(
            self.cfg, dataset_name, DatasetMapper(self.cfg, True)
        )
        # A plain iterator that is re-created on exhaustion. itertools.cycle is
        # avoided on purpose: it keeps a copy of every item it has yielded, so
        # after one pass the whole validation set would stay resident in memory.
        self._loader = iter(self._data_loader)

    def _next_batch(self):
        """Return the next validation batch, restarting the loader when it runs out."""
        try:
            return next(self._loader)
        except StopIteration:
            self._loader = iter(self._data_loader)
            return next(self._loader)

    def after_step(self):
        """
            After each step calculates the validation loss and adds it to the train storage
        """
        data = self._next_batch()
        with torch.no_grad():
            loss_dict = self.trainer.model(data)

            losses = sum(loss_dict.values())
            assert torch.isfinite(losses).all(), loss_dict

            # Reduce across workers, then prefix the keys so writers can tell
            # validation scalars apart from training scalars.
            loss_dict_reduced = {"val_" + k: v.item() for k, v in comm.reduce_dict(loss_dict).items()}
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            if comm.is_main_process():
                self.trainer.storage.put_scalars(val_total_loss=losses_reduced,
                                                 **loss_dict_reduced)

In [7]:
import os
from torch.utils.tensorboard import SummaryWriter
from detectron2.utils.events import EventWriter, get_event_storage, CommonMetricPrinter, JSONWriter

class CustomTensorboardXWriter(EventWriter):
    """
    Writes scalars and images based on storage key to train or val tensorboard file.

    Keys starting with ``val_`` go to the "val" writer with that prefix removed,
    so a validation metric shares its tag with the training counterpart.
    Everything else, including histograms, goes to the "train" writer.
    """

    _VAL_PREFIX = "val_"

    def __init__(self, log_dir: str, window_size: int = 20, **kwargs):
        """
        Args:
            log_dir (str): the base directory to save the output events. This class creates two subdirs in log_dir
            window_size (int): the scalars will be median-smoothed by this window size

            kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)`
        """
        self._window_size = window_size

        # separate the writers into a train and a val writer
        train_writer_path = os.path.join(log_dir, "train")
        os.makedirs(train_writer_path, exist_ok=True)
        self._writer_train = SummaryWriter(train_writer_path, **kwargs)

        val_writer_path = os.path.join(log_dir, "val")
        os.makedirs(val_writer_path, exist_ok=True)
        self._writer_val = SummaryWriter(val_writer_path, **kwargs)

    def _route(self, key: str):
        """Return ``(writer, tag)`` for a storage key, stripping a leading ``val_``."""
        if key.startswith(self._VAL_PREFIX):
            # Strip only the leading prefix; str.replace would also remove any
            # later "val_" occurrence inside the key.
            return self._writer_val, key[len(self._VAL_PREFIX):]
        return self._writer_train, key

    def write(self):
        """Flush the latest scalars, pending images and histograms from the event storage."""
        storage = get_event_storage()
        for key, (value, step) in storage.latest_with_smoothing_hint(self._window_size).items():
            writer, tag = self._route(key)
            writer.add_scalar(tag, value, step)

        if len(storage._vis_data) >= 1:
            for img_name, img, step_num in storage._vis_data:
                # Route on the image's own name. The previous code tested the
                # leftover scalar key from the loop above, which sent images to
                # the wrong writer and was undefined when there were no scalars.
                writer, tag = self._route(img_name)
                writer.add_image(tag, img, step_num)
            # Storage stores all image data and rely on this writer to clear them.
            # As a result it assumes only one writer will use its image data.
            # An alternative design is to let storage store limited recent
            # data (e.g. only the most recent image) that all writers can access.
            # In that case a writer may not see all image data if its period is long.
            storage.clear_images()

        if len(storage._histograms) >= 1:
            for params in storage._histograms:
                self._writer_train.add_histogram_raw(**params)
            storage.clear_histograms()

    def close(self):
        """Close both summary writers, if they were created."""
        # The attributes are missing when __init__ failed part-way, e.g. at import.
        for attr in ("_writer_train", "_writer_val"):
            writer = getattr(self, attr, None)
            if writer is not None:
                writer.close()


In [8]:
class Trainer(DefaultTrainer):
    """
    ``DefaultTrainer`` subclass wired to this notebook's helpers.

    It plugs in the module-level ``build_evaluator``, replaces the default
    event writers with a set that includes ``CustomTensorboardXWriter``, and
    offers an evaluation pass with test-time augmentation (TTA).
    """

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Delegate to the module-level ``build_evaluator`` function."""
        return build_evaluator(cfg, dataset_name, output_folder)

    def build_writers(self):
        """
        Build the event writers used during training.

        Returns:
            list[EventWriter]: console printer, JSON metrics writer and the
            custom train/val tensorboard writer, in that order.
        """
        output_dir = self.cfg.OUTPUT_DIR
        console_printer = CommonMetricPrinter(self.max_iter)
        json_writer = JSONWriter(os.path.join(output_dir, "metrics.json"))
        tensorboard_writer = CustomTensorboardXWriter(output_dir)
        return [console_printer, json_writer, tensorboard_writer]

    @classmethod
    def test_with_TTA(cls, cfg, model):
        """
        Evaluate ``model`` wrapped in ``GeneralizedRCNNWithTTA`` on every
        dataset in ``cfg.DATASETS.TEST``.

        Returns:
            OrderedDict: the results of ``cls.test`` with ``"_TTA"`` appended
            to every key.
        """
        log = logging.getLogger("detectron2.trainer")
        # Meant to run at the end of training; only some R-CNN models are supported.
        log.info("Running inference with test-time augmentation ...")
        tta_model = GeneralizedRCNNWithTTA(cfg, model)

        tta_output_dir = os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
        evaluators = []
        for name in cfg.DATASETS.TEST:
            evaluators.append(cls.build_evaluator(cfg, name, output_folder=tta_output_dir))

        results = cls.test(cfg, tta_model, evaluators)
        return OrderedDict((key + "_TTA", value) for key, value in results.items())

In [3]:
# Build the experiment config: start from detectron2 defaults, overlay the
# model-zoo Faster R-CNN R50-FPN 3x config, then apply project overrides.
cfg = get_cfg()

cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
# Dataset names must match the ones registered with register_coco_instances.
cfg.DATASETS.TRAIN = ("street_train",)
cfg.DATASETS.TEST = ("street_val",)
cfg.DATALOADER.NUM_WORKERS = 2
# Keep images that have no annotations in the training set.
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS=False
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
cfg.OUTPUT_DIR = "/home/ryzen/sriram/detectron2_res/plitterstreet/outputWithEval_23Feb2022"

# Initialise from the checkpoint that belongs to the same model-zoo config.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001  
cfg.SOLVER.MAX_ITER = 60000
# Iterations at which the learning-rate schedule steps.
cfg.SOLVER.STEPS = (30000, 40000)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 1024
# Presumably matches the number of categories in the annotation files — TODO confirm.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4

# Evaluate on cfg.DATASETS.TEST every 5000 iterations.
cfg.TEST.EVAL_PERIOD=5000
# cfg.MODEL.DEVICE = "cuda:0"
# NOTE(review): the trainer cell registers the TTA evaluation hook unconditionally;
# no code visible in this notebook reads this flag — confirm it is still needed.
cfg.TEST.AUG.ENABLED = True

In [None]:
# The metric writers built by the trainer open files under OUTPUT_DIR, so make
# sure the directory exists before constructing it (the official detectron2
# tutorial does the same before creating a DefaultTrainer).
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = Trainer(cfg)
# Extra hooks: per-step validation loss, plus a TTA evaluation. EvalHook is
# given period 0 — presumably so it only runs once training finishes; confirm
# against the EvalHook documentation.
trainer.register_hooks(
    [ValLossHook(cfg), hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
)

# Hooks run in list order. Move the PeriodicWriter hook(s) to the end so the
# validation losses put into storage by ValLossHook.after_step are already
# there when the writers flush.
periodic_writer_hook = [hook for hook in trainer._hooks if isinstance(hook, PeriodicWriter)]
all_other_hooks = [hook for hook in trainer._hooks if not isinstance(hook, PeriodicWriter)]
trainer._hooks = all_other_hooks + periodic_writer_hook
trainer.resume_or_load(resume=True)

In [None]:
# Start training with the trainer (and hooks) configured in the previous cell.
trainer.train()

In [None]:
# evaluate the trainer model on the held-out test split


from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# Use the keyword form of COCOEvaluator, as the other cells in this notebook do.
# Passing cfg as the second positional argument is the deprecated signature.
# distributed=False keeps the evaluation on this single process.
evaluator = COCOEvaluator("street_test", distributed=False, output_dir=None)
val_loader = build_detection_test_loader(cfg, "street_test")
print(inference_on_dataset(trainer.model, val_loader, evaluator))

In [None]:
# evaluate the predictor with setting score threshold
# NOTE: this cell mutates the shared `cfg` in place (weights path and score
# threshold) and defines the global `predictor` that the video cells below use.

cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# Score threshold applied at test time by the ROI heads.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)

from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# NOTE(review): this evaluates on "street_val", whereas the previous cell used
# "street_test" — confirm which split is intended here.
evaluator = COCOEvaluator("street_val", output_dir="output_to_save")
val_loader = build_detection_test_loader(cfg, "street_val")
print(inference_on_dataset(predictor.model, val_loader, evaluator))

In [7]:
# Module-level placeholder. The video functions below assign their own local
# `frame`, so nothing visible in this notebook reads this value.
frame = None

In [8]:
from detectron2.config import get_cfg
from detectron2.utils.visualizer import _create_text_labels
import time, tqdm, json

thing_colors = [(255, 255, 0), (255, 0, 0), (0, 0, 255), (0, 255, 0)]

def runOnVideo(video, maxFrames, metadata=None):
    """ Runs the predictor on the frames of a video and yields each annotated frame.

    Args:
        video: an opened capture object whose ``read()`` returns ``(hasFrame, frame)``
            (e.g. ``cv2.VideoCapture``).
        maxFrames (int): maximum number of frames to process.
        metadata: dataset metadata handed to the ``Visualizer``. Defaults to
            ``MetadataCatalog.get("street_test")``.

    Yields:
        tuple: ``(visualization, predictions_dict)``. ``visualization`` is the BGR
        frame with the predicted boxes drawn. ``predictions_dict`` holds
        ``frameId`` (1-based), ``boxes``, ``scores`` and ``classes``.
    """
    if metadata is None:
        # The metadata used to come from an undefined global; fall back to the
        # test split registered earlier in this notebook.
        metadata = MetadataCatalog.get("street_test")

    readFrames = 0
    # Stop after exactly maxFrames frames. The previous post-yield check let
    # one extra frame through.
    while readFrames < maxFrames:
        hasFrame, frame = video.read()
        if not hasFrame:
            break
        readFrames += 1

        predictions = predictor(frame)
        predictions = predictions["instances"].to("cpu")
        # OpenCV frames are BGR; the Visualizer is given the channel-reversed image.
        visualizer = Visualizer(frame[:, :, ::-1], metadata=metadata, scale=1)

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None
        # One fixed colour per class, rescaled to the 0-1 range. Left as None when
        # there are no class predictions, so the visualizer picks its own colours.
        colors = None
        if classes is not None:
            colors = [tuple(x / 255 for x in thing_colors[c]) for c in classes]

        # Boxes only: no text labels are drawn in this variant.
        visualizer.overlay_instances(
            boxes = boxes,
            masks = None,
            labels = None,
            keypoints = None,
            assigned_colors = colors,
            alpha = 1
        )
        vis = visualizer.output

        visualization = cv2.cvtColor(vis.get_image(), cv2.COLOR_RGB2BGR)
        predictions_dict = {"frameId": readFrames, "boxes": boxes, "scores": scores, "classes": classes}

        yield visualization, predictions_dict
        
def default(obj):
    """JSON ``default`` hook: serialise array-like objects through ``tolist()``.

    Args:
        obj: an object the ``json`` encoder cannot serialise natively.

    Returns:
        The result of ``obj.tolist()`` (e.g. for numpy arrays or torch tensors).

    Raises:
        TypeError: if ``obj`` has no ``tolist`` method. This is the error the
            ``json`` module expects from a ``default`` hook for unsupported types.
    """
    to_list = getattr(obj, "tolist", None)
    if callable(to_list):
        return to_list()
    raise TypeError(
        "Object of type {} is not JSON serializable".format(type(obj).__name__)
    )

In [None]:
from detectron2.config import get_cfg
from detectron2.utils.visualizer import _create_text_labels
import time, tqdm, json

thing_colors = [(255, 255, 0), (255, 0, 0), (0, 0, 255), (0, 255, 0)]

from deep_sort import DeepSort
deepsort = DeepSort('deep_sort/deep/checkpoint/ckpt.t7', use_cuda=False)

def runOnVideoWithdeepsort(video, maxFrames, metadata=None):
    """ Runs the predictor and the DeepSort tracker on the frames of a video and
    yields each frame with the tracked boxes drawn.

    Args:
        video: an opened capture object whose ``read()`` returns ``(hasFrame, frame)``
            (e.g. ``cv2.VideoCapture``).
        maxFrames (int): maximum number of frames to process.
        metadata: dataset metadata used for the ``Visualizer`` and the class
            names in the labels. Defaults to ``MetadataCatalog.get("street_test")``.

    Yields:
        tuple: ``(visualization, predictions_dict)``. ``visualization`` is the BGR
        frame with tracked boxes and labels drawn. ``predictions_dict`` holds
        ``frameId`` (1-based), ``boxes``, ``scores``, ``classes`` and ``track_ids``
        as returned by the tracker.
    """
    if metadata is None:
        # The metadata used to come from an undefined global; fall back to the
        # test split registered earlier in this notebook.
        metadata = MetadataCatalog.get("street_test")

    readFrames = 0
    # Stop after exactly maxFrames frames. The previous post-yield check let
    # one extra frame through.
    while readFrames < maxFrames:
        hasFrame, frame = video.read()
        if not hasFrame:
            break
        readFrames += 1

        predictions = predictor(frame)
        predictions = predictions["instances"].to("cpu")
        # OpenCV frames are BGR; the Visualizer is given the channel-reversed image.
        visualizer = Visualizer(frame[:, :, ::-1], metadata=metadata, scale=1)

        # Missing fields are treated as "no detections" instead of crashing.
        if predictions.has("pred_boxes"):
            xyxy = predictions.pred_boxes.tensor.numpy().reshape(-1, 4)
        else:
            xyxy = np.zeros((0, 4), dtype=np.float32)
        if predictions.has("scores"):
            scores = predictions.scores.numpy()
        else:
            scores = np.zeros(len(xyxy), dtype=np.float32)
        if predictions.has("pred_classes"):
            classes = predictions.pred_classes.numpy()
        else:
            classes = np.zeros(len(xyxy), dtype=np.int64)

        # Convert corner boxes (x0, y0, x1, y1) to the centre/size form
        # (xc, yc, w, h) that is handed to the tracker.
        x0, y0, x1, y1 = xyxy.T
        boxes = np.stack(
            [(x1 + x0) / 2, (y1 + y0) / 2, (x1 - x0), (y1 - y0)], axis=1
        ).astype(np.float64)

        tboxes = []
        tscores = []
        tclasses = []
        track_ids = []
        outputs = deepsort.update(boxes, scores, classes, frame)
        # len() works for both an empty list and an ndarray. `outputs != []` is an
        # ambiguous element-wise comparison once the tracker returns an array.
        if len(outputs) > 0:
            outputs = np.asarray(outputs)
            # Column layout as indexed here: 0-3 box, 4 score, 5 class id, 6 track id.
            tboxes = outputs[:, :4]
            tscores = outputs[:, 4]
            # Class ids index thing_colors / class names, so they must be integers.
            tclasses = outputs[:, 5].astype(int)
            track_ids = outputs[:, 6].astype(int)

        if len(tboxes) != len(boxes):
            print(tboxes, boxes, "danger!!!!!!!!!!!!")

        labels = _create_text_labels(tclasses, tscores, metadata.get("thing_classes", None))
        colors = [tuple(x / 255 for x in thing_colors[int(c)]) for c in tclasses]

        visualizer.overlay_instances(
            boxes = tboxes,
            masks = None,
            labels = labels,
            keypoints = None,
            assigned_colors = colors,
            alpha = 1
        )
        vis = visualizer.output

        visualization = cv2.cvtColor(vis.get_image(), cv2.COLOR_RGB2BGR)
        predictions_dict = {"frameId": readFrames, "boxes": tboxes, "scores": tscores, "classes": tclasses, "track_ids": track_ids}

        yield visualization, predictions_dict

In [None]:
# Run detection + tracking on one video. Writes an annotated copy of the video,
# one JPG per frame (for spot checks) and a JSON file with the per-frame predictions.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
video_path = '/media/ryzen/DATA/Hanwella/January_2022/videos/GX010102.MP4'
video = cv2.VideoCapture(video_path)
if not video.isOpened():
    # Without this check an unreadable video silently yields empty outputs.
    video.release()
    raise IOError("could not open video: " + video_path)

width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
print(width, height, frames_per_second, num_frames)

video_writer = cv2.VideoWriter('/media/ryzen/DATA/Hanwella/January_2022/videos/outputs/GX010102_outputwithtrack.MP4', fourcc=cv2.VideoWriter_fourcc(*"mp4v"), fps=float(frames_per_second), frameSize=(width, height), isColor=True)
predictions_json = []

# try/finally guarantees that the capture and the writer are released even when
# inference fails part-way through the video.
try:
    frame_stream = tqdm.tqdm(runOnVideoWithdeepsort(video, num_frames), total=num_frames)
    for c, (visualization, predictions_dict) in enumerate(frame_stream, start=1):
        cv2.imwrite('/media/ryzen/DATA/Hanwella/January_2022/videos/check/'+str(c)+'.jpg', visualization)
        video_writer.write(visualization)
        predictions_json.append(predictions_dict)
finally:
    video.release()
    video_writer.release()
    cv2.destroyAllWindows()

# The predictions are written only after the whole video has been processed.
# NOTE(review): the predictions are stored as a JSON *string* nested inside the
# outer JSON object (double-encoded). Kept as-is because downstream readers may
# rely on it — confirm before flattening.
with open("/media/ryzen/DATA/Hanwella/January_2022/videos/outputs/GX010102_outputwithtrack.json", "w") as f:
    json.dump({"GX010102": json.dumps(predictions_json, default=default)}, f)

In [None]:
# Run detection on every .MP4 in `videos_dir` that has no output yet. Writes an
# annotated video plus a JSON file of per-frame predictions into `videos_outdir`.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
videos_dir = '/media/ryzen/pLitter1/Ubon/January_2022/videos/1280/'
videos_outdir = '/media/ryzen/pLitter1/Ubon/January_2022/videos/outputs/'

# List the output directory once instead of once per input file.
already_processed = set(os.listdir(videos_outdir))

for filename in sorted(os.listdir(videos_dir)):
    if not filename.endswith('.MP4') or filename in already_processed:
        continue
    print(filename)

    video = cv2.VideoCapture(os.path.join(videos_dir, filename))
    if not video.isOpened():
        # Skip unreadable files instead of producing empty outputs for them.
        print("could not open video, skipping:", filename)
        video.release()
        continue

    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frames_per_second = video.get(cv2.CAP_PROP_FPS)
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    print(width, height, frames_per_second, num_frames)

    video_writer = cv2.VideoWriter(os.path.join(videos_outdir, filename), fourcc=cv2.VideoWriter_fourcc(*"mp4v"), fps=float(frames_per_second), frameSize=(width, height), isColor=True)
    predictions_json = []

    # try/finally guarantees that the capture and the writer are released even
    # when inference fails part-way through a video.
    try:
        for visualization, predictions_dict in tqdm.tqdm(runOnVideo(video, num_frames), total=num_frames):
            video_writer.write(visualization)
            predictions_json.append(predictions_dict)
    finally:
        video.release()
        video_writer.release()
        cv2.destroyAllWindows()

    # The predictions are written only after the whole video has been processed.
    # NOTE(review): the predictions are stored as a JSON *string* nested inside
    # the outer JSON object (double-encoded). Kept as-is because downstream
    # readers may rely on it — confirm before flattening.
    with open(os.path.join(videos_outdir, filename.replace('.MP4', '.json')), "w") as f:
        json.dump({filename.replace(".MP4", ""): json.dumps(predictions_json, default=default)}, f)