In [None]:
# For Google Colab
# !python -m pip install pyyaml==5.1
# import sys, os, distutils.core
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

# from google.colab import drive
# drive.mount('/content/drive')


In [1]:
# import libraries
import os, cv2, tqdm

# import detectron2
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode
from detectron2.data.datasets import register_coco_instances


In [2]:
# Register the COCO-format dataset under the name "bathroom-dataset"
path = "dataset"
# path = "/content/drive/MyDrive/Colab Notebooks/MLPROJECT/dataset/" # for google colab

# Registering a name that already exists fails, which would break this cell
# whenever it is executed a second time in the same kernel. Guard on the
# catalog so the cell is safe to re-run.
if "bathroom-dataset" not in DatasetCatalog.list():
    register_coco_instances(
        "bathroom-dataset",
        {},  # no extra metadata supplied at registration time
        f"{path}/annotations/instances_default.json",
        f"{path}/images/",
    )


In [3]:
# Look up the registered dataset: its metadata entry first, then its records
bathroom_metadata, dataset_dicts = (
    MetadataCatalog.get("bathroom-dataset"),
    DatasetCatalog.get("bathroom-dataset"),
)


In [4]:
# Set up the model configuration
base_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.MODEL.DEVICE = "cpu"
# cfg.MODEL.DEVICE = "cuda" # for google colab

# Start from the model-zoo Mask R-CNN config, then override below.
cfg.merge_from_file(model_zoo.get_config_file(base_config))

# Model: initial weights come from the matching model-zoo checkpoint.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(base_config)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10

# Data: train on the registered dataset; no test split is configured.
cfg.DATASETS.TRAIN = ("bathroom-dataset",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2

# Solver settings.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 2000


In [None]:
# TRAIN THE MODEL ON THE DATASET (UNCOMMENT TO TRAIN)
# os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# trainer = DefaultTrainer(cfg)
# trainer.resume_or_load(resume=False)
# trainer.train()


In [5]:
# Build the predictor from the saved checkpoint in the output directory
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.75  # test-time score threshold
cfg.MODEL.WEIGHTS = os.path.join(
    cfg.OUTPUT_DIR,
    "model_final.pth",
)
predictor = DefaultPredictor(cfg)


In [None]:
# Use the trained model to annotate the video frame by frame.
# `v` is the single visualizer instance that runvideo draws every frame with.
v = VideoVisualizer(
    metadata=MetadataCatalog.get("bathroom-dataset"),
    instance_mode=ColorMode.IMAGE,
)


def runvideo(video, maxFrames=None):
    """Yield annotated frames from ``video``, one per frame read.

    Each frame returned by ``video.read()`` is passed unchanged to the
    module-level ``predictor``; the predicted instances are then drawn with
    the module-level visualizer ``v``.

    Args:
        video: Object with a ``read()`` method returning ``(ok, frame)``
            (here a ``cv2.VideoCapture``).
        maxFrames: Maximum number of frames to yield. ``None`` means no
            limit, i.e. run until ``read()`` reports no more frames.

    Yields:
        The visualised frame, converted back to BGR channel order so it can
        be handed directly to OpenCV writers.
    """
    readframes = 0
    # Check the budget *before* reading: comparing with ``>`` after the
    # increment let one frame more than ``maxFrames`` through.
    while maxFrames is None or readframes < maxFrames:
        ret, frame = video.read()
        if not ret:
            break

        outputs = predictor(frame)

        # The frame is converted to RGB only for drawing.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        visualization = v.draw_instance_predictions(
            frame, outputs["instances"].to("cpu")
        )

        # Back to BGR for the consumer.
        visualization = cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)

        yield visualization

        readframes += 1


video = cv2.VideoCapture("video.mp4")

# Stop here rather than carrying on with an unusable capture: continuing
# would create a writer from zeroed properties and loop over no frames.
if not video.isOpened():
    video.release()
    raise IOError("Error opening video stream or file")

try:
    # Get video properties
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frames_per_second = video.get(cv2.CAP_PROP_FPS)
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

    # Define the codec and create VideoWriter object
    video_writer = cv2.VideoWriter(
        "segmented-video.mp4",
        fourcc=cv2.VideoWriter_fourcc(*"mp4v"),
        fps=float(frames_per_second),
        frameSize=(frame_width, frame_height),
        isColor=True,
    )
    try:
        # Fail before running inference on every frame if the output
        # file could not be opened for writing.
        if not video_writer.isOpened():
            raise IOError("Error opening output video file for writing")

        for visualization in tqdm.tqdm(runvideo(video, num_frames), total=num_frames):
            video_writer.write(visualization)
    finally:
        # Release the writer even if inference or writing fails midway.
        video_writer.release()
finally:
    video.release()
    cv2.destroyAllWindows()
