In [1]:
import cv2
import torch
from d2go.runner import GeneralizedRCNNRunner
from d2go.model_zoo import model_zoo
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from matplotlib import pyplot as plt

In [2]:
# Rebuild the configuration the same way the training script does:
# runner defaults first, then the model-zoo YAML layered on top.
runner = GeneralizedRCNNRunner()
cfg = runner.get_default_cfg()
cfg.merge_from_file(model_zoo.get_config_file("faster_rcnn_fbnetv3a_C4.yaml"))

# Single foreground class in the ROI heads.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    cfg.MODEL.DEVICE = "cuda"
else:
    cfg.MODEL.DEVICE = "cpu"

INFO:d2go.runner.default_runner:Initializing control pg


In [3]:
# Register the dataset instances (COCO-format annotations + image folders).
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances


def _register_coco_once(name, json_file, image_root):
    """Register a COCO-format dataset unless `name` is already registered.

    detectron2 refuses to register the same dataset name twice, so calling
    `register_coco_instances` unconditionally makes this cell fail whenever
    it is re-run in a live kernel. Skipping known names keeps it idempotent.

    Args:
        name: dataset name to register in the DatasetCatalog.
        json_file: path to the COCO annotation JSON file.
        image_root: directory containing the images.
    """
    if name in DatasetCatalog.list():
        return
    register_coco_instances(name, {}, json_file, image_root)


_register_coco_once(
    "fire_train",
    "/home/yali/coco-annotator/forest-fires-t.json",
    "/home/yali/coco-annotator/datasets/forest-fires/train_images",
)
_register_coco_once(
    "fire_val",
    "/home/yali/coco-annotator/forest-fires-v.json",
    "/home/yali/coco-annotator/datasets/forest-fires/val_images",
)

In [5]:
# Restore the trained detector and wrap it in a predictor for inference.
from d2go.utils.demo_predictor import DemoPredictor

model_path = "/home/yali/coco-annotator/trained-models/model_weights_958-203_rcnn_fbnetv3a_C4.pth"

# Build the model described by cfg, then load the saved weights, mapping
# them onto the device selected in cfg.MODEL.DEVICE.
model = runner.build_model(cfg)
model.load_state_dict(
    torch.load(model_path, map_location=cfg.MODEL.DEVICE)
)
model.eval()  # switch to evaluation mode before predicting

predictor = DemoPredictor(model)

INFO:d2go.modeling.backbone.fbnet_v2:Build FBNet using unified arch_def:
trunk
- {'block_op': 'conv_k3', 'block_cfg': {'out_channels': 16, 'stride': 2}, 'stage_idx': 0, 'block_idx': 0}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 16, 'stride': 1, 'expansion': 1, 'less_se_channels': False}, 'stage_idx': 0, 'block_idx': 1}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 16, 'stride': 1, 'expansion': 1, 'less_se_channels': False}, 'stage_idx': 0, 'block_idx': 2}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 24, 'stride': 2, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 1, 'block_idx': 0}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 24, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 1, 'block_idx': 1}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 24, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 1, 'block_idx': 2}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 24, 'stride': 1, 'expansio

In [6]:
from detectron2.structures import Boxes, Instances

# Load the video
video_path = "1-Zenmuse_X4S_1.mp4"  # Raw video from Zenmuse X4S cameras (IEEE FLAME Dataset)
# video_path = "2-Zenmuse_X4S_2.mp4"  # Raw video from Zenmuse X4S cameras for one specific pile (IEEE FLAME Dataset)
cap = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise on a bad path; without this check the
# processing loop would silently do nothing.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

# Run detection on one frame every SAMPLE_INTERVAL_SECONDS seconds
SAMPLE_INTERVAL_SECONDS = 5

# Get the frame rate of the video
fps = cap.get(cv2.CAP_PROP_FPS)

# Number of frames between two sampled frames (interval * frame rate).
# Clamped to at least 1: a reported frame rate of 0 would otherwise give
# skip_frames == 0 and a ZeroDivisionError in `frame_number % skip_frames`.
skip_frames = max(1, int(SAMPLE_INTERVAL_SECONDS * fps))


In [8]:
import os

from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# Detections covering more than this fraction of the frame are discarded.
MAX_BOX_AREA_FRACTION = 0.10

# Where annotated frames are written.
OUTPUT_DIR = "./video_frames_1"  # For video 1
# OUTPUT_DIR = "./video_frames_2"  # For video 2


def _drop_oversized_boxes(instances, frame_area, max_fraction):
    """Return only the detections whose box area is <= max_fraction of the frame.

    Args:
        instances: detectron2 `Instances` with a `pred_boxes` field.
        frame_area: frame height * width, in pixels.
        max_fraction: largest allowed box_area / frame_area ratio.

    Returns:
        A new `Instances` holding the kept detections (all fields indexed
        by the same mask).
    """
    keep = instances.pred_boxes.area() / frame_area <= max_fraction
    return instances[keep]


# cv2.imwrite fails silently (returns False) when the target directory is
# missing, so make sure it exists up front.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Class names used by the Visualizer labels; loop-invariant, so set once.
fire_metadata = MetadataCatalog.get("fire_train")
fire_metadata.set(thing_classes=["fire"])
MetadataCatalog.get("fire_val").set(thing_classes=["fire"])

# A previous run of this cell releases `cap`; re-open it so the cell can be
# re-run instead of silently processing zero frames.
if not cap.isOpened() and not cap.open(video_path):
    raise IOError(f"Could not open video file: {video_path}")

# Guard against a zero interval, which would make the modulo below raise.
frame_interval = max(1, skip_frames)

frame_number = 0
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # End of video

        # Process one frame per sampling interval
        if frame_number % frame_interval == 0:
            # Detect fires in the frame
            outputs = predictor(frame)
            instances = outputs["instances"]

            if len(instances) > 0:
                height, width = frame.shape[:2]
                instances = _drop_oversized_boxes(
                    instances, height * width, MAX_BOX_AREA_FRACTION
                )
                outputs["instances"] = instances

            # Only report/save frames that still contain a detection after
            # filtering; otherwise frames without any drawn box would be
            # written out as fire frames.
            if len(instances) > 0:
                # the output object categories and corresponding bounding boxes
                print(instances.pred_classes)
                print(instances.pred_boxes)

                # Visualizer is given the channel-reversed frame
                v = Visualizer(frame[:, :, ::-1], fire_metadata)
                out = v.draw_instance_predictions(instances.to("cpu"))

                # Reverse the channels again before handing the image to OpenCV
                save_frame = out.get_image()[:, :, ::-1]

                # Save the frame with detected fires
                frame_save_path = f"{OUTPUT_DIR}/frame_{frame_number}.jpg"
                if not cv2.imwrite(frame_save_path, save_frame):
                    raise IOError(f"Failed to write frame to {frame_save_path}")

        frame_number += 1
finally:
    # Release the capture even if inference or saving raised, or the cell
    # was interrupted.
    cap.release()