In [1]:
import os
import random
import numpy as np
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.config import get_cfg
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.model_zoo import model_zoo
from detectron2.utils.visualizer import Visualizer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Register the training and test splits (COCO-format annotations) with
# Detectron2. register_coco_instances refuses to register a name twice, so the
# guard keeps this cell safe to re-run in a live kernel.
for dataset_name, annotation_file, image_dir in (
    ("custom_dataset_train", "train_output_coco_annotations.json", "train/images"),
    ("custom_dataset_test", "test_output_coco_annotations.json", "test/images"),
):
    if dataset_name not in DatasetCatalog.list():
        register_coco_instances(dataset_name, {}, annotation_file, image_dir)

# Fetch the metadata object associated with the training dataset
metadata_train = MetadataCatalog.get("custom_dataset_train")

# Fetch the metadata object associated with the test dataset
metadata_test = MetadataCatalog.get("custom_dataset_test")

# Build the training configuration: Detectron2 defaults, overlaid with the
# model-zoo Mask R-CNN (ResNet-50 FPN, 3x schedule) recipe, then customised
# for this dataset.
zoo_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_yaml))

# Model: start from the model-zoo checkpoint, predict a single class, run on CPU.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_yaml)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.DEVICE = 'cpu'  # Use 'cuda' if GPU is available

# Data: train on the custom train split, evaluate on the custom test split.
cfg.DATASETS.TRAIN = ("custom_dataset_train",)
cfg.DATASETS.TEST = ("custom_dataset_test",)

# Solver: small batch, low learning rate, and a very short run.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 30  # Only 30 iterations; raise this for a real training run

# The trainer writes checkpoints and logs under cfg.OUTPUT_DIR; create it up
# front so saving cannot fail on a missing directory.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Instantiate a trainer and load the initial weights from cfg.MODEL.WEIGHTS
# (resume=False: do not pick up a previous checkpoint from the output directory)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)

# Train the model
trainer.train()

# Evaluate the trained model on the test set.
# COCOEvaluator is given explicit keyword arguments: passing the config object
# as the second positional argument is a deprecated call form.
evaluator = COCOEvaluator("custom_dataset_test", distributed=False, output_dir="./output/")
test_loader = build_detection_test_loader(cfg, "custom_dataset_test")
test_results = inference_on_dataset(trainer.model, test_loader, evaluator)

# Save the model's configuration to a YAML file so it can be reloaded later
model_config_path = "model_config.yaml"
with open(model_config_path, "w") as f:
    f.write(cfg.dump())

print(f"Model's configuration saved to {model_config_path}")

# Print the test results
print("Test results:")
print(test_results)


[32m[09/08 11:11:56 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[09/08 11:11:56 d2.data.datasets.coco]: [0mLoaded 6 images in COCO format from train_output_coco_annotations.json
[32m[09/08 11:11:56 d2.data.build]: [0mRemoved 3 images with no usable annotations. 3 images left.
[32m[09/08 11:11:56 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   damage   | 17           |
|            |              |[0m
[32m[09/08 11:11:56 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[09/08 11:11:56 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[09/08 11:11:56 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[09/08 11:11:56 d2.data.common]: [0mSerializing 3 elements to byte tensors and concatenating them all ...
[32m[09/08 11:11:5

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[09/08 11:11:57 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[09/08 11:13:36 d2.utils.events]: [0m eta: 0:00:47  iter: 19  total_loss: 2.42  loss_cls: 0.686  loss_box_reg: 0.3066  loss_mask: 0.6877  loss_rpn_cls: 0.6121  loss_rpn_loc: 0.2084    time: 4.7709  last_time: 5.3022  data_time: 0.1141  last_data_time: 0.0038   lr: 0.00015842  
[32m[09/08 11:14:27 d2.utils.events]: [0m eta: 0:00:00  iter: 29  total_loss: 1.986  loss_cls: 0.4494  loss_box_reg: 0.3976  loss_mask: 0.6685  loss_rpn_cls: 0.229  loss_rpn_loc: 0.1769    time: 4.7349  last_time: 3.9816  data_time: 0.0031  last_data_time: 0.0027   lr: 0.00024168  
[32m[09/08 11:14:27 d2.engine.hooks]: [0mOverall training speed: 28 iterations in 0:02:12 (4.7349 s / it)
[32m[09/08 11:14:27 d2.engine.hooks]: [0mTotal training time: 0:02:17 (0:00:04 on hooks)
[32m[09/08 11:14:27 d2.data.datasets.coco]: [0mLoaded 10 images in COCO format from test_output_coco_annotations.json
[32m[09/08 11:14:28 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShorte

In [None]:
import os
import random
import numpy as np
from detectron2.data import MetadataCatalog
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
import cv2

# Load the saved model configuration
cfg = get_cfg()
cfg.merge_from_file("model_config.yaml")  # Replace with the path to your model's configuration file

# Point the config at the trained weights. "best_model.pth" is used when it
# exists; otherwise fall back to the final checkpoint that DefaultTrainer
# writes into the configured output directory.
weights_path = "best_model.pth"  # Replace with the path to your saved model weights
if not os.path.exists(weights_path):
    weights_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.WEIGHTS = weights_path

# Set the device to CPU
cfg.MODEL.DEVICE = 'cpu'

# Create a predictor using the loaded model
predictor = DefaultPredictor(cfg)

# Pick a random readable image from the "train/images" folder.
# Entries are tried in random order so that stray non-image files (which
# cv2.imread reports by returning None) are skipped instead of crashing later.
image_folder = 'train/images'  # Replace with the path to your image folder
image_files = os.listdir(image_folder)

image_bgr = None
random_image_file = None
image_path = None
for candidate in random.sample(image_files, len(image_files)):
    candidate_path = os.path.join(image_folder, candidate)
    candidate_image = cv2.imread(candidate_path)
    if candidate_image is not None:
        random_image_file = candidate
        image_path = candidate_path
        image_bgr = candidate_image
        break

if image_bgr is None:
    raise FileNotFoundError(f"No readable image found in {image_folder!r}")

# Get predictions on the image.
# DefaultPredictor expects the image in BGR channel order (as returned by
# cv2.imread), so inference must run on the unconverted array.
outputs = predictor(image_bgr)

# Visualizer expects RGB, so convert only for drawing.
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# Filter predictions with confidence > 75%
instances = outputs["instances"]
filtered_instances = instances[instances.scores > 0.75]

# Visualize the original image with filtered predictions
v = Visualizer(image, metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
out = v.draw_instance_predictions(filtered_instances.to("cpu"))

# Show the image with predictions (flip RGB back to BGR for OpenCV display)
cv2.imshow("Predictions", out.get_image()[:, :, ::-1])

# Construct the path to the corresponding mask image
mask_folder = 'train/binned_targets'  # Replace with the path to your mask folder
mask_file = os.path.join(mask_folder, os.path.splitext(random_image_file)[0] + '_target.png')

# Read and show the mask image. cv2.imread returns None when the file is
# missing or unreadable; report that instead of letting cv2.imshow crash
# before the windows can be serviced and closed.
mask_image = cv2.imread(mask_file)
if mask_image is None:
    print(f"Mask image not found or unreadable: {mask_file}")
else:
    cv2.imshow("Mask", mask_image)

# Block until any key is pressed, then close all OpenCV windows
cv2.waitKey(0)
cv2.destroyAllWindows()
# Extra short wait so the GUI event loop can process the window-close request
cv2.waitKey(1)