Setup directories

In [1]:
# import mlflow
# mlflow.autolog(log_models=True)
# mlflow.set_tracking_uri('https://mlflow.krschap.tech')

In [1]:
import os

# Single working directory that every other path in this cell is derived from.
# It is assembled with os.path.join so the separator is correct on every OS
# (a literal backslash is only treated as a path separator on Windows).
WORK_DIR = os.path.join("training_dataset_generated", "training_sets")

# Image folders and the folder holding the COCO-style annotation files
IMAGE_DIR_TRAIN = os.path.join(WORK_DIR, "train_images")
IMAGE_DIR_VAL = os.path.join(WORK_DIR, "val_images")
ANNOTATIONS_DIR = os.path.join(WORK_DIR, "annotations")

# Annotation files for each split, and the folder for training artefacts
TRAIN_JSON = os.path.join(ANNOTATIONS_DIR, "instances_train.json")
VAL_JSON = os.path.join(ANNOTATIONS_DIR, "instances_val.json")
OUTPUT_DIR = os.path.join(WORK_DIR, "model")

# Create the output directory if it does not exist yet
os.makedirs(OUTPUT_DIR, exist_ok=True)


Step 1: Dataset Registration (Detectron2)

In [2]:
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Register both COCO-format splits under the names used later in cfg.DATASETS.
# DatasetCatalog rejects a name that is already registered, so names that are
# already present are skipped; this keeps the cell safe to re-run in a live kernel.
for _name, _json_file, _image_root in (
    ("solar_train", TRAIN_JSON, IMAGE_DIR_TRAIN),
    ("solar_val", VAL_JSON, IMAGE_DIR_VAL),
):
    if _name not in DatasetCatalog.list():
        register_coco_instances(_name, {}, _json_file, _image_root)


Check that the datasets are registered correctly: a random training image is displayed so you can visually confirm that the masks are properly overlaid on it.

In [3]:
from detectron2.utils.visualizer import Visualizer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode, PolygonMasks
import random
import cv2
import numpy as np
import copy

# ---- CONFIG ----
RESIZE_DIM = (256, 256)  # (width, height) of the preview image

# ---- Load dataset ----
dataset_dicts = DatasetCatalog.get("solar_train")
metadata = MetadataCatalog.get("solar_train")

# Pick a random sample (deep-copied so the catalog entry is never touched)
sample = copy.deepcopy(random.choice(dataset_dicts))
img_path = sample["file_name"]
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Resize image and remember the per-axis scale for the polygon coordinates
orig_height, orig_width = img.shape[:2]
resized_img = cv2.resize(img, RESIZE_DIM)
scale_x = RESIZE_DIM[0] / orig_width
scale_y = RESIZE_DIM[1] / orig_height

# Create blank overlay for masks
mask_overlay = np.zeros_like(resized_img)

# Draw only masks (skip bbox)
for ann in sample.get("annotations", []):
    segmentation = ann.get("segmentation")
    if not isinstance(segmentation, list):
        continue  # only list-of-polygons segmentations are drawn
    for seg in segmentation:
        if len(seg) < 6 or len(seg) % 2:
            continue  # need at least 3 complete (x, y) pairs to fill a polygon
        # Force a float dtype: integer coordinates would otherwise yield an
        # integer array, which cannot be scaled in place by a float factor.
        pts = np.asarray(seg, dtype=np.float64).reshape(-1, 2)
        pts[:, 0] *= scale_x
        pts[:, 1] *= scale_y
        pts = np.round(pts).astype(np.int32)
        cv2.fillPoly(mask_overlay, [pts], color=(0, 0, 255))  # red fill

# Blend the mask onto the image
alpha = 0.4
blended = cv2.addWeighted(resized_img, 1, mask_overlay, alpha, 0)

# Show result in a native OpenCV window for up to 10 seconds (or until a key press)
cv2.imshow("Segmentation Only", blended)
cv2.waitKey(10000)
cv2.destroyAllWindows()



Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.



Step 2: Set Config and Train

In [None]:
# Run either option 1 or option 2 below to train the model.
# Option 1: settings adjusted to better handle small solar-panel instances.

In [5]:

import os
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
import torch

# Define working directory.
# Kept relative (and built with os.path.join) so the notebook is not tied to one
# user's machine; run the notebook from the project folder that contains
# "training_dataset_generated".
WORK_DIR = os.path.join("training_dataset_generated", "training_sets")
OUTPUT_DIR = os.path.join(WORK_DIR, "output")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Create config
cfg = get_cfg()

# Load base config
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

# Datasets (names must already be registered in the DatasetCatalog)
cfg.DATASETS.TRAIN = ("solar_train",)
cfg.DATASETS.TEST = ("solar_val",)

# Data loader
cfg.DATALOADER.NUM_WORKERS = 4  # More workers can help with larger data

# Pretrained weights
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

# Solver settings
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 3000  # Increased iterations for better convergence
cfg.SOLVER.STEPS = []

# ROI Head
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # Only solar panel class

# Enable image size resizing for training
cfg.INPUT.MIN_SIZE_TRAIN = (512, 768, 1024)  # Multi-scale training
cfg.INPUT.MAX_SIZE_TRAIN = 1333
cfg.INPUT.MIN_SIZE_TEST = 1024
cfg.INPUT.MAX_SIZE_TEST = 1333

# Data augmentation
cfg.INPUT.RANDOM_FLIP = "horizontal"

# Set device: use the GPU when one is available, otherwise fall back to CPU
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Output
cfg.OUTPUT_DIR = OUTPUT_DIR

# Trainer: start from the pretrained weights (resume=False) and train
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()


[32m[07/29 18:07:07 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[07/29 18:07:08 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[07/29 18:07:19 d2.utils.events]: [0m eta: 0:14:40  iter: 19  total_loss: 1.983  loss_cls: 0.5983  loss_box_reg: 0.4473  loss_mask: 0.6929  loss_rpn_cls: 0.1324  loss_rpn_loc: 0.04682    time: 0.2608  last_time: 0.1768  data_time: 0.2435  last_data_time: 0.0023   lr: 4.9953e-06  max_mem: 2288M
[32m[07/29 18:07:25 d2.utils.events]: [0m eta: 0:14:24  iter: 39  total_loss: 2.005  loss_cls: 0.525  loss_box_reg: 0.5183  loss_mask: 0.6834  loss_rpn_cls: 0.1607  loss_rpn_loc: 0.05031    time: 0.2544  last_time: 0.2198  data_time: 0.0019  last_data_time: 0.0017   lr: 9.9902e-06  max_mem: 2288M
[32m[07/29 18:07:31 d2.utils.events]: [0m eta: 0:14:20  iter: 59  total_loss: 1.816  loss_cls: 0.462  loss_box_reg: 0.4345  loss_mask: 0.667  loss_rpn_cls: 0.1085  loss_rpn_loc: 0.039    time: 0.2613  last_time: 0.3121  data_time: 0.0019  last_data_time: 0.0019   lr: 1.4985e-05  max_mem: 2288M
[32m[07/29 18:07:36 d2.utils.events]: [0m eta: 0:14:18  iter: 79  total_loss: 1.706  loss_cls: 0.389

KeyboardInterrupt: 

In [None]:
# Option 2: save the best model and apply more augmentations.

In [4]:
import os
import torch
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.data import build_detection_train_loader
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.data import DatasetMapper

#mlflow tracking
# import mlflow
# mlflow.pytorch.autolog(log_models=True)
# mlflow.set_tracking_uri('https://mlflow.krschap.tech')


# Define working directory.
# Kept relative (and built with os.path.join) so the notebook is not tied to one
# user's machine; run the notebook from the project folder that contains
# "training_dataset_generated".
WORK_DIR = os.path.join("training_dataset_generated", "training_sets")
OUTPUT_DIR = os.path.join(WORK_DIR, "model")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Custom mapper with augmentation
def custom_mapper(dataset_dict):
    """Turn one dataset record into an augmented training sample.

    The image named by ``dataset_dict["file_name"]`` is read in BGR order and
    passed through a random resize, horizontal flip, brightness and contrast
    augmentation. The same transforms are applied to every annotation whose
    ``iscrowd`` flag is 0 (or missing).

    Args:
        dataset_dict: one record of the registered dataset; must contain
            ``"file_name"`` and may contain ``"annotations"``.

    Returns:
        A copy of the record with two added keys: ``"image"`` (float32 tensor
        in channel-first layout) and ``"instances"`` (built from the
        transformed annotations).
    """
    import copy  # local import: this cell's import list does not provide it

    # Deep copy, not dict.copy(): transform_instance_annotations modifies the
    # annotation dicts it is given, and a shallow copy would share those dicts
    # with the caller's record.
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    aug_input = T.AugInput(image)

    augmentation_list = [
        T.ResizeShortestEdge(short_edge_length=(512, 768, 1024), max_size=1333, sample_style='choice'),
        T.RandomFlip(horizontal=True),
        T.RandomBrightness(0.9, 1.1),
        T.RandomContrast(0.9, 1.1)
    ]
    # Applying the list updates aug_input in place and returns the transforms
    # that were actually sampled, so the annotations can follow the image.
    transforms = T.AugmentationList(augmentation_list)(aug_input)

    image = aug_input.image
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.get("annotations", []) if obj.get("iscrowd", 0) == 0
    ]
    # HWC -> CHW float32 tensor
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    dataset_dict["instances"] = utils.annotations_to_instances(annos, image.shape[:2])
    return dataset_dict

# Custom trainer that logs and saves best model
class TrainerWithBest(DefaultTrainer):
    """DefaultTrainer that prints each step's losses and keeps the lowest-loss weights.

    Attributes:
        best_total_loss: lowest single-step total loss seen so far.
        best_epoch: value of ``epoch_counter`` at that step (-1 before any step).
        epoch_counter: number of completed ``run_step`` calls. Despite the name
            this counts optimizer steps, not passes over the dataset.
        best_model_path: file that receives the model ``state_dict`` whenever a
            new lowest loss is observed.

    Note:
        "Best" is judged on the loss of a single batch, which is noisy, and the
        weights are written after the optimizer step that followed that batch.
    """

    def __init__(self, cfg):
        """Build the trainer from ``cfg`` and reset the best-model bookkeeping."""
        super().__init__(cfg)
        self.best_total_loss = float("inf")
        self.best_epoch = -1
        self.epoch_counter = 0
        self.best_model_path = os.path.join(cfg.OUTPUT_DIR, "best_model.pth")

    def run_step(self):
        """Run one optimisation step, report its losses and update the best checkpoint.

        Raises:
            FloatingPointError: if the summed loss is NaN or infinite.
        """
        self.model.train()
        data = next(self._trainer._data_loader_iter)
        loss_dict = self.model(data)
        total_loss = sum(loss_dict.values())

        # Abort before the optimizer step: continuing with a NaN/inf loss would
        # only corrupt the weights and waste the remaining iterations.
        if not torch.isfinite(total_loss).all():
            raise FloatingPointError(
                f"Loss became infinite or NaN at step {self.epoch_counter}!\n"
                f"loss_dict = {loss_dict}"
            )

        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()

        loss_items = {k: v.item() for k, v in loss_dict.items()}
        total_loss_val = total_loss.item()

        # This override replaces the stock step and with it the stock loss
        # logging, so record the losses in the event storage here. The storage
        # is looked up defensively in case run_step is called outside train().
        storage = getattr(self, "storage", None)
        if storage is not None:
            storage.put_scalars(total_loss=total_loss_val, **loss_items)

        print(f"Epoch {self.epoch_counter:04d} | " +
              " | ".join([f"{k}: {v:.4f}" for k, v in loss_items.items()]) +
              f" | Total Loss: {total_loss_val:.4f}")

        if total_loss_val < self.best_total_loss:
            self.best_total_loss = total_loss_val
            self.best_epoch = self.epoch_counter
            torch.save(self.model.state_dict(), self.best_model_path)
            print(f" Best model saved at Epoch {self.epoch_counter} | Total Loss: {total_loss_val:.4f}")

        self.epoch_counter += 1


    @classmethod
    def build_train_loader(cls, cfg):
        """Return a training data loader that feeds samples through ``custom_mapper``."""
        print(" Augmentations applied: ResizeShortestEdge(512,768,1024), RandomFlip, RandomBrightness(0.9–1.1), RandomContrast(0.9–1.1)")
        return build_detection_train_loader(cfg, mapper=custom_mapper)

# Build the configuration on top of the model-zoo Mask R-CNN baseline
_zoo_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(_zoo_yaml))

# Model: pretrained weights, a single foreground class, GPU when available
_model_cfg = cfg.MODEL
_model_cfg.WEIGHTS = model_zoo.get_checkpoint_url(_zoo_yaml)
_model_cfg.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
_model_cfg.ROI_HEADS.NUM_CLASSES = 1
if torch.cuda.is_available():
    _model_cfg.DEVICE = "cuda"
else:
    _model_cfg.DEVICE = "cpu"

# Data: train split only, no validation during training
cfg.DATASETS.TRAIN = ("solar_train",)
cfg.DATASETS.TEST = ()  # Skip validation
cfg.DATALOADER.NUM_WORKERS = 4
cfg.INPUT.MIN_SIZE_TRAIN = (512, 768, 1024)
cfg.INPUT.MAX_SIZE_TRAIN = 1333

# Optimisation schedule
_solver_cfg = cfg.SOLVER
_solver_cfg.IMS_PER_BATCH = 2
_solver_cfg.BASE_LR = 0.00025
_solver_cfg.MAX_ITER = 3000
_solver_cfg.STEPS = []

# Where checkpoints and logs are written
cfg.OUTPUT_DIR = OUTPUT_DIR

# Train from the pretrained weights (no resume)
trainer = TrainerWithBest(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

# Final summary of the best step recorded by the trainer
for _summary_line in (
    "\n Training completed!",
    f" Best Model => Epoch: {trainer.best_epoch}, Total Loss: {trainer.best_total_loss:.4f}",
    f" Model saved at: {trainer.best_model_path}",
):
    print(_summary_line)


[32m[07/29 18:21:07 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[07/29 18:21:07 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch 0000 | loss_cls: 0.6695 | loss_box_reg: 0.5953 | loss_mask: 0.6902 | loss_rpn_cls: 0.0647 | loss_rpn_loc: 0.0166 | Total Loss: 2.0363
 Best model saved at Epoch 0 | Total Loss: 2.0363
Epoch 0001 | loss_cls: 0.7447 | loss_box_reg: 0.3940 | loss_mask: 0.6896 | loss_rpn_cls: 0.1870 | loss_rpn_loc: 0.2667 | Total Loss: 2.2821
Epoch 0002 | loss_cls: 0.6893 | loss_box_reg: 0.7471 | loss_mask: 0.6895 | loss_rpn_cls: 1.1007 | loss_rpn_loc: 0.1617 | Total Loss: 3.3883
Epoch 0003 | loss_cls: 0.7095 | loss_box_reg: 0.3764 | loss_mask: 0.6892 | loss_rpn_cls: 0.0781 | loss_rpn_loc: 0.0216 | Total Loss: 1.8748
 Best model saved at Epoch 3 | Total Loss: 1.8748
Epoch 0004 | loss_cls: 0.6494 | loss_box_reg: 0.6982 | loss_mask: 0.6889 | loss_rpn_cls: 0.0520 | loss_rpn_loc: 0.0072 | Total Loss: 2.0957
Epoch 0005 | loss_cls: 0.7258 | loss_box_reg: 0.2822 | loss_mask: 0.6895 | loss_rpn_cls: 0.0165 | loss_rpn_loc: 0.0188 | Total Loss: 1.7328
 Best model saved at Epoch 5 | Total Loss: 1.7328
Epoch 0006