In [10]:
# Step 4: Import necessary libraries
import os
import numpy as np
from PIL import Image
import json
from tqdm import tqdm
import cv2
import sys
sys.path.append(r'C:\Users\absolute-zero\Desktop\NNIS\examples\Mask2Former')


In [4]:
# Step 5: Create the on-disk layout (image folders per split + annotation folder)
for data_subdir in ('data/train_images', 'data/val_images', 'data/annotations'):
    # exist_ok=True keeps this cell safe to re-run.
    os.makedirs(data_subdir, exist_ok=True)


In [11]:
# Step 6: Generate synthetic dataset
def create_synthetic_dataset(num_images, image_dir, annotation_file,
                             image_size=256, rect=(60, 60, 196, 196), seed=None):
    """Write a small COCO-format instance-segmentation dataset to disk.

    Every sample is a square RGB image of uniform random noise, saved as
    ``image_<id>.png``, together with a binary mask PNG ``mask_<id>.png``
    (saved in the same directory) and one annotation describing a single
    axis-aligned rectangle of category ``rectangle`` (id 1).

    Args:
        num_images: Number of images to generate; ids run from 1 to num_images.
        image_dir: Directory receiving the image and mask PNGs (created if missing).
        annotation_file: Path of the COCO JSON file to write (its parent
            directory is created if missing).
        image_size: Side length in pixels of the square images.
        rect: ``(x0, y0, x1, y1)`` corners of the annotated rectangle.
        seed: Optional seed for the noise generator; ``None`` gives fresh
            random images on every call.

    Raises:
        ValueError: If ``rect`` is empty or does not lie inside the image.
    """
    # Plain ints so the values are JSON-serialisable even if numpy ints are passed.
    x0, y0, x1, y1 = (int(v) for v in rect)
    image_size = int(image_size)
    if not (0 <= x0 < x1 < image_size and 0 <= y0 < y1 < image_size):
        raise ValueError(
            f"rect {rect!r} must satisfy 0 <= x0 < x1 < {image_size} "
            f"and 0 <= y0 < y1 < {image_size}"
        )
    rect_width = x1 - x0
    rect_height = y1 - y0

    # Do not rely on another cell having created the output locations.
    os.makedirs(image_dir, exist_ok=True)
    annotation_dir = os.path.dirname(annotation_file)
    if annotation_dir:
        os.makedirs(annotation_dir, exist_ok=True)

    rng = np.random.default_rng(seed)

    # The mask is identical for every image, so build it once outside the loop.
    # NOTE(review): cv2.rectangle fills both corner pixels inclusively, so the
    # PNG mask is one pixel wider/taller than the polygon's width/height below.
    mask = np.zeros((image_size, image_size), dtype=np.uint8)
    cv2.rectangle(mask, (x0, y0), (x1, y1), 1, -1)  # thickness -1 => filled
    mask_image = Image.fromarray(mask * 255)

    images = []
    annotations = []

    for annotation_id, img_id in enumerate(tqdm(range(1, num_images + 1)), start=1):
        # Random-noise image; the upper bound is exclusive, so 256 covers 0..255.
        noise = rng.integers(0, 256, size=(image_size, image_size, 3), dtype=np.uint8)
        img_filename = f'image_{img_id}.png'
        Image.fromarray(noise).save(os.path.join(image_dir, img_filename))

        mask_filename = f'mask_{img_id}.png'
        mask_image.save(os.path.join(image_dir, mask_filename))

        # Image info
        images.append({
            'file_name': img_filename,
            'height': image_size,
            'width': image_size,
            'id': img_id
        })

        # Annotation info: polygon corners, polygon area, bbox as x, y, width, height.
        annotations.append({
            'id': annotation_id,
            'image_id': img_id,
            'category_id': 1,
            'segmentation': [[x0, y0, x1, y0, x1, y1, x0, y1]],
            'area': rect_width * rect_height,
            'bbox': [x0, y0, rect_width, rect_height],
            'iscrowd': 0
        })

    # Categories
    categories = [{
        'id': 1,
        'name': 'rectangle',
        'supercategory': 'shape'
    }]

    # Create annotation file in COCO format
    annotation_data = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }

    with open(annotation_file, 'w') as f:
        json.dump(annotation_data, f)

# Generate both splits: (number of images, image directory, COCO annotation file)
dataset_splits = [
    (10, 'data/train_images', 'data/annotations/instances_train.json'),  # training data
    (2, 'data/val_images', 'data/annotations/instances_val.json'),       # validation data
]
for split_args in dataset_splits:
    create_synthetic_dataset(*split_args)


100%|██████████| 10/10 [00:00<00:00, 87.75it/s]
100%|██████████| 2/2 [00:00<00:00, 70.17it/s]


In [6]:
# Step 7: Register datasets with Detectron2
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Dataset name -> (COCO annotation file, image directory).
synthetic_datasets = {
    "synthetic_train": ("data/annotations/instances_train.json", "data/train_images"),
    "synthetic_val": ("data/annotations/instances_val.json", "data/val_images"),
}

for dataset_name, (json_file, image_root) in synthetic_datasets.items():
    # Detectron2 refuses to register the same name twice, so skip names that
    # are already present; this keeps the cell re-runnable in a live kernel.
    if dataset_name not in DatasetCatalog.list():
        register_coco_instances(dataset_name, {}, json_file, image_root)


In [16]:
# Step 8: Configure the Mask2Former model
import detectron2
from detectron2.config import get_cfg
from mask2former import add_maskformer2_config
from detectron2.engine import DefaultTrainer

# Base config: detectron2 defaults, extended with the Mask2Former keys, then
# overlaid with the COCO instance-segmentation R50 recipe.
cfg = get_cfg()
add_maskformer2_config(cfg)
cfg.merge_from_file("Mask2Former/configs/coco/instance-segmentation/maskformer2_R50_bs16_50ep.yaml")

# Model: single foreground class, backbone weights from the detectron2 model zoo URI.
cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 1
cfg.MODEL.WEIGHTS = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"

# Data: the two synthetic splits registered earlier in the notebook.
cfg.DATASETS.TRAIN = ("synthetic_train",)
cfg.DATASETS.TEST = ("synthetic_val",)
cfg.DATALOADER.NUM_WORKERS = 1

# Solver: small batch and short schedule for the toy dataset.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 1000

# Output location for training artefacts.
cfg.OUTPUT_DIR = "./output_synthetic"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)


ModuleNotFoundError: 

Please compile MultiScaleDeformableAttention CUDA op with the following commands:
	`cd mask2former/modeling/pixel_decoder/ops`
	`sh make.sh`


In [None]:
# Step 9: Train the model
# Builds detectron2's DefaultTrainer from the notebook-level `cfg` and runs it.
# NOTE(review): the Mask2Former repository ships its own Trainer in train_net.py;
# presumably it wires up the dataset mapper and optimizer that the YAML recipe
# expects -- confirm that DefaultTrainer is sufficient for this config.
trainer = DefaultTrainer(cfg)
# resume=False: do not continue from an earlier run; start from the configured weights.
trainer.resume_or_load(resume=False)
trainer.train()


In [None]:
# Step 10: Save the model path
# Training is expected to leave its final checkpoint in cfg.OUTPUT_DIR under this name.
model_weights_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

# Fail here with a clear message instead of announcing a checkpoint that was
# never written (e.g. training was skipped or interrupted).
if not os.path.isfile(model_weights_path):
    raise FileNotFoundError(
        f"Expected trained weights at {model_weights_path}; run the training cell first."
    )
print(f"Model weights saved to: {model_weights_path}")


In [None]:
# Step 11: Run inference to verify the model
import cv2
import matplotlib.pyplot as plt
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer

SCORE_THRESHOLD = 0.5
VAL_IMAGE_PATH = "data/val_images/image_1.png"

cfg.MODEL.WEIGHTS = model_weights_path
# NOTE(review): this key belongs to ROI-head models; Mask2Former's instance
# inference does not appear to consult it, so the threshold is also applied
# explicitly to the predictions below.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = SCORE_THRESHOLD
cfg.DATASETS.TEST = ("synthetic_val", )
cfg.MODEL.DEVICE = "cpu"

predictor = DefaultPredictor(cfg)

# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread(VAL_IMAGE_PATH)
if image is None:
    raise FileNotFoundError(f"Could not read validation image: {VAL_IMAGE_PATH}")
outputs = predictor(image)

# Keep only confident predictions so the overlay is not cluttered.
instances = outputs["instances"].to("cpu")
if instances.has("scores"):
    instances = instances[instances.scores > SCORE_THRESHOLD]

# cv2 loads BGR; the Visualizer works in RGB, hence the channel flip on input.
v = Visualizer(image[:, :, ::-1], scale=1.0)
v = v.draw_instance_predictions(instances)
# get_image() is already RGB, which is what matplotlib expects; flipping it
# back to BGR (as one would for cv2.imshow) would swap red and blue.
result_image = v.get_image()

plt.figure(figsize=(8, 8))
plt.imshow(result_image)
plt.axis('off')
plt.show()
