# Instance Segmentation

* Instance segmentation is a computer vision task that combines object detection (locating and classifying each object in an image) with semantic segmentation (assigning a class label to every pixel), so that every detected object gets its own pixel-level mask.

* ✅ Semantic Segmentation answers: "What is in each pixel?"

  ✅ Instance Segmentation answers: "Which specific object instance is in each pixel?"

* Example:

    In a street image, if there are 3 cars:

    * Semantic segmentation labels all pixels of all cars the same ("car").

    * Instance segmentation labels Car 1, Car 2, Car 3 separately, giving each one a unique mask.


# Implementation

In [None]:
!pip install numpy opencv-python tensorflow keras matplotlib pycocotools

In [None]:
!pip install git+https://github.com/matterport/Mask_RCNN.git

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from mrcnn import utils
from mrcnn.config import Config
from pycocotools.coco import COCO

In [None]:
# 1. Configuration
# ============================================
class CocoConfig(Config):
    """Mask R-CNN settings for running on the COCO label set.

    Only the attributes listed here are overridden; everything else keeps
    the value defined on the base ``Config`` class.
    """

    # Name given to this configuration.
    NAME = "coco"

    # Process one image at a time on a single GPU.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

    # Background class plus the 80 COCO object categories.
    NUM_CLASSES = 1 + 80

# Initialize config
# Module-level configuration instance; load_model() further down passes this
# object to the Mask R-CNN constructor as its `config` argument.
config = CocoConfig()
# display() is inherited from Config -- presumably it prints the effective
# settings; confirm against the mrcnn package.
config.display()

In [None]:
# 2. Load COCO Dataset (Error-Handled)
# ============================================
import os
from pycocotools.coco import COCO

def load_coco_dataset(data_dir, subset):
    """Load the COCO annotations for one subset and locate its image folder.

    Args:
        data_dir: Root of the COCO download. It is expected to contain an
            ``annotations`` folder and one image folder per subset.
        subset: Subset name such as ``"val2017"``. It selects both the
            ``instances_<subset>.json`` annotation file and the image folder.

    Returns:
        ``(coco, image_ids, image_dir)`` on success, where ``coco`` is the
        ``COCO`` handle built from the annotation file and ``image_ids`` is
        what ``coco.getImgIds()`` returns. On any failure the problem is
        printed and ``(None, None, None)`` is returned instead of raising,
        so callers only need to test ``coco is None``.
    """
    try:
        # Normalize paths to handle different OS formats
        data_dir = os.path.normpath(data_dir)

        # Paths to annotations and images
        ann_dir = os.path.join(data_dir, "annotations")
        ann_file = os.path.join(ann_dir, f"instances_{subset}.json")
        image_dir = os.path.join(data_dir, subset)

        if not os.path.isfile(ann_file):
            # The listing is only a hint for the user. Guard it so that a
            # missing annotations folder does not raise a second, less
            # helpful error that hides the message below.
            if os.path.isdir(ann_dir):
                available_files = sorted(os.listdir(ann_dir))
            else:
                available_files = []
            raise FileNotFoundError(
                f"Annotation file not found at {ann_file}\n"
                f"Available files: {available_files}"
            )

        if not os.path.isdir(image_dir):
            # Same guard: data_dir itself may not exist.
            if os.path.isdir(data_dir):
                available_dirs = sorted(
                    d for d in os.listdir(data_dir)
                    if os.path.isdir(os.path.join(data_dir, d))
                )
            else:
                available_dirs = []
            raise FileNotFoundError(
                f"Image directory not found at {image_dir}\n"
                f"Available directories: {available_dirs}"
            )

        # Initialize COCO API
        coco = COCO(ann_file)
        image_ids = coco.getImgIds()
        print(f"Successfully loaded {len(image_ids)} images from {subset}")
        return coco, image_ids, image_dir

    except Exception as e:
        # Deliberately broad: this is the notebook-facing boundary and the
        # caller relies on the (None, None, None) sentinel, whatever failed.
        print(f"Error loading COCO dataset: {e}")
        return None, None, None

# Example usage: point these at the local COCO download.
COCO_DIR = "coco"  # Path to your coco directory
SUBSET = "val2017"

coco, image_ids, image_dir = load_coco_dataset(COCO_DIR, SUBSET)
if coco is None:
    # Loading failed: print the on-disk layout the loader expects.
    for hint_line in (
        "Please check the dataset installation",
        "Expected directory structure:",
        "coco/",
        "├── annotations/",
        "│   └── instances_val2017.json",
        "└── val2017/",
        "    └── ...jpg files...",
    ):
        print(hint_line)

In [None]:
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5

In [None]:
# 3. Load Model (Error-Handled)
import os
import requests
import warnings
from tqdm import tqdm

def download_model_weights(model_path, url=None, timeout=30):
    """Download the Mask R-CNN COCO weights to ``model_path``.

    The data is streamed to ``<model_path>.part`` and only moved into place
    once the transfer has finished. An interrupted download therefore never
    leaves a truncated file at ``model_path`` that a later existence check
    would mistake for valid weights.

    Args:
        model_path: Destination file for the weights.
        url: Source URL. Defaults to the v2.0 release asset of the
            matterport Mask_RCNN repository.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block forever.

    Returns:
        ``True`` if the file was written, ``False`` if anything failed. The
        error is printed rather than raised.
    """
    if url is None:
        url = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
    partial_path = f"{model_path}.part"

    try:
        print(f"Downloading model weights from {url}...")
        downloaded = 0
        # Context managers release the connection, the file handle and the
        # progress bar even when the transfer fails midway.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # A missing content-length header yields 0, i.e. unknown size.
            total_size = int(response.headers.get('content-length', 0))
            block_size = 1024 * 1024

            with open(partial_path, 'wb') as f, tqdm(
                total=total_size or None, unit='iB', unit_scale=True
            ) as progress_bar:
                for data in response.iter_content(block_size):
                    f.write(data)
                    downloaded += len(data)
                    progress_bar.update(len(data))

        if total_size != 0 and downloaded != total_size:
            warnings.warn("Download might be incomplete")

        # Atomic move: model_path appears only after a finished transfer.
        os.replace(partial_path, model_path)
        print(f"Model weights saved to {model_path}")
        return True
    except Exception as e:
        # Best-effort cleanup of the partial file; it may not exist yet.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Failed to download weights: {e}")
        return False

def load_model(model_path):
    """Build a Mask R-CNN inference model and load weights from ``model_path``.

    If the weights file is missing it is fetched with
    ``download_model_weights`` first. The model is configured with the
    module-level ``config`` object.

    Args:
        model_path: Path to the ``.h5`` weights file.

    Returns:
        The loaded model, or ``None`` if downloading, building or loading
        failed. The error is printed rather than raised.
    """
    try:
        if not os.path.exists(model_path):
            print(f"Weights not found at {model_path}")
            if not download_model_weights(model_path):
                return None

        # Verify weights file integrity before the expensive model build.
        file_size = os.path.getsize(model_path)
        if file_size < 200000000:  # ~200MB expected
            warnings.warn(f"Model weights file seems too small ({file_size} bytes)")

        # Suppress TensorFlow logging
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

        # `modellib` was referenced without ever being imported in this file,
        # so every call used to fail with a NameError. Import it here, next
        # to its only use.
        import mrcnn.model as modellib

        model = modellib.MaskRCNN(
            mode="inference",
            config=config,
            model_dir=os.getcwd()
        )

        model.load_weights(model_path, by_name=True)
        print("Model loaded successfully")
        return model

    except Exception as e:
        # Broad on purpose: callers only check for the None sentinel.
        print(f"Error loading model: {e}")
        return None

# Weights file expected in (or downloaded to) the working directory.
MODEL_PATH = "mask_rcnn_coco.h5"
model = load_model(MODEL_PATH)
if model is None:
    # Nothing below can run without a model: explain how to fix it and stop.
    for help_line in (
        "Failed to load model. Possible solutions:",
        "1. Download weights manually from:",
        "   https://github.com/matterport/Mask_RCNN/releases",
        "2. Place the file in your working directory",
    ):
        print(help_line)
    exit()



In [None]:
# 4. Run Inference on Dataset (Robust)
# ============================================
def run_inference(coco, image_ids, image_dir, model, num_samples=5):
    """Run instance segmentation on the first ``num_samples`` images.

    Images are taken in the order given by ``image_ids`` (not sampled at
    random). A failure on one image is printed and does not stop the
    remaining images from being processed.

    Args:
        coco: COCO handle used to look up each image's file name.
        image_ids: Image ids to draw from.
        image_dir: Folder containing the image files.
        model: Object whose ``detect([image], verbose=0)`` returns one
            result per input image.
        num_samples: Maximum number of images to process. Values <= 0
            process nothing.
    """
    for image_id in image_ids[:max(num_samples, 0)]:
        try:
            # Load image
            image_info = coco.loadImgs(image_id)[0]
            image_path = os.path.join(image_dir, image_info['file_name'])
            image = cv2.imread(image_path)

            # This check has to come before cvtColor. Converting a None
            # image raises, which made this branch unreachable and hid the
            # real cause behind a generic error.
            if image is None:
                print(f"Could not read image {image_path}")
                continue

            # The file is loaded as BGR; convert to RGB for the model and
            # for matplotlib display.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Run detection
            results = model.detect([image], verbose=0)[0]

            # Visualize results
            visualize(image, results, coco)

        except Exception as e:
            print(f"Error processing image {image_id}: {e}")
            continue

def visualize(image, results, coco):
    """Display ``image`` with instance masks, bounding boxes and labels.

    Args:
        image: RGB image array the detections refer to.
        results: Detection dict with the keys ``'rois'`` (rows of
            ``y1, x1, y2, x2``), ``'class_ids'``, ``'scores'`` and
            ``'masks'``.
        coco: COCO handle used to translate class indices into names.
    """
    plt.figure(figsize=(12, 8))

    # Show masks
    masked_image = apply_masks(image, results['masks'], results['class_ids'])

    # The model predicts contiguous class indices (0 = background), whereas
    # COCO category ids have gaps. matterport's COCO loader assigns the
    # indices in sorted category-id order, so index i maps to the i-th
    # sorted id. Passing the index straight to loadCats gives wrong names.
    category_ids = sorted(coco.getCatIds())

    # Show bounding boxes and labels
    detections = zip(results['rois'], results['class_ids'], results['scores'])
    for (y1, x1, y2, x2), class_id, score in detections:
        # Plain Python ints: OpenCV drawing calls and pycocotools' loadCats
        # do not reliably accept numpy integer scalars.
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        class_index = int(class_id)

        if 1 <= class_index <= len(category_ids):
            category_id = category_ids[class_index - 1]
            class_name = coco.loadCats(category_id)[0]['name']
        else:
            # Unknown index: still draw the box instead of failing.
            class_name = f"class {class_index}"

        # Draw bounding box
        cv2.rectangle(masked_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Label with class and score. Keep the text inside the image when
        # the box touches the top edge.
        label = f"{class_name} {score:.2f}"
        label_y = max(y1 - 5, 15)
        cv2.putText(
            masked_image, label, (x1, label_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2
        )

    plt.imshow(masked_image)
    plt.axis("off")
    plt.show()

def apply_masks(image, masks, class_ids):
    """Return a copy of ``image`` with each instance mask tinted.

    Every instance gets its own hue, spread evenly around the colour wheel,
    and is blended at 30% opacity over the original pixels.

    Args:
        image: Image array with at least three channels in the last axis.
            It is not modified.
        masks: Array of shape ``(height, width, num_instances)``; pixels
            equal to 1 (or ``True``) belong to the instance.
        class_ids: Accepted for interface compatibility. Colours are chosen
            per instance, so this argument is not used.

    Returns:
        A new array with the same shape and dtype as ``image``.
    """
    # Local import keeps this cell self-contained. colorsys replaces
    # plt.cm.get_cmap, which no longer exists in Matplotlib 3.9+. The
    # cyclic 'hsv' colormap also gave the first and last instance the same
    # colour.
    import colorsys

    masked_image = image.copy()
    num_instances = masks.shape[-1]
    rgb = masked_image[:, :, :3]  # view: writes go through to masked_image

    for i in range(num_instances):
        mask = masks[:, :, i] == 1
        # Hues i/n stay within [0, 1), so no two instances share a colour.
        hue = i / num_instances
        color = np.array(colorsys.hsv_to_rgb(hue, 1.0, 1.0)) * 255

        # Apply mask with transparency on all three channels at once.
        blended = rgb[mask] * 0.7 + 0.3 * color
        rgb[mask] = blended.astype(masked_image.dtype)

    return masked_image

In [None]:
# Run on the first 5 images of the loaded subset (run_inference walks
# image_ids in order; it does not sample at random)
run_inference(coco, image_ids, image_dir, model, num_samples=5)

# Lab Task

* Load any dataset of your own choice.

* Apply any other architecture to that dataset.

* Train & Evaluate the model.

* Display Segmented Images.