U-Net (TensorFlow/Keras) on Pascal VOC

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt
import os

# Example function to build U-Net model
def _double_conv(tensor, filters):
    """Apply two 3x3 same-padded ReLU convolutions with ``filters`` channels."""
    for _ in range(2):
        tensor = layers.Conv2D(
            filters, (3, 3), activation='relu', padding='same'
        )(tensor)
    return tensor


def build_unet(input_shape=(128, 128, 3), num_classes=21):
    """Build a small two-level U-Net for semantic segmentation.

    The encoder has two stages (64 and 128 filters), each made of two 3x3
    convolutions followed by 2x2 max pooling, then a 256-filter bottleneck.
    The decoder mirrors it: every stage upsamples by 2, concatenates the
    matching encoder features (skip connection) and applies two 3x3
    convolutions. A final 1x1 softmax convolution yields one score per
    class for every pixel.

    Args:
        input_shape: Shape of a single input image, forwarded to
            ``layers.Input``. Treated as channels-last
            ``(height, width, channels)``. Because the network pools twice,
            a known height/width must be divisible by 4; ``None``
            (dynamic) dimensions are not checked.
        num_classes: Number of channels of the softmax output layer.
            Defaults to 21, the value used for Pascal VOC.

    Returns:
        An uncompiled Keras model mapping the input to ``num_classes``
        softmax channels.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1, or a known spatial
            dimension is not divisible by 4.
    """
    if num_classes < 1:
        raise ValueError(f"num_classes must be >= 1, got {num_classes}")
    # Two 2x2 poolings followed by two 2x upsamplings only restore the
    # original size when it is a multiple of 4; otherwise the skip
    # connections cannot be concatenated. Fail early with a clear message.
    for size in tuple(input_shape)[:2]:
        if size is not None and size % 4 != 0:
            raise ValueError(
                "input height and width must be divisible by 4, "
                f"got input_shape={tuple(input_shape)}"
            )

    inputs = layers.Input(input_shape)

    # Contracting path (encoder)
    c1 = _double_conv(inputs, 64)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = _double_conv(p1, 128)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    # Bottleneck
    c3 = _double_conv(p2, 256)

    # Expanding path (decoder) with skip connections to the encoder
    u1 = layers.UpSampling2D((2, 2))(c3)
    u1 = layers.Concatenate()([u1, c2])
    c4 = _double_conv(u1, 128)

    u2 = layers.UpSampling2D((2, 2))(c4)
    u2 = layers.Concatenate()([u2, c1])
    c5 = _double_conv(u2, 64)

    # Per-pixel class probabilities
    outputs = layers.Conv2D(num_classes, (1, 1), activation='softmax')(c5)

    # Use tf.keras.Model instead of the `models` alias: a later cell in this
    # file rebinds `models` to torchvision.models, which would break any
    # call made after that import.
    return tf.keras.Model(inputs, outputs)

# Load dataset (replace with your own data loader for Pascal VOC)
def load_pascal_voc():
    """Placeholder for the Pascal VOC data loader.

    Replace the body with real loading code (e.g., using tfds or manual
    data loading). The training snippet in this file unpacks the result as
    ``x_train, y_train``, so an implementation should return such a pair.

    Raises:
        NotImplementedError: Always, until a real loader is supplied.
            Raising here gives a clear message instead of silently
            returning ``None`` and failing later when the result is
            unpacked.
    """
    raise NotImplementedError(
        "load_pascal_voc() is a placeholder: implement Pascal VOC loading "
        "and return (x_train, y_train)."
    )

# Build the U-Net and configure it for training.
# NOTE(review): 'categorical_crossentropy' is normally fed one-hot encoded
# targets; confirm the label masks are encoded that way before training.
model = build_unet()
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Load your dataset here (Pascal VOC)
# x_train, y_train = load_pascal_voc()

# Train the model
# model.fit(x_train, y_train, batch_size=32, epochs=50)

# Evaluate the model (use the test set; x_test / y_test must be provided)
# test_loss, test_acc = model.evaluate(x_test, y_test)
# print(f"Test accuracy: {test_acc:.4f}")


Mask R-CNN (PyTorch) on COCO

In [None]:
import torch
from torchvision import models, transforms
from PIL import Image
import matplotlib.pyplot as plt

# Fetch the pre-trained Mask R-CNN model and switch it to inference mode
# (eval() returns the module itself, so the call can be chained).
# NOTE(review): newer torchvision releases appear to prefer a `weights=`
# argument over `pretrained=` -- confirm against the installed version.
model = models.detection.maskrcnn_resnet50_fpn(pretrained=True).eval()

# Load and preprocess image
def load_and_preprocess_image(image_path):
    """Load an image file and return it as a batched tensor.

    Args:
        image_path: Location of the image, passed to ``PIL.Image.open``.

    Returns:
        The RGB image converted with ``transforms.ToTensor`` and given a
        leading batch dimension of size 1 via ``unsqueeze(0)``.
    """
    # Image.open keeps the file handle open; the context manager releases it
    # as soon as convert() has produced an independent in-memory copy.
    with Image.open(image_path) as source:
        rgb_image = source.convert("RGB")
    tensor = transforms.ToTensor()(rgb_image)
    return tensor.unsqueeze(0)  # Add batch dimension

# Example: load a COCO image (replace the placeholder path with a real file)
image = load_and_preprocess_image('path_to_your_image.jpg')

# Run inference; gradients are not needed for prediction
with torch.no_grad():
    prediction = model(image)

# Pull out the outputs for the single image in the batch.
# masks is indexed below as masks[i, 0], i.e. it carries one single-channel
# mask per detection ([num_objects, 1, height, width]).
masks = prediction[0]['masks']
labels = prediction[0]['labels']
scores = prediction[0]['scores']

# Show the mask of the first detection, if the model found anything
if masks.shape[0] > 0:
    mask = masks[0][0]  # 2-D mask of the first detected object
    plt.imshow(mask.cpu().numpy(), cmap='gray')
    plt.title(f"Detected Class: {labels[0]} with Score: {scores[0]:.4f}")
    plt.show()


Evaluating Performance by measuring accuracy, IoU, AP, etc.

In [None]:
from pycocotools.coco import COCO
# COCOeval is used below but was never imported, which raised NameError.
from pycocotools.cocoeval import COCOeval

# Load COCO ground truth and the model's results
coco_gt = COCO('annotations/instances_val2017.json')
coco_dt = coco_gt.loadRes('predictions.json')  # Load your model's prediction results

# Evaluate segmentation ('segm') results: evaluate, accumulate, then
# print the summary
coco_eval = COCOeval(coco_gt, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()