<a href="https://colab.research.google.com/github/alexandrufalk/Background-generator/blob/Layout-page/CenterNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
import tensorflow_datasets as tfds


# Download the Pascal VOC Dataset

In [2]:
# Fetch the PASCAL VOC 2007 detection data (train and validation splits merged)
# together with its DatasetInfo metadata object.
dataset, info = tfds.load(
    "voc/2007",
    split="train+validation",
    with_info=True,
)

# Explore the Dataset

In [3]:
# Print the dataset metadata returned by tfds.load
print(info)

# Peek at a single sample: its image tensor plus the per-object annotations
for sample in dataset.take(1):
    image = sample["image"]
    bbox, label = sample["objects"]["bbox"], sample["objects"]["label"]
    print(f"Image shape: {image.shape}")
    print(f"Bounding Boxes: {bbox}")
    print(f"Labels: {label}")

tfds.core.DatasetInfo(
    name='voc',
    full_name='voc/2007/4.0.0',
    description="""
    This dataset contains the data from the PASCAL Visual Object Classes Challenge,
    corresponding to the Classification and Detection competitions.
    
    In the Classification competition, the goal is to predict the set of labels
    contained in the image, while in the Detection competition the goal is to
    predict the bounding box and label of each individual object.
    annotations.
    """,
    config_description="""
    This dataset contains the data from the PASCAL Visual Object Classes Challenge
    2007, a.k.a. VOC2007.
    
    A total of 9963 images are included in this dataset, where each image
    contains a set of objects, out of 20 different classes, making a total of
    24640 annotated objects.
    
    """,
    homepage='http://host.robots.ox.ac.uk/pascal/VOC/voc2007/',
    data_dir='/root/tensorflow_datasets/voc/2007/4.0.0',
    file_format=tfrecord,
    download_size=8

# Preprocess the Dataset

In [3]:
def preprocess_sample_fixed(sample, max_boxes=100):
    """Turn one VOC sample into a resized image and fixed-size box targets.

    Args:
        sample: Dict with an ``"image"`` tensor and an ``"objects"`` dict that
            holds ``"bbox"`` (rows of ``[ymin, xmin, ymax, xmax]``) and
            ``"label"``. Coordinates are presumably normalised to [0, 1]
            (TODO confirm against the TFDS feature spec).
        max_boxes: Number of object slots in the output. Images with fewer
            objects are zero-padded; images with more are truncated to the
            first ``max_boxes`` objects.

    Returns:
        ``(image, targets)`` where ``image`` is resized to 512x512 and divided
        by 255, and ``targets`` is a dict with:
          * ``"boxes"``: ``(max_boxes, 4)`` rows of
            ``[center_x, center_y, width, height]``.
          * ``"classes"``: ``(max_boxes,)`` class ids.
        Padded slots are all zeros, so they have zero width and height; the
        padded class id 0 is otherwise indistinguishable from a real class 0.
    """
    image = tf.image.resize(sample["image"], (512, 512)) / 255.0  # Normalize image

    # Keep at most `max_boxes` objects. Without this, an image with more
    # objects would make the pad amounts below negative, which tf.pad rejects.
    boxes = sample["objects"]["bbox"][:max_boxes]  # [ymin, xmin, ymax, xmax]
    classes = sample["objects"]["label"][:max_boxes]

    # Convert corner format to CenterNet's center/size format.
    center_x = (boxes[:, 1] + boxes[:, 3]) / 2.0  # (xmin + xmax) / 2
    center_y = (boxes[:, 0] + boxes[:, 2]) / 2.0  # (ymin + ymax) / 2
    width = boxes[:, 3] - boxes[:, 1]  # xmax - xmin
    height = boxes[:, 2] - boxes[:, 0]  # ymax - ymin
    centernet_boxes = tf.stack([center_x, center_y, width, height], axis=-1)

    # Pad to a fixed number of slots so samples can be batched together.
    num_missing = max_boxes - tf.shape(centernet_boxes)[0]
    padded_boxes = tf.pad(centernet_boxes, [[0, num_missing], [0, 0]])
    padded_classes = tf.pad(classes, [[0, num_missing]])

    return image, {"boxes": padded_boxes, "classes": padded_classes}

# Shuffle individual raw samples *before* batching: shuffling after `.batch()`
# only reorders whole batches (their contents never change between epochs) and
# makes the shuffle buffer hold up to 1000 full batches of 512x512 float images.
train_dataset = (
    dataset
    .shuffle(1000)
    .map(preprocess_sample_fixed, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)  # overlap preprocessing with consumption
)

# Generate Heatmaps

In [1]:
def generate_heatmap(boxes, labels, output_shape=(128, 128), num_classes=20, sigma=2):
    """Render per-class center heatmaps for the objects of ONE image.

    Each object contributes a Gaussian bump centred on its box center, drawn
    into the channel given by its class id. Where bumps of the same class
    overlap, the element-wise maximum is kept so that one object cannot erase
    the peak of another.

    Args:
        boxes: ``(N, 4)`` rows of ``[center_x, center_y, width, height]`` with
            centers expressed as fractions of the image size (they are scaled
            by ``output_shape``). Rows with non-positive width or height are
            treated as padding and ignored.
        labels: ``(N,)`` integer class ids aligned with ``boxes``.
        output_shape: ``(height, width)`` of the heatmap grid, as Python ints.
        num_classes: Number of heatmap channels.
        sigma: Standard deviation of the Gaussian in grid cells. Each bump is
            limited to a window of ``2 * sigma`` cells either side of the
            center (inclusive).

    Returns:
        A float32 tensor of shape ``(height, width, num_classes)`` with values
        in ``[0, 1]``; all zeros when there are no real boxes.
    """
    out_h, out_w = output_shape
    boxes = tf.cast(boxes, tf.float32)
    labels = tf.cast(labels, tf.int32)

    # Zero-area rows are padding slots; drawing them would paint a spurious
    # peak at grid cell (0, 0) of class 0.
    is_real = tf.logical_and(boxes[:, 2] > 0.0, boxes[:, 3] > 0.0)
    boxes = tf.boolean_mask(boxes, is_real)
    labels = tf.boolean_mask(labels, is_real)

    # Grid cell containing each center. Clipping keeps a center sitting exactly
    # on the right/bottom image edge (coordinate 1.0) inside the grid.
    center_col = tf.clip_by_value(tf.floor(boxes[:, 0] * out_w), 0.0, out_w - 1.0)
    center_row = tf.clip_by_value(tf.floor(boxes[:, 1] * out_h), 0.0, out_h - 1.0)

    # Signed distance of every grid column/row from every center.
    cols = tf.range(out_w, dtype=tf.float32)
    rows = tf.range(out_h, dtype=tf.float32)
    d_col = cols[tf.newaxis, :] - center_col[:, tf.newaxis]  # (N, W)
    d_row = rows[tf.newaxis, :] - center_row[:, tf.newaxis]  # (N, H)

    # One Gaussian image per object: (N, H, W).
    sq_dist = d_row[:, :, tf.newaxis] ** 2 + d_col[:, tf.newaxis, :] ** 2
    gaussians = tf.exp(-sq_dist / (2.0 * sigma ** 2))

    # Restrict each bump to a symmetric (2*sigma)-cell window around its center.
    radius = 2.0 * sigma
    in_window = tf.logical_and(
        (tf.abs(d_row) <= radius)[:, :, tf.newaxis],
        (tf.abs(d_col) <= radius)[:, tf.newaxis, :],
    )
    gaussians = tf.where(in_window, gaussians, tf.zeros_like(gaussians))

    # Merge objects of the same class with an element-wise max -> (C, H, W).
    # Classes with no objects come back as the lowest float value, so clamp
    # them to zero before returning.
    per_class = tf.math.unsorted_segment_max(gaussians, labels, num_segments=num_classes)
    per_class = tf.maximum(per_class, 0.0)

    return tf.transpose(per_class, [1, 2, 0])


In [None]:
for image, targets in train_dataset:
    # train_dataset yields whole batches, while generate_heatmap encodes the
    # objects of a single image, so encode each sample and stack the results
    # into one (batch, height, width, num_classes) tensor.
    heatmap = tf.stack([
        generate_heatmap(sample_boxes, sample_classes)
        for sample_boxes, sample_classes in zip(targets["boxes"], targets["classes"])
    ])
    # Combine heatmap, offsets, and sizes into one structure for training
    # Pass the data to your model training loop.

# Define the Backbone

In [None]:
def create_backbone(input_shape=(512, 512, 3)):
    """Build a ResNet50 feature extractor cut off at ``conv4_block6_out``.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.

    Returns:
        A ``tf.keras.Model`` that maps an image batch to the activations of
        ResNet50's ``conv4_block6_out`` layer. The network is created without
        its classification top and is initialised with ImageNet weights.
    """
    resnet = ResNet50(weights="imagenet", include_top=False, input_shape=input_shape)
    # Tap an intermediate stage rather than the network's final output.
    feature_map = resnet.get_layer("conv4_block6_out").output
    return tf.keras.Model(inputs=resnet.input, outputs=feature_map)

# Detection Head
The detection head predicts three maps:

- Heatmap: per-class scores that identify object centers.

- Offsets: corrections that compensate for the downsampling of the output grid.

- Sizes: the width and height of each object.


In [None]:
def create_detection_head(backbone_output, num_classes):
    """Attach the three CenterNet prediction branches to a feature map.

    Every branch is a 3x3 convolution (64 filters, ReLU) followed by a 1x1
    convolution that produces the branch output.

    Args:
        backbone_output: Symbolic feature-map tensor the branches read from.
        num_classes: Number of channels of the heatmap branch.

    Returns:
        ``(heatmap, offset, size)``:
          * ``heatmap``: ``num_classes`` channels with sigmoid activation
            (output layer named ``"heatmap"``).
          * ``offset``: 2 linear channels for x and y offsets (``"offset"``).
          * ``size``: 2 linear channels for width and height (``"size"``).
    """
    conv = tf.keras.layers.Conv2D

    def branch(channels, name, activation=None):
        # Shared recipe: 3x3 feature conv, then a named 1x1 projection.
        hidden = conv(64, kernel_size=3, padding="same", activation="relu")(backbone_output)
        return conv(channels, kernel_size=1, padding="same", activation=activation, name=name)(hidden)

    # Branches are built in heatmap -> offset -> size order.
    heatmap = branch(num_classes, "heatmap", activation="sigmoid")
    offset = branch(2, "offset")
    size = branch(2, "size")

    return heatmap, offset, size


# Build the Complete CenterNet Model

In [None]:
def create_centernet(input_shape=(512, 512, 3), num_classes=80):
    """Assemble the full CenterNet model: backbone plus detection head.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
        num_classes: Number of heatmap channels. Defaults to 80; pass the
            dataset's class count explicitly when it differs.

    Returns:
        A ``tf.keras.Model`` taking an image batch and producing the list
        ``[heatmap, offset, size]``.
    """
    image_input = tf.keras.Input(shape=input_shape)
    features = create_backbone(input_shape)(image_input)
    head_outputs = create_detection_head(features, num_classes)
    return tf.keras.Model(inputs=image_input, outputs=list(head_outputs))


# Compile the Model

In [None]:
def focal_loss(gamma=2.0, alpha=0.25):
    """Create a focal-loss function for the heatmap output.

    Args:
        gamma: Focusing exponent; larger values down-weight easy examples more.
        alpha: Weight applied to positive cells (``y_true == 1``); every other
            cell is weighted by ``1 - alpha``.

    Returns:
        A function ``loss(y_true, y_pred)`` that returns the focal loss
        averaged over the last (class) axis, i.e. a tensor with the shape of
        the inputs minus the last dimension.
    """
    def focal_loss_fixed(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)

        is_positive = tf.equal(y_true, 1.0)
        alpha_factor = tf.where(
            is_positive,
            tf.cast(alpha, y_pred.dtype),
            tf.cast(1 - alpha, y_pred.dtype),
        )
        modulating = tf.where(is_positive, 1 - y_pred, y_pred)
        focal_weight = alpha_factor * tf.pow(modulating, gamma)

        # Element-wise binary cross-entropy. tf.keras.losses.binary_crossentropy
        # already averages over the class axis, so its result cannot be
        # multiplied by the per-class focal weights.
        eps = 1e-7  # keeps log() finite when a prediction saturates at 0 or 1
        prob = tf.clip_by_value(y_pred, eps, 1.0 - eps)
        bce = -(y_true * tf.math.log(prob) + (1.0 - y_true) * tf.math.log(1.0 - prob))

        return tf.reduce_mean(focal_weight * bce, axis=-1)
    return focal_loss_fixed

def smooth_l1_loss(y_true, y_pred):
    """Return the Huber loss between targets and predictions.

    Uses a freshly constructed ``tf.keras.losses.Huber`` with its default
    settings and returns whatever that loss object produces for the inputs.
    """
    huber = tf.keras.losses.Huber()
    return huber(y_true, y_pred)

# Pascal VOC has 20 object classes. create_centernet defaults to 80, which
# would give the heatmap output a different channel count than the 20-class
# heatmap targets produced by generate_heatmap.
centernet = create_centernet(num_classes=20)

# Losses are keyed by the names of the model's three output layers.
centernet.compile(
    optimizer="adam",
    loss={
        "heatmap": focal_loss(),
        "offset": smooth_l1_loss,
        "size": smooth_l1_loss,
    },
)

# Train the Model

In [None]:
# Train for 20 epochs and keep the returned History object.
# NOTE(review): `val_dataset` is not defined in any cell visible here; unless it
# is created elsewhere, this line raises NameError on a fresh kernel.
# NOTE(review): train_dataset yields targets keyed "boxes"/"classes", whereas
# the compiled losses are keyed "heatmap"/"offset"/"size". Presumably a target
# encoding step is still missing before this call; confirm.
history = centernet.fit(train_dataset, validation_data=val_dataset, epochs=20)