<a href="https://colab.research.google.com/github/alexandrufalk/Background-generator/blob/Layout-page/CenterNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
import tensorflow_datasets as tfds


# Download Pascal VOC Dataset

In [2]:
# Fetch Pascal VOC 2007 (images plus bounding-box annotations) through TFDS.
# The train and validation splits are concatenated into a single dataset, and
# `with_info=True` additionally returns the dataset metadata object.
dataset, info = tfds.load(
    name="voc/2007",
    split="train+validation",
    with_info=True,
)

Downloading and preparing dataset 868.85 MiB (download: 868.85 MiB, generated: Unknown size, total: 868.85 MiB) to /root/tensorflow_datasets/voc/2007/4.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating test examples...:   0%|          | 0/4952 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/voc/2007/incomplete.8NREZ5_4.0.0/voc-test.tfrecord*...:   0%|          | 0…

Generating train examples...:   0%|          | 0/2501 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/voc/2007/incomplete.8NREZ5_4.0.0/voc-train.tfrecord*...:   0%|          | …

Generating validation examples...:   0%|          | 0/2510 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/voc/2007/incomplete.8NREZ5_4.0.0/voc-validation.tfrecord*...:   0%|       …

Dataset voc downloaded and prepared to /root/tensorflow_datasets/voc/2007/4.0.0. Subsequent calls will reuse this data.


# Explore the Dataset

In [3]:
# Show the dataset metadata returned by tfds.load.
print(info)

# Peek at a single example: the image tensor and its per-object annotations.
for sample in dataset.take(1):
    image, bbox, label = (
        sample["image"],
        sample["objects"]["bbox"],
        sample["objects"]["label"],
    )
    print(
        f"Image shape: {image.shape}",
        f"Bounding Boxes: {bbox}",
        f"Labels: {label}",
        sep="\n",
    )

tfds.core.DatasetInfo(
    name='voc',
    full_name='voc/2007/4.0.0',
    description="""
    This dataset contains the data from the PASCAL Visual Object Classes Challenge,
    corresponding to the Classification and Detection competitions.
    
    In the Classification competition, the goal is to predict the set of labels
    contained in the image, while in the Detection competition the goal is to
    predict the bounding box and label of each individual object.
    annotations.
    """,
    config_description="""
    This dataset contains the data from the PASCAL Visual Object Classes Challenge
    2007, a.k.a. VOC2007.
    
    A total of 9963 images are included in this dataset, where each image
    contains a set of objects, out of 20 different classes, making a total of
    24640 annotated objects.
    
    """,
    homepage='http://host.robots.ox.ac.uk/pascal/VOC/voc2007/',
    data_dir=PosixGPath('/tmp/tmpkk4o9h29tfds'),
    file_format=tfrecord,
    download_size=868.85 Mi

# Define the Backbone

In [None]:
def create_backbone(input_shape=(512, 512, 3)):
    """Build a ResNet50 feature extractor truncated at ``conv4_block6_out``.

    Args:
        input_shape: Shape of a single input image (no batch dimension),
            forwarded to ``ResNet50``.

    Returns:
        A ``tf.keras.Model`` mapping the ResNet50 input to the output of its
        ``conv4_block6_out`` layer.
    """
    # ImageNet-pretrained ResNet50 without the classification top.
    resnet = ResNet50(weights="imagenet", include_top=False, input_shape=input_shape)
    # Tap an intermediate layer rather than the network's final output.
    feature_map = resnet.get_layer("conv4_block6_out").output
    return tf.keras.Model(inputs=resnet.input, outputs=feature_map)

# Detection Head
The detection head will predict:

- Heatmap: to identify object centers.

- Offsets: to adjust for downsampling effects.

- Sizes: to predict the width and height of objects.


In [None]:
def create_detection_head(backbone_output, num_classes):
    """Attach the heatmap, offset and size prediction branches to a feature map.

    Every branch is a 3x3 convolution (64 filters, ReLU) followed by a 1x1
    convolution that produces the branch output.

    Args:
        backbone_output: Feature-map tensor the branches are applied to.
        num_classes: Number of channels of the heatmap output.

    Returns:
        Tuple ``(heatmap, offset, size)`` of output tensors. ``heatmap`` has
        ``num_classes`` sigmoid-activated channels; ``offset`` and ``size``
        each have 2 channels with no activation. The final layers are named
        "heatmap", "offset" and "size".
    """
    conv = tf.keras.layers.Conv2D

    def branch(out_channels, name, activation=None):
        # Shared branch layout: 3x3 conv + ReLU, then a named 1x1 projection.
        hidden = conv(64, kernel_size=3, padding="same", activation="relu")(backbone_output)
        projection = conv(
            out_channels,
            kernel_size=1,
            padding="same",
            activation=activation,
            name=name,
        )
        return projection(hidden)

    # Branches are built in heatmap -> offset -> size order.
    heatmap = branch(num_classes, "heatmap", activation="sigmoid")
    offset = branch(2, "offset")  # 2 channels for x and y offsets
    size = branch(2, "size")  # 2 channels for width and height
    return heatmap, offset, size


# Build the Complete CenterNet Model

In [None]:
def create_centernet(input_shape=(512, 512, 3), num_classes=80):
    """Assemble the full model: input -> backbone -> detection head.

    Args:
        input_shape: Shape of a single input image (no batch dimension).
        num_classes: Number of heatmap channels; defaults to 80, so pass the
            class count of the dataset being trained on.

    Returns:
        A ``tf.keras.Model`` whose outputs are ``[heatmap, offset, size]``.
    """
    image_input = tf.keras.Input(shape=input_shape)
    feature_extractor = create_backbone(input_shape)
    features = feature_extractor(image_input)
    predictions = list(create_detection_head(features, num_classes))
    return tf.keras.Model(inputs=image_input, outputs=predictions)


# Compile the Model

In [None]:
def focal_loss(gamma=2.0, alpha=0.25):
    """Create an alpha-balanced focal loss for a sigmoid-activated output.

    Args:
        gamma: Exponent applied to the modulating factor; larger values
            down-weight well-classified elements more strongly.
        alpha: Weight used where ``y_true == 1``; ``1 - alpha`` is used
            everywhere else.

    Returns:
        A function ``(y_true, y_pred) -> loss`` returning the per-element
        focal loss with the same shape as ``y_pred``.
    """
    def focal_loss_fixed(y_true, y_pred):
        y_true = tf.cast(y_true, y_pred.dtype)

        # Keep probabilities away from exactly 0 and 1 so the logs stay finite.
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)

        is_positive = tf.equal(y_true, 1.0)
        alpha_factor = tf.where(
            is_positive,
            tf.cast(alpha, y_pred.dtype),
            tf.cast(1.0 - alpha, y_pred.dtype),
        )
        modulating = tf.where(is_positive, 1.0 - y_pred, y_pred)
        focal_weight = alpha_factor * tf.pow(modulating, gamma)

        # Element-wise binary cross-entropy. tf.keras.losses.binary_crossentropy
        # averages over the last axis, so its result has one dimension fewer
        # than `focal_weight` and cannot be multiplied with it per element.
        bce = -(
            y_true * tf.math.log(y_pred)
            + (1.0 - y_true) * tf.math.log(1.0 - y_pred)
        )
        return focal_weight * bce
    return focal_loss_fixed

def smooth_l1_loss(y_true, y_pred):
    """Return the Keras Huber loss (default settings) of predictions vs. targets."""
    huber = tf.keras.losses.Huber()
    return huber(y_true, y_pred)

# Size the heatmap head from the dataset metadata instead of relying on
# create_centernet's default of 80 classes: Pascal VOC 2007 has 20 classes.
NUM_CLASSES = info.features["objects"]["label"].num_classes

centernet = create_centernet(num_classes=NUM_CLASSES)

# One loss per model output, keyed by the name of the layer producing it.
centernet.compile(optimizer='adam',
                  loss={'heatmap': focal_loss(),
                        'offset': smooth_l1_loss,
                        'size': smooth_l1_loss})

# Train the Model

In [None]:
# NOTE(review): `train_dataset` and `val_dataset` are not defined in any cell
# visible in this notebook. Confirm they are built (and yield targets keyed by
# 'heatmap', 'offset' and 'size') before this cell runs on a fresh kernel.
history = centernet.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=20,
)