Libraries Used

In [1]:
import json as js
import tensorflow as tf
from os import listdir as ld
from os import path
from keras.models import Model #type: ignore
from keras import layers,models #type: ignore
from tqdm import tqdm
from keras.utils import to_categorical #type: ignore
from keras.losses import Huber #type: ignore
from collections import defaultdict
import random

2025-04-20 23:29:15.091178: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-20 23:29:15.126171: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-20 23:29:15.136297: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-20 23:29:15.310051: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading Paths

In [2]:
# Read the path configuration used throughout the notebook. The file name is
# relative, so it is resolved against the current working directory.
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's locale default.
with open("config_CNN.json", 'r', encoding="utf-8") as file:
    paths = js.load(file)

Creating Datasets

In [3]:
# --- Locations and constants -------------------------------------------------
# Image directories and annotation files are looked up in the `paths` config.
train_img_dir = paths['Train_resized']
train_annotation = paths['Preprocessed_Train_json']
val_img_dir = paths['Validation_resized']
val_annotation = paths['Preprocessed_Validation_json']
NUM_CLASSES = 80  # depth of the one-hot class target built in preprocess_example

# --- Annotation files --------------------------------------------------------
with open(train_annotation, 'r') as f:
    train_annotations = js.load(f)

with open(val_annotation, 'r') as f:
    val_annotations = js.load(f)

# --- Sample lists ------------------------------------------------------------
# Every annotation record becomes one (image_path, bbox, label) tuple; the
# image path is the split's image directory joined with the record's "img_id".
print("Processing Training Data")

train_data = [
    (path.join(train_img_dir, record["img_id"]), record['bbox'], record['category_id'])
    for record in tqdm(train_annotations, desc="Training Annotations")
]

print("Processing Validation Data")

val_data = [
    (path.join(val_img_dir, record["img_id"]), record['bbox'], record['category_id'])
    for record in tqdm(val_annotations, desc="Validation Annotations")
]

def preprocess_example(image_path, bbox, label):
    """Load one annotated image and build the model's (input, targets) pair.

    Args:
        image_path: Path of a JPEG file, read with ``tf.io.read_file``.
        bbox: Four numbers ``[x, y, w, h]``. The first and third are divided
            by the image width, the second and fourth by the image height.
            NOTE(review): assumed to be expressed in pixels of the image
            stored at ``image_path`` -- confirm against the preprocessing step.
        label: Integer class index passed to ``tf.one_hot``.
            NOTE(review): presumably already remapped to ``0..NUM_CLASSES-1``;
            verify against the annotation preprocessing.

    Returns:
        A tuple ``(image, targets)`` where ``image`` is a float32 tensor
        resized to 64x64 and scaled by 1/255, and ``targets`` is a dict with
        ``"class_output"`` (one-hot vector of depth ``NUM_CLASSES``) and
        ``"bbox_output"`` (the normalised ``[x, y, w, h]``).
    """
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)

    # Measure the image *before* resizing. After the resize the shape is
    # always 64x64, so normalising by it would ignore the real size of the
    # image the box coordinates refer to.
    img_shape = tf.shape(image)
    img_h = tf.cast(img_shape[0], tf.float32)
    img_w = tf.cast(img_shape[1], tf.float32)

    image = tf.image.resize(image, [64, 64])
    image = tf.cast(image, tf.float32) / 255.0

    # Convert bbox to tensor if not already
    bbox = tf.convert_to_tensor(bbox, dtype=tf.float32)

    # Express the box as fractions of the image size.
    x = bbox[0] / img_w
    y = bbox[1] / img_h
    w = bbox[2] / img_w
    h = bbox[3] / img_h

    bbox_tensor = tf.stack([x, y, w, h])
    class_tensor = tf.one_hot(label, depth=NUM_CLASSES)

    # Keys match the names of the model's two output layers.
    return image, {
        "class_output": class_tensor,
        "bbox_output": bbox_tensor
    }


def create_dataset(data, batch_size=4, shuffle=True, show_progress=False):
    """Build a batched, prefetched ``tf.data.Dataset`` from annotated samples.

    Args:
        data: Non-empty sequence of ``(image_path, bbox, label)`` tuples.
        batch_size: Number of samples per batch.
        shuffle: Whether to shuffle the samples (buffer covers all of ``data``).
        show_progress: If True, every sample is preprocessed eagerly under a
            tqdm progress bar and held in memory; otherwise preprocessing is
            done lazily inside the ``tf.data`` pipeline.

    Returns:
        A dataset yielding ``(images, targets)`` batches, where ``targets`` is
        the dict produced by ``preprocess_example``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    if len(data) == 0:
        raise ValueError("create_dataset() needs at least one sample")

    if show_progress:
        print("Preprocessing dataset with progress bar...")
        images, class_targets, bbox_targets = [], [], []
        for item in tqdm(data, desc="Preprocessing Samples"):
            image, targets = preprocess_example(*item)
            images.append(image)
            class_targets.append(targets["class_output"])
            bbox_targets.append(targets["bbox_output"])
        # Stack per-sample tensors so from_tensor_slices slices along the
        # sample axis and yields (image, targets) elements; a plain list of
        # (tensor, dict) tuples cannot be converted to a dataset.
        dataset = tf.data.Dataset.from_tensor_slices((
            tf.stack(images),
            {
                "class_output": tf.stack(class_targets),
                "bbox_output": tf.stack(bbox_targets),
            },
        ))
        if shuffle:
            dataset = dataset.shuffle(buffer_size=len(data))
    else:
        # Named image_paths so the notebook-level `paths` config is not shadowed.
        image_paths, bboxes, labels = zip(*data)
        dataset = tf.data.Dataset.from_tensor_slices(
            (list(image_paths), list(bboxes), list(labels))
        )
        # Shuffle before mapping: the buffer then holds small
        # (path, bbox, label) elements instead of decoded float images.
        if shuffle:
            dataset = dataset.shuffle(buffer_size=len(data))
        dataset = dataset.map(preprocess_example, num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

# Input pipelines: the training split is shuffled, the validation split keeps
# the order of the annotation file. Both use batches of two samples.
train_dataset = create_dataset(train_data, batch_size=2, shuffle=True, show_progress=False)
val_dataset = create_dataset(val_data, batch_size=2, shuffle=False, show_progress=False)


Processing Training Data


Training Annotations: 100%|██████████| 2580003/2580003 [00:03<00:00, 797783.53it/s] 


Processing Validation Data


Validation Annotations: 100%|██████████| 110343/110343 [00:00<00:00, 572390.53it/s]
I0000 00:00:1745171974.463270    5377 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1745171974.850310    5377 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1745171974.852625    5377 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1745

Defining model

In [4]:
def compute_iou_tf(box1, box2):
    """Row-wise intersection-over-union of two batches of boxes.

    Both arguments are indexed as ``box[:, k]`` with columns
    ``[x, y, w, h]``; the far corner is taken as ``(x + w, y + h)``.
    Rows whose union is zero yield 0 (via ``divide_no_nan``).

    Returns:
        A 1-D tensor with one IoU value per row.
    """
    # Corners of each box.
    left1, top1 = box1[:, 0], box1[:, 1]
    right1, bottom1 = left1 + box1[:, 2], top1 + box1[:, 3]
    left2, top2 = box2[:, 0], box2[:, 1]
    right2, bottom2 = left2 + box2[:, 2], top2 + box2[:, 3]

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    overlap_w = tf.maximum(0.0, tf.minimum(right1, right2) - tf.maximum(left1, left2))
    overlap_h = tf.maximum(0.0, tf.minimum(bottom1, bottom2) - tf.maximum(top1, top2))
    intersection = overlap_w * overlap_h

    union = box1[:, 2] * box1[:, 3] + box2[:, 2] * box2[:, 3] - intersection
    return tf.math.divide_no_nan(intersection, union)

In [5]:
class IoUMetric(tf.keras.metrics.Metric):
    """Streaming mean IoU between true and predicted ``[x, y, w, h]`` boxes.

    The metric accumulates the sum of per-box IoU values and the number of
    boxes seen; ``result()`` is their ratio (0 when nothing has been seen).
    """

    def __init__(self, name='iou_metric', **kwargs):
        super().__init__(name=name, **kwargs)
        # Running sum of IoU values and running number of boxes.
        self.total_iou = self.add_weight(name='total_iou', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the IoU of one batch.

        Args:
            y_true: Ground-truth boxes; reshaped to ``(-1, 4)``.
            y_pred: Predicted boxes; reshaped to ``(-1, 4)``.
            sample_weight: Accepted for API compatibility and ignored.
        """
        # Cast so accumulation into the float32 weights does not depend on
        # the dtype of the incoming tensors.
        y_true = tf.reshape(tf.cast(y_true, tf.float32), (-1, 4))
        y_pred = tf.reshape(tf.cast(y_pred, tf.float32), (-1, 4))
        ious = compute_iou_tf(y_true, y_pred)
        self.total_iou.assign_add(tf.reduce_sum(ious))
        self.count.assign_add(tf.cast(tf.size(ious), tf.float32))

    def result(self):
        """Return the mean IoU over everything accumulated so far."""
        return tf.math.divide_no_nan(self.total_iou, self.count)

    def reset_state(self):
        """Clear the accumulators (the hook current Keras invokes between epochs)."""
        self.total_iou.assign(0.0)
        self.count.assign(0.0)

    # Legacy spelling of the reset hook, kept so existing callers still work.
    reset_states = reset_state


In [6]:
def mbconv_block(x, filters, kernel_size=(3, 3), strides=(1, 1), expand_ratio=4):
    """Apply an expand -> depthwise -> project convolution block to ``x``.

    Args:
        x: Input feature map; its last dimension is used as the channel count.
        filters: Number of output channels of the final 1x1 projection.
        kernel_size: Kernel size of the depthwise convolution.
        strides: Stride of the depthwise convolution; an int or a pair.
        expand_ratio: Multiplier applied to the input channel count for the
            first 1x1 convolution.

    Returns:
        The block output. The input is added back (residual connection) only
        when the stride is 1 in both directions and the channel count is
        unchanged.
    """
    # Normalise so 1, [1, 1] and (1, 1) are all recognised as "no
    # downsampling" by the residual check below.
    if isinstance(strides, int):
        strides = (strides, strides)
    else:
        strides = tuple(strides)

    input_tensor = x
    in_channels = x.shape[-1]

    # 1x1 expansion
    x = layers.Conv2D(in_channels * expand_ratio, (1, 1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # Depthwise spatial convolution (carries the stride)
    x = layers.DepthwiseConv2D(kernel_size, strides=strides, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # 1x1 projection, no activation
    x = layers.Conv2D(filters, (1, 1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    if strides == (1, 1) and in_channels == filters:
        x = layers.Add()([input_tensor, x])
    return x

def simple_efficient_model(input_shape=(64, 64, 3), num_classes=80):
    """Build and compile a small two-headed CNN (class label + bounding box).

    Args:
        input_shape: Shape of one input image.
        num_classes: Width of the softmax classification head.

    Returns:
        A compiled Keras model whose outputs are the dict
        ``{"class_output": ..., "bbox_output": ...}``.
    """
    image_input = layers.Input(shape=input_shape)

    # Stem: strided 3x3 convolution.
    features = layers.Conv2D(32, (3, 3), strides=(2, 2), padding='same')(image_input)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    # Backbone: three MBConv stages given as (filters, strides, expand_ratio).
    stage_specs = (
        (64, (1, 1), 2),
        (64, (2, 2), 2),
        (128, (1, 1), 4),
    )
    for stage_filters, stage_strides, stage_expand in stage_specs:
        features = mbconv_block(
            features, stage_filters, strides=stage_strides, expand_ratio=stage_expand
        )
    pooled = layers.GlobalAveragePooling2D()(features)

    # Classification head.
    class_hidden = layers.Dense(128, activation='relu')(pooled)
    class_hidden = layers.Dropout(0.4)(class_hidden)
    class_output = layers.Dense(num_classes, activation='softmax', name="class_output")(class_hidden)

    # Box-regression head; sigmoid keeps the four outputs in [0, 1].
    box_hidden = layers.Dense(64, activation='relu')(pooled)
    bbox_output = layers.Dense(4, activation='sigmoid', name="bbox_output")(box_hidden)

    outputs = {"class_output": class_output, "bbox_output": bbox_output}
    model = models.Model(image_input, outputs)

    optimizer = tf.keras.optimizers.Adam(1e-4)
    losses = {
        "class_output": "categorical_crossentropy",
        "bbox_output": Huber(),
    }
    # A second Huber instance is reported as the box metric.
    # NOTE(review): the checkpoint callback presumably monitors a name derived
    # from this metric -- keep the two in sync if it changes.
    tracked_metrics = {
        "class_output": "accuracy",
        "bbox_output": Huber(),
    }
    model.compile(optimizer=optimizer, loss=losses, metrics=tracked_metrics)
    return model

# Build the compiled network. The class head's width is taken from NUM_CLASSES,
# which also sets the depth of the one-hot targets, so the two cannot drift apart.
model = simple_efficient_model(num_classes=NUM_CLASSES)

Checkpoints

In [7]:
# Save the full model (not just weights) whenever the monitored value reaches
# a new minimum. The monitored value is the box Huber metric computed on the
# validation data ("val_" prefix), so "best" is judged on data the model was
# not trained on rather than on the training metric.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath = paths["Trained_model"],
    monitor = 'val_bbox_output_huber_loss',
    save_best_only = True,
    save_weights_only = False,
    mode = 'min',
    verbose = 1
)

Model training 

In [None]:
# Train for two epochs on the training pipeline, evaluating on the validation
# pipeline; the checkpoint callback handles writing the model to disk.
model.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=2,
    callbacks=[checkpoint],
)

Epoch 1/2


2025-04-20 23:30:03.138427: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 211524 of 2580003
