Libraries Used

In [None]:
import tensorflow as tf
import numpy as np
import json as js
import os
from keras.models import Model #type: ignore
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Reshape #type: ignore
from keras.optimizers import Adam #type: ignore
from keras.utils import Sequence #type: ignore
from PIL import Image

Loading Paths

In [None]:
# Read the path configuration once; a later cell indexes into `paths`.
# The encoding is pinned because JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open("config.json", 'r', encoding="utf-8") as file:
    paths = js.load(file)

In [None]:
def build_object_detector(input_shape=(224, 224, 3)):
    """Build and compile a small CNN that regresses four values per image.

    The network is three Conv2D + MaxPooling2D stages (16, 32 and 64
    filters), a Flatten, a 256-unit ReLU layer and a 4-unit sigmoid output.
    It is compiled with the Adam optimizer and mean-squared-error loss.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
            Defaults to the 224x224 RGB arrays that
            ObjectDetectionDataGenerator yields. The previous hard-coded
            256x256 input did not match those batches, so the Dense layer
            after Flatten was sized for a different feature count than the
            data produces.

    Returns:
        The compiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)
    x = inputs
    # Each stage keeps the spatial size in the convolution ('same' padding)
    # and halves it in the pooling step.
    for filters in (16, 32, 64):
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    # Sigmoid bounds every output to (0, 1); targets are expected in that range.
    outputs = Dense(4, activation='sigmoid')(x)
    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(), loss='mse')
    return model

def load_json(json_path):
    """Parse the JSON file at ``json_path`` and return the decoded object.

    The file is read as UTF-8 (the encoding JSON is defined in) rather than
    the platform's locale encoding, so non-ASCII text decodes identically on
    every OS.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        return js.load(f)

class ObjectDetectionDataGenerator(Sequence):
    """Keras ``Sequence`` that yields (images, boxes) batches from disk.

    The JSON file is loaded into a mapping whose keys are used as file names
    inside ``image_dir`` and whose values are the per-image box arrays.
    Images are converted to RGB, resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE``
    and scaled to [0, 1]; box values are divided by ``IMAGE_SIZE``.

    NOTE(review): dividing the boxes by the resize target presumes the
    annotations are expressed in pixel coordinates of the resized image,
    not of the original file - confirm against the annotation source.
    """

    # Side length (pixels) of the square images handed to the model.
    IMAGE_SIZE = 224

    def __init__(self, json_path, image_dir, batch_size=16, **kwargs):
        """Load the annotations and remember where the images live.

        Args:
            json_path: Path of the JSON annotation file.
            image_dir: Directory containing the image files.
            batch_size: Number of samples per batch; must be positive.
            **kwargs: Forwarded to the Keras base-class constructor.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        # Keras warns on every fit() when the base constructor is skipped.
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.bbox_data = load_json(json_path)
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.image_names = list(self.bbox_data.keys())

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.image_names) // self.batch_size

    def __getitem__(self, index):
        """Return batch ``index`` as a pair of arrays ``(images, labels)``."""
        size = self.IMAGE_SIZE
        start = index * self.batch_size
        batch_images = self.image_names[start:start + self.batch_size]
        images, labels = [], []
        for img_name in batch_images:
            img_path = os.path.join(self.image_dir, img_name)
            # Context manager closes the underlying file as soon as the
            # pixels have been copied into the resized image.
            with Image.open(img_path) as source:
                image = source.convert('RGB').resize((size, size))
            images.append(np.array(image) / 255.0)
            labels.append(np.array(self.bbox_data[img_name]) / float(size))
        return np.array(images), np.array(labels)

def train_object_detector(model, json_path, image_dir, epochs=10, batch_size=16):
    """Fit ``model`` on batches from an ObjectDetectionDataGenerator.

    Args:
        model: Object exposing ``fit``; it is trained in place.
        json_path: Annotation JSON path handed to the generator.
        image_dir: Image directory handed to the generator.
        epochs: Number of passes over the generator.
        batch_size: Samples per batch requested from the generator.

    Returns:
        The same ``model`` object that was passed in.
    """
    batches = ObjectDetectionDataGenerator(
        json_path,
        image_dir,
        batch_size,
    )
    model.fit(batches, epochs=epochs)
    return model

# Build the detector, then fit it with the default epochs and batch size.
model = build_object_detector()
# NOTE(review): both `paths[]` subscripts are empty, which is not valid
# Python. The config keys for the annotation JSON (2nd argument) and the
# image directory (3rd argument) must be filled in; the key names are not
# visible here - confirm against config.json.
trained_model = train_object_detector(model, paths[], paths[])


I0000 00:00:1742907811.805858   47626 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1742907812.193558   47626 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1742907812.200184   47626 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1742907812.210057   47626 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

Epoch 1/10


  self._warn_if_super_not_called()
I0000 00:00:1742907814.651395   47915 service.cc:146] XLA service 0x7338f400a6e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742907814.651418   47915 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
2025-03-25 18:33:34.707747: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-03-25 18:33:34.885857: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90101


[1m   6/7329[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:19[0m 27ms/step - loss: 0.5076

I0000 00:00:1742907820.489340   47915 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 24ms/step - loss: 0.4268
Epoch 2/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m216s[0m 29ms/step - loss: 0.3964
Epoch 3/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 26ms/step - loss: 0.3769
Epoch 4/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 16ms/step - loss: 0.3418
Epoch 5/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 16ms/step - loss: 0.3013
Epoch 6/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 29ms/step - loss: 0.2727
Epoch 7/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 30ms/step - loss: 0.2564
Epoch 8/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 28ms/step - loss: 0.2484
Epoch 9/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 26ms/step - loss: 0.2401
Epoch 10/10
[1m7329/7329[0m [32m━━━━━━━━━━━━━━━━━━━━[