<a href="https://colab.research.google.com/github/hosseinsyd997/colab/blob/main/object_detection_model_trainer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import zipfile
import os
import shutil

# Unzip the model.zip file
with zipfile.ZipFile('model.zip', 'r') as zip_ref:
    zip_ref.extractall('extracted_model')

# Copy the contents of the model folder to the root directory
source_dir = 'extracted_model/model'
target_dir = './Data'

for item in os.listdir(source_dir):
    s = os.path.join(source_dir, item)
    d = os.path.join(target_dir, item)
    if os.path.isdir(s):
        shutil.copytree(s, d, dirs_exist_ok=True)
    else:
        shutil.copy2(s, d)

print("Files copied successfully!")

Files copied successfully!


In [9]:
!pip install tensorflowjs

import os
import xml.etree.ElementTree as ET
import tensorflow as tf
import tensorflowjs as tfjs
import pandas as pd

# Define paths
image_dir = 'Data/images'
annotation_dir = 'Data/annotations'

# Function to parse XML files
def parse_xml(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    image_info = {
        'image_path': os.path.join(image_dir, root.find('filename').text),
        'width': int(root.find('size').find('width').text),
        'height': int(root.find('size').find('height').text),
        'objects': []
    }

    for obj in root.findall('object'):
        obj_info = {
            'name': obj.find('name').text,
            'xmin': int(obj.find('bndbox').find('xmin').text),
            'ymin': int(obj.find('bndbox').find('ymin').text),
            'xmax': int(obj.find('bndbox').find('xmax').text),
            'ymax': int(obj.find('bndbox').find('ymax').text)
        }
        image_info['objects'].append(obj_info)

    return image_info

# Iterate through annotation files and parse them
annotations = []
for xml_file in os.listdir(annotation_dir):
    if xml_file.endswith('.xml'):
        annotations.append(parse_xml(os.path.join(annotation_dir, xml_file)))

# Display a sample of the parsed data
for i in range(2):
    print(annotations[i])


{'image_path': 'Data/images/70c171cc-photo_7_2025-07-27_04-38-43.jpg', 'width': 1280, 'height': 1001, 'objects': [{'name': 'RP', 'xmin': 743, 'ymin': 348, 'xmax': 753, 'ymax': 355}, {'name': 'RP', 'xmin': 887, 'ymin': 361, 'xmax': 893, 'ymax': 367}]}
{'image_path': 'Data/images/85b0aaa2-photo_6_2025-07-27_04-38-43.jpg', 'width': 1280, 'height': 1001, 'objects': [{'name': 'RP', 'xmin': 539, 'ymin': 323, 'xmax': 545, 'ymax': 325}, {'name': 'RP', 'xmin': 714, 'ymin': 318, 'xmax': 723, 'ymax': 327}]}


In [10]:
import numpy as np

def load_and_preprocess_image(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [128, 128])
    image = image / 255.0  # Normalize to [0,1]
    return image

# Get all unique labels
all_labels = [obj['name'] for ann in annotations for obj in ann['objects']]
unique_labels = sorted(list(set(all_labels)))
label_to_id = {label: i for i, label in enumerate(unique_labels)}
num_classes = len(unique_labels)

def create_dataset(annotations):
    image_paths = []
    boxes = []
    labels = []

    for ann in annotations:
        if not ann['objects']:
            continue

        image_paths.append(ann['image_path'])

        # Take only the first object for simplicity
        obj = ann['objects'][0]

        # Normalize bounding box coordinates
        xmin = obj['xmin'] / ann['width']
        ymin = obj['ymin'] / ann['height']
        xmax = obj['xmax'] / ann['width']
        ymax = obj['ymax'] / ann['height']
        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label_to_id[obj['name']])

    # Create a TensorFlow dataset
    path_ds = tf.data.Dataset.from_tensor_slices(image_paths)
    image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

    box_ds = tf.data.Dataset.from_tensor_slices(tf.constant(boxes, dtype=tf.float32))
    label_ds = tf.data.Dataset.from_tensor_slices(tf.constant(labels, dtype=tf.int64)).map(lambda x: tf.one_hot(x, depth=num_classes))
    label_ds = label_ds.map(lambda x: tf.squeeze(x, axis=0))


    # Zip the datasets together
    dataset = tf.data.Dataset.zip((image_ds, (box_ds, label_ds)))

    return dataset

# Create the final dataset
dataset = create_dataset(annotations)

# Print the element spec of the dataset to verify its structure
print(dataset.element_spec)

(TensorSpec(shape=(128, 128, 3), dtype=tf.float32, name=None), (TensorSpec(shape=(4,), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.float32, name=None)))


In [11]:
import numpy as np

def load_and_preprocess_image(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [128, 128])
    image = image / 255.0  # Normalize to [0,1]
    return image

# Get all unique labels
all_labels = [obj['name'] for ann in annotations for obj in ann['objects']]
unique_labels = sorted(list(set(all_labels)))
label_to_id = {label: i for i, label in enumerate(unique_labels)}
num_classes = len(unique_labels)

def create_dataset(annotations):
    image_paths = []
    boxes = []
    labels = []

    for ann in annotations:
        if not ann['objects']:
            continue

        image_paths.append(ann['image_path'])

        # Take only the first object for simplicity
        obj = ann['objects'][0]

        # Normalize bounding box coordinates
        xmin = obj['xmin'] / ann['width']
        ymin = obj['ymin'] / ann['height']
        xmax = obj['xmax'] / ann['width']
        ymax = obj['ymax'] / ann['height']
        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label_to_id[obj['name']])

    # Create a TensorFlow dataset
    path_ds = tf.data.Dataset.from_tensor_slices(image_paths)
    image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

    box_ds = tf.data.Dataset.from_tensor_slices(tf.constant(boxes, dtype=tf.float32))
    label_ds = tf.data.Dataset.from_tensor_slices(tf.constant(labels, dtype=tf.int64))

    # One-hot encode the labels. The model's loss function for classification is BinaryCrossentropy,
    # which expects a single value per image since we have one output neuron.
    # If num_classes > 1, you would use tf.one_hot and CategoricalCrossentropy.
    if num_classes == 1:
        label_ds = label_ds.map(lambda x: tf.cast(x, tf.float32))
        label_ds = label_ds.map(lambda x: tf.expand_dims(x, axis=-1)) # Reshape to (1,)
    else:
        label_ds = label_ds.map(lambda x: tf.one_hot(x, depth=num_classes))


    # Zip the datasets together
    dataset = tf.data.Dataset.zip((image_ds, (box_ds, label_ds)))

    return dataset

# Create the final dataset
dataset = create_dataset(annotations)

# Print the element spec of the dataset to verify its structure
print(dataset.element_spec)

(TensorSpec(shape=(128, 128, 3), dtype=tf.float32, name=None), (TensorSpec(shape=(4,), dtype=tf.float32, name=None), TensorSpec(shape=(1,), dtype=tf.float32, name=None)))


In [12]:
import xml.etree.ElementTree as ET
import os

def parse_annotation(xml_file_path):
    tree = ET.parse(xml_file_path)
    root = tree.getroot()

    image_path = root.find('path').text if root.find('path') is not None else None
    width_elem = root.find('size/width')
    height_elem = root.find('size/height')

    if width_elem is None or height_elem is None:
        return None

    width = int(width_elem.text)
    height = int(height_elem.text)

    objects = []
    for obj in root.findall('object'):
        name_elem = obj.find('name')
        bndbox = obj.find('bndbox')
        if bndbox is not None and name_elem is not None:
            xmin_elem = bndbox.find('xmin')
            ymin_elem = bndbox.find('ymin')
            xmax_elem = bndbox.find('xmax')
            ymax_elem = bndbox.find('ymax')

            if all(elem is not None for elem in [xmin_elem, ymin_elem, xmax_elem, ymax_elem]):
                objects.append({
                    'name': name_elem.text,
                    'xmin': int(xmin_elem.text),
                    'ymin': int(ymin_elem.text),
                    'xmax': int(xmax_elem.text),
                    'ymax': int(ymax_elem.text)
                })

    # If image_path is not directly available, construct it based on the filename
    if image_path is None and root.find('filename') is not None:
        filename = root.find('filename').text
        image_path = os.path.join('Data/images', filename)

    if not image_path or not objects:
        return None


    return {
        'image_path': image_path,
        'width': width,
        'height': height,
        'objects': objects
    }

annotations = []
for filename in os.listdir('Data/annotations'):
    if filename.endswith('.xml'):
        xml_path = os.path.join('Data/annotations', filename)
        annotation_data = parse_annotation(xml_path)
        if annotation_data:
            annotations.append(annotation_data)

if annotations:
    print(annotations[0])
else:
    print("No annotations found.")

{'image_path': 'Data/images/70c171cc-photo_7_2025-07-27_04-38-43.jpg', 'width': 1280, 'height': 1001, 'objects': [{'name': 'RP', 'xmin': 743, 'ymin': 348, 'xmax': 753, 'ymax': 355}, {'name': 'RP', 'xmin': 887, 'ymin': 361, 'xmax': 893, 'ymax': 367}]}


In [13]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

def build_model(num_classes):
    base_model = MobileNetV2(input_shape=(128, 128, 3), include_top=False, weights='imagenet')
    base_model.trainable = False  # Freeze the base model

    inputs = Input(shape=(128, 128, 3))
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)

    # Bounding box regression head
    box_head = Dense(128, activation='relu')(x)
    box_head = Dense(64, activation='relu')(box_head)
    box_head = Dense(4, activation='sigmoid', name='box_head')(box_head)

    # Classification head
    class_head = Dense(128, activation='relu')(x)
    class_head = Dense(64, activation='relu')(class_head)
    # For binary classification (or single class), a single output neuron is sufficient.
    # If you have more than one class, change the number of units to num_classes
    # and the activation to 'softmax'.
    class_head = Dense(1 if num_classes == 1 else num_classes, activation='sigmoid' if num_classes == 1 else 'softmax', name='class_head')(class_head)

    model = Model(inputs=inputs, outputs=[box_head, class_head])
    return model

model = build_model(num_classes)

# Define losses and optimizer
losses = {
    "box_head": tf.keras.losses.MeanSquaredError(),
    "class_head": tf.keras.losses.BinaryCrossentropy() if num_classes == 1 else tf.keras.losses.CategoricalCrossentropy()
}
loss_weights = {"box_head": 1.0, "class_head": 1.0}
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Compile the model
model.compile(optimizer=optimizer, loss=losses, loss_weights=loss_weights, metrics={"class_head": "accuracy"})

# Prepare the dataset for training
def prepare_for_training(ds, batch_size=4, shuffle_buffer_size=100):
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    ds = ds.repeat()
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    return ds

train_dataset = prepare_for_training(dataset)

# Train the model
# We need to filter out annotations with no objects before calculating the number of steps
annotations_with_objects = [ann for ann in annotations if ann['objects']]
history = model.fit(train_dataset, epochs=10, steps_per_epoch=len(annotations_with_objects) // 4)

Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 73ms/step - box_head_loss: 0.0648 - class_head_accuracy: 0.6667 - class_head_loss: 0.4903 - loss: 0.5551
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step - box_head_loss: 0.0317 - class_head_accuracy: 1.0000 - class_head_loss: 0.0027 - loss: 0.0344
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - box_head_loss: 0.0103 - class_head_accuracy: 1.0000 - class_head_loss: 2.0468e-04 - loss: 0.0105
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - box_head_loss: 0.0143 - class_head_accuracy: 1.0000 - class_head_loss: 6.9851e-06 - loss: 0.0144
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step - box_head_loss: 0.0054 - class_head_accuracy: 1.0000 - class_head_loss: 1.1784e-06 - loss: 0.0054
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - box_head_

In [14]:
import tensorflow as tf

# Save the model in the SavedModel format
tf.saved_model.save(model, 'saved_model')

# Convert the SavedModel to TensorFlow.js format
!tensorflowjs_converter --input_format=tf_saved_model saved_model tfjs_model

2025-07-30 04:24:29.642953: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753849469.681556    3925 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753849469.693541    3925 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[32m🌲 Try [0m[34mhttps://ydf.readthedocs.io[0m[32m, the successor of TensorFlow Decision Forests with more features and faster training![0m
2025-07-30 04:24:35.726277: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
I0000 00:00:1753849477.330548    3925 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capabi