In [15]:
import ast
import os

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models


In [16]:
# Load the bounding-box annotation table and preview its first rows.
# The preview shows several rows sharing one image_id, i.e. one row per box.
train_df = pd.read_csv("train2.csv")
train_df.head()


Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


Set Parameters

In [17]:
IMAGE_SIZE = 128  # images are resized to IMAGE_SIZE x IMAGE_SIZE; kept small to save memory
BATCH_SIZE = 8    # small batch to avoid crashing
EPOCHS = 5        # few epochs for a first test run
TRAIN_DIR = "train_images"  # directory holding the <image_id>.jpg training images


In [18]:


def parse_row(image_id, bbox, width, height):
    """Load one training image and convert its box to normalized center format.

    Intended to run eagerly (e.g. inside ``tf.py_function``): every argument
    must expose ``.numpy()``.

    Args:
        image_id: scalar string tensor; the image file name without ".jpg".
        bbox: scalar string tensor holding a list literal "[x, y, w, h]",
            where (x, y) is the top-left corner in pixels.
        width: scalar tensor, original image width in pixels.
        height: scalar tensor, original image height in pixels.

    Returns:
        A tuple ``(img, box)``:
        * img: float32 array of shape (IMAGE_SIZE, IMAGE_SIZE, 3), RGB,
          scaled to [0, 1].
        * box: float32 array ``[x_center, y_center, w, h]``, each divided by
          the original image width/height.

    Raises:
        FileNotFoundError: if the image file is missing or cannot be decoded.
        ValueError / SyntaxError: if ``bbox`` is not a valid Python literal.
    """
    # Convert from tensor to Python
    image_id = image_id.numpy().decode('utf-8')
    bbox = bbox.numpy().decode('utf-8')
    width = int(width.numpy())
    height = int(height.numpy())

    # Load image
    img_path = os.path.join(TRAIN_DIR, image_id + ".jpg")
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
    img = img / 255.0  # normalize

    # Convert bbox to YOLO format.
    # SECURITY: the bbox string comes from a CSV file; it was previously passed
    # to eval(), which would execute arbitrary code. literal_eval only accepts
    # Python literals.
    x, y, w, h = ast.literal_eval(bbox)
    x_center = (x + w/2) / width
    y_center = (y + h/2) / height
    w_norm = w / width
    h_norm = h / height

    return img.astype(np.float32), np.array([x_center, y_center, w_norm, h_norm], dtype=np.float32)


In [19]:
def tf_parse(image_id, bbox, width, height):
    """Graph-compatible wrapper around ``parse_row`` for ``Dataset.map``.

    Runs ``parse_row`` through ``tf.py_function`` and then restores the static
    shapes, which ``py_function`` outputs do not carry.

    Returns:
        ``(image, target)``: a float32 tensor of shape
        (IMAGE_SIZE, IMAGE_SIZE, 3) and a float32 tensor of shape (4,).
    """
    outputs = tf.py_function(
        func=parse_row,
        inp=[image_id, bbox, width, height],
        Tout=[tf.float32, tf.float32],
    )
    image, target = outputs

    # set_shape annotates the tensors in place; it does not return a new tensor.
    image.set_shape([IMAGE_SIZE, IMAGE_SIZE, 3])
    target.set_shape([4])
    return image, target


In [20]:
# Build the input pipeline: one element per annotation row (image_id, bbox,
# width, height), decoded lazily by tf_parse.
#
# Shuffle BEFORE map, with a buffer covering the whole table:
#  * rows for the same image are contiguous in the CSV, so a small buffer
#    applied after decoding yields batches drawn from only a handful of images;
#  * shuffling the raw (string, string, int, int) records is cheap, whereas a
#    post-map buffer has to hold decoded float32 images in memory.
dataset = (
    tf.data.Dataset.from_tensor_slices(
        (train_df['image_id'], train_df['bbox'], train_df['width'], train_df['height'])
    )
    .shuffle(len(train_df))  # reshuffles every epoch by default
    .map(tf_parse, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)


In [21]:
from tensorflow.keras import layers, models

# Small CNN that regresses a single normalized box per input image.
model = models.Sequential()
model.add(layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))

# Feature extractor: three 3x3 conv stages, max-pooling after the first two.
model.add(layers.Conv2D(32, 3, activation='relu'))
model.add(layers.MaxPooling2D())
model.add(layers.Conv2D(64, 3, activation='relu'))
model.add(layers.MaxPooling2D())
model.add(layers.Conv2D(128, 3, activation='relu'))

# Regression head; the sigmoid keeps the four outputs in [0, 1]:
# x_center, y_center, w, h
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(4, activation='sigmoid'))

# Mean squared error between predicted and target box coordinates.
model.compile(optimizer='adam', loss='mse')
model.summary()


In [None]:
# Train for EPOCHS passes over `dataset`. No validation data is passed, so only
# the training loss is reported.
# NOTE(review): the annotation table has one row per box, so the same image is
# presented with several different targets while the model predicts a single
# box — this may be why the loss barely moves between epochs; confirm.
history = model.fit(dataset, epochs=EPOCHS)


Epoch 1/5
[1m18475/18475[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1860s[0m 101ms/step - loss: 0.0437
Epoch 2/5
[1m 9599/18475[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m14:34[0m 99ms/step - loss: 0.0433