In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard


In [2]:
IMG_SIZE = 224
GRID_H, GRID_W = 7, 7
NUM_BOXES = 2
NUM_CLASSES = 1    # single class: "car"
BATCH = 16
EPOCHS = 50

In [3]:
DATA_ROOT = "D:\Downloads\carDetection\data"  # <-- point this at your data folder
TRAIN_CSV = os.path.join(DATA_ROOT, 'train_solution_bounding_boxes.csv')
TRAIN_DIR = os.path.join(DATA_ROOT, 'training_images')
TEST_DIR  = os.path.join(DATA_ROOT, 'testing_images')
SUB_CSV   = os.path.join(DATA_ROOT, 'sample_submission.csv')


In [4]:
def load_data_from_csv(csv_path, img_dir):
    """
    Returns:
      images:      np.array, shape (N,IMG_SIZE,IMG_SIZE,3)
      boxes_list:  list of arrays (num_boxes,4) in [x1,y1,x2,y2]
      labels_list: list of arrays (num_boxes,) with value 1.0
    """
    df = pd.read_csv(csv_path)
    images, boxes_list, labels_list = [], [], []

    for fname, group in df.groupby('image'):
        img_path = os.path.join(img_dir, fname)
        img = cv2.imread(img_path)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) / 255.0

        boxes = group[['xmin','ymin','xmax','ymax']].values.astype(np.float32)
        labels = np.ones((boxes.shape[0],), dtype=np.float32)

        images.append(img)
        boxes_list.append(boxes)
        labels_list.append(labels)

    return np.array(images), boxes_list, labels_list

In [5]:
def convert_targets(boxes, labels, img_w=IMG_SIZE, img_h=IMG_SIZE):
    target = np.zeros((GRID_H, GRID_W, NUM_BOXES, 5 + NUM_CLASSES), np.float32)
    for b, lab in zip(boxes, labels):
        cx = (b[0] + b[2]) / (2 * img_w)
        cy = (b[1] + b[3]) / (2 * img_h)
        w  = (b[2] - b[0]) / img_w
        h  = (b[3] - b[1]) / img_h
        gx = int(np.clip(np.floor(cx * GRID_W), 0, GRID_W - 1))
        gy = int(np.clip(np.floor(cy * GRID_H), 0, GRID_H - 1))

        for box_idx in range(NUM_BOXES):
            if target[gy, gx, box_idx, 4] == 0:
                target[gy, gx, box_idx, 0] = cx * GRID_W - gx
                target[gy, gx, box_idx, 1] = cy * GRID_H - gy
                target[gy, gx, box_idx, 2] = w
                target[gy, gx, box_idx, 3] = h
                target[gy, gx, box_idx, 4] = 1.0
                target[gy, gx, box_idx, 5] = 1.0 if lab == 1 else 0.0
                break
    return target


In [6]:
def data_generator(images, boxes_list, labels_list, batch_size, augment=False):
    while True:
        idxs = np.random.permutation(len(images))
        for start in range(0, len(images), batch_size):
            batch_idxs = idxs[start:start+batch_size]
            batch_imgs, batch_tgts = [], []
            for i in batch_idxs:
                img = images[i].copy()
                boxes, labels = boxes_list[i], labels_list[i]
                if augment and np.random.rand()>0.5:
                    img = np.fliplr(img)
                    boxes[:, [0,2]] = IMG_SIZE - boxes[:, [2,0]]
                batch_imgs.append(img)
                batch_tgts.append(convert_targets(boxes, labels))
            yield np.array(batch_imgs), np.array(batch_tgts)

In [7]:
def compute_giou(true_corners, pred_corners):
    x1 = tf.maximum(true_corners[...,0], pred_corners[...,0])
    y1 = tf.maximum(true_corners[...,1], pred_corners[...,1])
    x2 = tf.minimum(true_corners[...,2], pred_corners[...,2])
    y2 = tf.minimum(true_corners[...,3], pred_corners[...,3])
    inter = tf.maximum(0., x2-x1) * tf.maximum(0., y2-y1)
    area_t = (true_corners[...,2]-true_corners[...,0]) * (true_corners[...,3]-true_corners[...,1])
    area_p = (pred_corners[...,2]-pred_corners[...,0]) * (pred_corners[...,3]-pred_corners[...,1])
    union = area_t + area_p - inter + 1e-7
    iou = inter / union
    ex1 = tf.minimum(true_corners[...,0], pred_corners[...,0])
    ey1 = tf.minimum(true_corners[...,1], pred_corners[...,1])
    ex2 = tf.maximum(true_corners[...,2], pred_corners[...,2])
    ey2 = tf.maximum(true_corners[...,3], pred_corners[...,3])
    enc = (ex2-ex1)*(ey2-ey1) + 1e-7
    return iou - (enc - union) / enc

In [13]:
def yolo_style_loss(y_true, y_pred):
    # unpack
    true_xy   = y_true[..., :2]
    true_wh   = y_true[..., 2:4]
    true_conf = y_true[..., 4:5]    # shape [...,1]
    true_cls  = y_true[..., 5:]     # shape [...,1]

    pred_xy   = y_pred[..., :2]
    pred_wh   = y_pred[..., 2:4]
    pred_conf = y_pred[..., 4:5]
    pred_cls  = y_pred[..., 5:]

    # corners for GIoU
    t_corners = tf.concat([true_xy - true_wh/2, true_xy + true_wh/2], axis=-1)
    p_corners = tf.concat([pred_xy - pred_wh/2, pred_xy + pred_wh/2], axis=-1)
    giou = compute_giou(t_corners, p_corners)
    # giou_loss = true_conf * (1.0 - giou)[...,tf.newaxis]  # shape [...,1]
    giou_loss = tf.maximum(0.0, 1.0 - giou)[...,tf.newaxis]  # shape [...,1]
    # masks
    obj_mask   = true_conf
    noobj_mask = 1.0 - obj_mask

    # confidence BCE, then re-expand dims to [...,1]
    bce_conf = tf.keras.losses.binary_crossentropy(true_conf, pred_conf)
    bce_conf = tf.expand_dims(bce_conf, axis=-1)             # :contentReference[oaicite:2]{index=2}
    conf_loss = obj_mask * bce_conf + 0.5 * noobj_mask * bce_conf

    # class BCE, re-expand dims
    bce_cls   = tf.keras.losses.binary_crossentropy(true_cls, pred_cls)
    bce_cls   = tf.expand_dims(bce_cls, axis=-1)
    class_loss = obj_mask * bce_cls

    # sum all components
    total = (giou_loss + conf_loss + class_loss)
    return tf.reduce_mean(total)


In [14]:
backbone = MobileNetV2(weights='imagenet', include_top=False,
                       input_shape=(IMG_SIZE,IMG_SIZE,3))
backbone.trainable = False

In [15]:
x = backbone.output
x = layers.Conv2D(128,3,padding='same',activation='relu')(x)
x = layers.Conv2D(128,3,padding='same',activation='relu')(x)
x = layers.Conv2D(NUM_BOXES*(4+1+NUM_CLASSES),1,padding='same')(x)
x = layers.Reshape((GRID_H,GRID_W,NUM_BOXES,5+NUM_CLASSES))(x)

xy   = x[..., :2]
wh   = x[..., 2:4]
conf = layers.Activation('sigmoid')(x[..., 4:5])
cls  = layers.Activation('sigmoid')(x[..., 5:])
out  = layers.Concatenate(axis=-1)([xy, wh, conf, cls])

model = models.Model(backbone.input, out)
lr_schedule = optimizers.schedules.CosineDecay(
    initial_learning_rate=1e-3, decay_steps=20000, alpha=1e-3)
optimizer = optimizers.Adam(learning_rate=lr_schedule)
model.compile(optimizer=optimizer, loss=yolo_style_loss)

In [16]:
imgs, boxes, labels = load_data_from_csv(TRAIN_CSV, TRAIN_DIR)
train_imgs, val_imgs, train_boxes, val_boxes, train_labels, val_labels = train_test_split(
    imgs, boxes, labels, test_size=0.2, random_state=42
)

train_gen = data_generator(train_imgs, train_boxes, train_labels, BATCH, augment=True)
val_gen   = data_generator(val_imgs,   val_boxes,   val_labels,   BATCH, augment=False)


In [17]:
es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
tb = TensorBoard(log_dir='./logs', histogram_freq=1)

steps_tr  = len(train_imgs) // BATCH
steps_val = len(val_imgs)   // BATCH

model.fit(
    train_gen,
    validation_data=val_gen,
    steps_per_epoch=steps_tr,
    validation_steps=steps_val,
    epochs=EPOCHS,
    callbacks=[es, tb]
)

Epoch 1/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 2s/step - loss: 49385136.0000 - val_loss: 3.4259
Epoch 2/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1s/step - loss: 3.1130 - val_loss: 2.2314
Epoch 3/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 960ms/step - loss: 1.9400 - val_loss: 1.4935
Epoch 4/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 948ms/step - loss: 1.4469 - val_loss: 1.3746
Epoch 5/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1s/step - loss: 1.3633 - val_loss: 1.3386
Epoch 6/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 935ms/step - loss: 1944.9539 - val_loss: 1.1550
Epoch 7/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 963ms/step - loss: 1.1468 - val_loss: 1.0996
Epoch 8/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 1s/step - loss: 1.0878 - val_loss: 1.0592
Epoch 9/50
[1m17/17[0m [32m━━━━

<keras.src.callbacks.history.History at 0x284d2672450>

In [19]:
model.save('car_detector.keras')