In [2]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from tqdm import tqdm

2024-12-09 22:47:29.119313: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-09 22:47:29.120724: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-09 22:47:29.123594: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-09 22:47:29.132794: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733802449.148124 3539844 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733802449.15

### Path and Image Size

In [3]:
image_dir = "/projectnb/ds340/projects/chop_dhruv/PNGimages/PNGImages"
label_dir = "/projectnb/ds340/projects/chop_dhruv/PNGimages/OUT/OUT"
model_save_path = "/projectnb/ds340/projects/chop_dhruv/pedestrian_detection_model_so_much.h5"
IMG_WIDTH, IMG_HEIGHT = 224, 224 

### Loading the Data

In [5]:
def load_data(image_dir, label_dir, max_images=10000):

    images, labels = [], []
    image_files = sorted([f for f in os.listdir(image_dir) if f.endswith(".jpg")])

    if len(image_files) > max_images:
        
        image_files = image_files[:max_images]
        
    for image_file in tqdm(image_files):
        image_path = os.path.join(image_dir, image_file)
        label_path = os.path.join(label_dir, image_file.replace(".jpg", ".txt")) # same names

        if not os.path.exists(label_path):
            print(f"Label file missing for {image_file}")
            continue

        img = cv2.imread(image_path)
        if img is None:
            print(f"Error {image_file}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
        img = img / 255.0

        with open(label_path, "r") as f:
            bboxes = []
            for line in f:
                parts = line.strip().split()
                if len(parts) == 5:
                    _, x_center, y_center, box_width, box_height = map(float, parts)
                    bboxes.append([x_center, y_center, box_width, box_height])

        images.append(img)
        labels.append(np.array(bboxes)) 

    return np.array(images, dtype=np.float32), labels

X, y = load_data(image_dir, label_dir)


100%|██████████| 5903/5903 [00:34<00:00, 169.45it/s]


### Train & Testing

In [6]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

### CNN

In [11]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(4)
])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

### Custom Loss Function 

In [12]:
def smooth_l1_loss(y_true, y_pred):
    return tf.keras.losses.Huber()(y_true, y_pred)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss=smooth_l1_loss, metrics=['mae'])

### Training Prep

In [13]:
def prepare_training_data(X, y):
    X_train, y_train = [], []
    for img, bboxes in zip(X, y):
        for bbox in bboxes:
            X_train.append(img)
            y_train.append(bbox)
    return np.array(X_train), np.array(y_train)

X_train_prepared, y_train_prepared = prepare_training_data(X_train, y_train)
X_val_prepared, y_val_prepared = prepare_training_data(X_val, y_val)

### Model.fit

In [14]:
history = model.fit(
    X_train_prepared, y_train_prepared,
    validation_data=(X_val_prepared, y_val_prepared),
    epochs=50,
    batch_size=16,
    callbacks=[early_stopping, reduce_lr]
)

Epoch 1/50
[1m1926/1926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 77ms/step - loss: 0.0128 - mae: 0.1103 - val_loss: 0.0085 - val_mae: 0.0768 - learning_rate: 1.0000e-04
Epoch 2/50
[1m1926/1926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 73ms/step - loss: 0.0091 - mae: 0.0862 - val_loss: 0.0082 - val_mae: 0.0765 - learning_rate: 1.0000e-04
Epoch 3/50
[1m1926/1926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 74ms/step - loss: 0.0082 - mae: 0.0790 - val_loss: 0.0079 - val_mae: 0.0736 - learning_rate: 1.0000e-04
Epoch 4/50
[1m1926/1926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 75ms/step - loss: 0.0078 - mae: 0.0750 - val_loss: 0.0079 - val_mae: 0.0729 - learning_rate: 1.0000e-04
Epoch 5/50
[1m1926/1926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 77ms/step - loss: 0.0075 - mae: 0.0723 - val_loss: 0.0079 - val_mae: 0.0738 - learning_rate: 1.0000e-04
Epoch 6/50
[1m1926/1926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151

### Save

In [15]:
model.save(model_save_path)
print(f"Model saved to {model_save_path}")



Model saved to /projectnb/ds340/projects/chop_dhruv/pedestrian_detection_model_so_much.h5


### Intersection Over Union (Accuracy)

In [16]:
def compute_iou(box1, box2):
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2

    inter_x_min = max(x1_min, x2_min)
    inter_y_min = max(y1_min, y2_min)
    inter_x_max = min(x1_max, x2_max)
    inter_y_max = min(y1_max, y2_max)

    inter_width = max(0, inter_x_max - inter_x_min)
    inter_height = max(0, inter_y_max - inter_y_min)
    inter_area = inter_width * inter_height

    if inter_area == 0:
        return 0

    box1_area = (x1_max - x1_min) * (y1_max - y1_min)
    box2_area = (x2_max - x2_min) * (y2_max - y2_min)
    union_area = box1_area + box2_area - inter_area

    return inter_area / union_area

## Predicting and Output 

In [17]:
def evaluate_model(X, y, model, iou_threshold=0): # zero explained why in report
    correct_predictions = 0
    total_images = len(X)

    preds = model.predict(X)

    for true_boxes, pred_box in zip(y, preds):
        image_correct = False

        pred_coords = [
            (pred_box[0] - pred_box[2] / 2) * IMG_WIDTH,  
            (pred_box[1] - pred_box[3] / 2) * IMG_HEIGHT, 
            (pred_box[0] + pred_box[2] / 2) * IMG_WIDTH, 
            (pred_box[1] + pred_box[3] / 2) * IMG_HEIGHT,  
        ]

        for true_box in true_boxes:
            if np.all(true_box == 0):
                continue

            true_coords = [
                (true_box[0] - true_box[2] / 2) * IMG_WIDTH,  
                (true_box[1] - true_box[3] / 2) * IMG_HEIGHT,  
                (true_box[0] + true_box[2] / 2) * IMG_WIDTH, 
                (true_box[1] + true_box[3] / 2) * IMG_HEIGHT, 
            ]

            iou = compute_iou(true_coords, pred_coords)

            if iou >= iou_threshold:
                image_correct = True
                break

        if image_correct:
            correct_predictions += 1

    accuracy = correct_predictions / total_images
    print(f"Model Accuracy (any overlap): {accuracy:.4f}")
    return accuracy

### Evaluate

In [18]:
evaluate_model(X_val, y_val, model)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step
Model Accuracy (any overlap): 0.8408


0.8408128704487722