In [18]:
# Show the path of the Python interpreter backing this kernel; handy for
# checking that the notebook is running inside the expected environment.
import sys
print(sys.executable)


E:\CSIR\gpuenv\Scripts\python.exe


In [19]:
import cv2
import numpy as np
import onnxruntime as ort

# Load ONNX model
onnx_model_path = "best.onnx"

# Pass the execution providers explicitly. ONNX Runtime builds that ship more
# than one provider (for example the GPU packages, ORT >= 1.9) can refuse to
# create a session when `providers` is omitted. get_available_providers()
# returns the providers of this build in priority order, so an accelerator is
# preferred when present and the CPU provider remains the fallback.
session = ort.InferenceSession(
    onnx_model_path,
    providers=ort.get_available_providers(),
)

# Name of the model's first input; used as the feed key for session.run().
input_name = session.get_inputs()[0].name

print("input_name:",input_name)


input_name: images


What it does: PREPROCESSING

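- Resizes the input to `size`×`size` → 640×640 is the default input size for YOLOv8; this notebook runs at 320×320 (the `size` default) for speed.
- Converts BGR → RGB → OpenCV delivers frames in BGR order, while PyTorch models are trained on RGB.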
- Normalizes pixel values to [0, 1].
- Transposes shape from (H, W, C) → (C, H, W).
- Adds batch dimension: (1, 3, size, size), e.g. (1, 3, 640, 640) for size=640.

In [20]:
def preprocess(frame, size=320):
    """Turn a BGR image into a model-ready float32 tensor.

    Parameters
    ----------
    frame : numpy.ndarray
        Image in OpenCV's (H, W, C) layout with BGR channel order.
    size : int, optional
        Side length of the square the image is resized to (default 320).

    Returns
    -------
    numpy.ndarray
        C-contiguous float32 array of shape (1, 3, size, size) holding RGB
        values scaled to the range [0, 1].
    """
    resized = cv2.resize(frame, (size, size))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    scaled = rgb.astype(np.float32) / 255.0

    # (H, W, C) -> (C, H, W), then prepend the batch axis.
    channels_first = scaled.transpose(2, 0, 1)
    batched = channels_first[np.newaxis, ...]

    # The transposed view is not contiguous; materialise it in C order.
    return np.ascontiguousarray(batched)

In [25]:
def postprocess(outputs, orig_frame, input_size=320, conf_thres=0.3, iou_thres=0.4):
    """Draw NMS-filtered detections from the model output onto a frame.

    Parameters
    ----------
    outputs : sequence
        Result of ``session.run``. ``outputs[0]`` is read as an array of shape
        ``(1, 5, N)`` (or ``(5, N)``) whose rows are ``x_center``,
        ``y_center``, ``width``, ``height`` and ``confidence``; coordinates
        are expressed in pixels of the ``input_size`` x ``input_size`` image.
    orig_frame : numpy.ndarray
        Frame to annotate. It is modified in place.
    input_size : int, optional
        Side length of the square image that was fed to the model.
    conf_thres : float, optional
        Detections with a confidence below this value are discarded.
    iou_thres : float, optional
        IoU threshold handed to ``cv2.dnn.NMSBoxes``.

    Returns
    -------
    numpy.ndarray
        The same ``orig_frame`` object, with boxes and labels drawn on it.
    """
    predictions = np.asarray(outputs[0])
    # Drop only the batch axis. A blanket np.squeeze would also collapse the
    # anchor axis when the model emits a single candidate (N == 1).
    if predictions.ndim == 3:
        predictions = predictions[0]

    x_center, y_center, width, height, conf = predictions[:5]

    # Vectorised confidence filter instead of a Python loop over every anchor.
    keep = conf >= conf_thres
    if not keep.any():
        return orig_frame

    orig_h, orig_w = orig_frame.shape[:2]
    scale_x = orig_w / input_size
    scale_y = orig_h / input_size

    # Work in float64 so the scaling does not lose precision before truncation.
    box_w = width[keep].astype(np.float64)
    box_h = height[keep].astype(np.float64)
    left = x_center[keep] - box_w / 2
    top = y_center[keep] - box_h / 2

    # [x, y, w, h] in original-frame pixels, as plain Python ints for OpenCV.
    boxes = (
        np.stack(
            [left * scale_x, top * scale_y, box_w * scale_x, box_h * scale_y],
            axis=1,
        )
        .astype(int)
        .tolist()
    )
    confidences = conf[keep].astype(float).tolist()

    # Apply Non-Maximum Suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_thres, iou_thres)

    # Depending on the OpenCV version the result is flat, shaped (K, 1), or an
    # empty tuple; flattening covers all three.
    for idx in np.asarray(indices, dtype=int).reshape(-1):
        idx = int(idx)
        x1, y1, w, h = boxes[idx]
        x2 = x1 + w
        y2 = y1 + h

        score_text = f"Face: {confidences[idx] * 100:.1f}%"
        cv2.rectangle(orig_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Label goes above the box; when there is no room (box touches the top
        # edge) it is moved just inside the box so it stays visible.
        text_y = y1 - 10
        if text_y < 10:
            text_y = max(y1, 0) + 20
        cv2.putText(orig_frame, score_text, (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    return orig_frame


In [26]:
# Webcam setup
cap = cv2.VideoCapture(0)

# Set inference resolution (try 320, 416, 256 — test on your Pi)
inference_size = 320

# try/finally guarantees the camera and the preview window are released even
# when inference raises or the kernel is interrupted mid-loop.
try:
    if not cap.isOpened():
        # Raise instead of calling exit(): inside Jupyter, exit() shuts the
        # kernel down and throws away the loaded model and all other state.
        raise RuntimeError("Error: Could not open webcam.")

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged or stream ended).
            break

        frame = cv2.flip(frame, 1)  # Mirror effect for webcam

        # preprocess() resizes to inference_size itself, so pass the full
        # frame rather than resizing it twice.
        input_tensor = preprocess(frame, size=inference_size)

        # Run inference
        outputs = session.run(None, {input_name: input_tensor})

        # Postprocess and draw on original frame
        annotated = postprocess(outputs, frame, input_size=inference_size)

        cv2.imshow("YOLOv8 ONNX Face Detection", annotated)

        # Press "q" in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()