In [None]:
from computer_vision.api.load_model import create_model, device, pred_to_name
from computer_vision.utils.helper import load_config
from computer_vision.data.transforms import get_transform, inverse_transform
from utils.helper import find_project_root

# Load the API configuration from <project root>/src/computer_vision/api/config.yaml.
config = load_config(
    find_project_root() / "src" / "computer_vision" / "api" / "config.yaml"
)

# Instantiate the model from the loaded configuration.
model = create_model(config)

# Preprocessing transform built from the configured image size and
# normalization statistics, with augmentations disabled (augments=False).
transform = get_transform(
    config.img_size,
    config.normalization["mean"],
    config.normalization["std"],
    augments=False,
)

In [None]:
import cv2
import torch, numpy as np
from PIL import Image

# Frontal-face Haar cascade, loaded from the cascade files shipped with OpenCV.
cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
face_cascade = cv2.CascadeClassifier(cascade_path)

# CascadeClassifier does not raise when the file is missing or unreadable: it
# just stays empty and detectMultiScale fails later with an obscure error.
# Fail fast here with a clear message instead.
if face_cascade.empty():
    raise RuntimeError(f"Failed to load Haar cascade from {cascade_path}")


In [None]:
def image_from_transform(x_input: torch.Tensor, mean:list, std:list):
    """Turn a normalized image tensor back into a displayable BGR uint8 image.

    Args:
        x_input: Image tensor laid out as (1, 3, H, W) or (3, H, W). A leading
            dimension of size 1 is squeezed away.
        mean: Three per-channel means that were subtracted during normalization.
        std: Three per-channel standard deviations used during normalization.

    Returns:
        A C-contiguous ``numpy.ndarray`` of shape (H, W, 3), dtype ``uint8``,
        with channels in BGR order.
    """
    # Work on a detached float32 CPU copy so integer stats or half-precision
    # inputs do not change the arithmetic below.
    x_vis = x_input.squeeze(0).detach().cpu().to(torch.float32)

    # Undo Normalize: x * std + mean, broadcast over H and W.
    mean_t = torch.tensor(mean, dtype=torch.float32).view(3, 1, 1)
    std_t = torch.tensor(std, dtype=torch.float32).view(3, 1, 1)
    x_vis = torch.clamp(x_vis * std_t + mean_t, 0, 1)

    # (C,H,W) -> (H,W,C)
    x_vis = x_vis.permute(1, 2, 0).numpy()

    # float [0,1] -> uint8 [0,255]. Round to nearest instead of truncating:
    # float error in the normalize/denormalize round trip can leave a value
    # just below an integer, which truncation would turn into an off-by-one.
    x_vis = np.rint(x_vis * 255).astype(np.uint8)

    # RGB -> BGR for OpenCV: reverse the channel axis and return a contiguous
    # copy, which is what OpenCV drawing/display functions expect.
    return np.ascontiguousarray(x_vis[..., ::-1])

In [None]:
# Set to True to also show the denormalized tensor fed to the model (debug aid).
SHOW_MODEL_INPUT = False

# Open the default camera
cam = cv2.VideoCapture(0)

# try/finally guarantees the camera and windows are released even if the loop
# raises or the kernel is interrupted; otherwise the device stays locked.
try:
    # Get the default frame width and height
    frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(f"Frame width: {frame_width}, Frame height: {frame_height}")

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    #out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (frame_width, frame_height))

    while True:
        ret, frame = cam.read()
        if not ret:
            print("Failed to grab frame")
            break

        # The Haar cascade runs on a grayscale copy of the frame.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.3,
            minNeighbors=5,
            minSize=(30, 30)
        )

        for (x, y, w, h) in faces:
            # Plain Python ints for slicing and the OpenCV drawing calls.
            x, y, w, h = int(x), int(y), int(w), int(h)

            # Crop the face region
            face = frame[y:y+h, x:x+w]

            # BGR -> RGB, then wrap as a PIL image for the transform
            face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(face_rgb)

            x_input = transform(image).unsqueeze(0).to(device)

            with torch.no_grad():
                pred = model.predict(x_input)[0]

            pred = pred_to_name(pred)

            # Draw the bounding box and the predicted label. The label baseline
            # is clamped so it stays visible when the face touches the top edge.
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0,255,0), 2)
            cv2.putText(frame, pred, (x, max(y-10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,255,0), 2)

            # Debug: show the model input. Skipped by default so the
            # denormalization is not recomputed for every face on every frame.
            if SHOW_MODEL_INPUT:
                x_vis = image_from_transform(x_input, config.normalization['mean'], config.normalization['std'])
                cv2.imshow("Model Input", x_vis)

        # Display the captured frame on every iteration, not only when a face
        # was found; otherwise the preview freezes and, before the first
        # detection, no window exists to receive the 'q' key press.
        cv2.imshow('Camera', frame)

        # Press 'q' to exit the loop (mask to the low byte of the key code)
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            break
finally:
    # Release the capture and writer objects
    cam.release()
    #out.release()
    cv2.destroyAllWindows()