In [65]:
import cv2
import torch
from torchvision import transforms
import torch.nn.functional as F
import numpy as np
import utils
import utils.helpers
from ultralytics import YOLO
from huggingface_hub import hf_hub_download

In [2]:
model_path = hf_hub_download(repo_id="arnabdhar/YOLOv8-Face-Detection", filename="model.pt")
face_detector = YOLO(model_path)

INFO: HTTP Request: HEAD https://huggingface.co/arnabdhar/YOLOv8-Face-Detection/resolve/main/model.pt "HTTP/1.1 302 Found"


In [3]:
def pre_process(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((640, 640)),
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std = 0.5)
    ])

    image = transform(image)
    image_batch = image.unsqueeze(0)
    return image_batch

In [113]:
def face_process(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(448),
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std = 0.5)
    ])

    image = transform(image)
    image_batch = image.unsqueeze(0)
    return image_batch

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [5]:
model = 'resnet18'
bins = 90
weight = 'weights/resnet18.pt'
binwidth = 4
angle = 180
gaze_detector = utils.helpers.get_model(model, bins, inference_mode=True)
state_dict = torch.load(weight, map_location=device)
gaze_detector.load_state_dict(state_dict)
gaze_detector.to(device)
_ = gaze_detector.eval()

In [None]:
class yolo_face:
    def __init__(self):
        model_path = hf_hub_download(repo_id="arnabdhar/YOLOv8-Face-Detection", filename="model.pt")
        self.model = YOLO(model_path)


    def forward(self, img):
        with torch.no_grad():
            bboxes = self.model(img)[0].cpu()
        bbox_positions = [bbox.boxes.xyxyn[0] for bbox in bboxes]
        return bbox_positions
    
    
    def make_face_batch(self, image, bbox_list, preprocessor):
        face_crops = []
        for bbox in bbox_list:
            bbox_cords = bbox
            bbox_cords[[0, 2]] *= image.shape[1]
            bbox_cords[[1, 3]] *= image.shape[0]
            x_min, y_min, x_max, y_max = map(int, bbox_cords)
            crop = image[y_min:y_max, x_min:x_max]
            crop = preprocessor(crop)
            face_crops.append(crop)
        return torch.concatenate(face_crops)


In [130]:
unprocessed = cv2.imread('2025plans.jpg')
img = pre_process(unprocessed).to(device)  
face_detector = yolo_face()
bbox_list = face_detector.forward(img)
face_batch = face_detector.make_face_batch(unprocessed, bbox_list, pre_process)


INFO: HTTP Request: HEAD https://huggingface.co/arnabdhar/YOLOv8-Face-Detection/resolve/main/model.pt "HTTP/1.1 302 Found"



0: 640x640 1 FACE, 4.4ms
Speed: 0.0ms preprocess, 4.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)


In [131]:
gaze_detector = mobile_gaze('cuda')
pitch, yaw = gaze_detector.forward(face_batch.to(device))
gaze_detector.draw_result(unprocessed, pitch, yaw, bbox_list)
cv2.imwrite('checkmeout.png', unprocessed)

True

In [125]:
pitch, yaw

(tensor([1.1299]), tensor([-0.1639]))

In [126]:
class mobile_gaze:
    def __init__(self, device, model='resnet18', weight='weights/resnet18.pt', bins = 90, binwidth = 4, angle = 180):
        self.model = utils.helpers.get_model(model, bins, inference_mode=True)
        state_dict = torch.load(weight, map_location=device)
        self.model.load_state_dict(state_dict)
        self.model.to(device)
        self.model.eval()

        self.idx_tensor = torch.arange(bins, device=device, dtype=torch.float32)
        self.binwidth = binwidth
        self.angle = angle

    def forward(self, face_img):
        with torch.no_grad():
            pitch, yaw = self.model(face_img)

            pitch_predicted, yaw_predicted = F.softmax(pitch, dim=1), F.softmax(yaw, dim=1)
            pitch_predicted = torch.sum(pitch_predicted * self.idx_tensor, dim=1) * self.binwidth - self.angle
            yaw_predicted = torch.sum(yaw_predicted * self.idx_tensor, dim=1) * self.binwidth - self.angle

            pitch_predicted = np.radians(pitch_predicted.cpu())
            yaw_predicted = np.radians(yaw_predicted.cpu())

        return pitch_predicted, yaw_predicted

    def draw_result(self, img, pitch, yaw, bbox_list):
        for i in range(len(bbox_list)):
            utils.helpers.draw_bbox_gaze(img, bbox_list[i], pitch[i], yaw[i])


In [86]:
unprocessed = cv2.imread('2025plans.jpg')
img = pre_process(unprocessed).to(device)  
import time
gaze_detector = mobile_gaze('cuda')
face_detector = yolo_face()

result = face_detector.forward(img)
with torch.no_grad():
    for bbox in result:
        bbox_place = bbox.boxes.xyxyn.cpu()[0]
        bbox_place[[0, 2]] *= unprocessed.shape[1]
        bbox_place[[1, 3]] *= unprocessed.shape[0]
        x_min, y_min, x_max, y_max = map(int, bbox_place[:4])

        face_crop = unprocessed[y_min:y_max, x_min:x_max]
        face_crop = pre_process(face_crop)
        face_crop = face_crop.to(device)

        pitch, yaw = gaze_detector.forward(face_crop)

        gaze_detector.draw_result(unprocessed, pitch, yaw, bbox_place)
cv2.imwrite('checkay.png', unprocessed)

INFO: HTTP Request: HEAD https://huggingface.co/arnabdhar/YOLOv8-Face-Detection/resolve/main/model.pt "HTTP/1.1 302 Found"



0: 640x640 1 FACE, 4.7ms
Speed: 0.0ms preprocess, 4.7ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)


True

In [122]:
unprocessed = cv2.imread('2025plans.jpg')
img = pre_process(unprocessed).to(device)
faces = face_detector(img)[0]
for face in faces:
    bbox_place = face.boxes.xyxyn.cpu()[0]
    bbox_place[[0, 2]] *= unprocessed.shape[1]
    bbox_place[[1, 3]] *= unprocessed.shape[0]
    xA = int(bbox_place[0])
    yA = int(bbox_place[1])
    xB = int(bbox_place[2])
    yB = int(bbox_place[3])
    cv2.rectangle(unprocessed, (xA, yA), (xB, yB), (255, 0, 0), 5)
    
cv2.imwrite('checkay.png', unprocessed)

TypeError: 'yolo_face' object is not callable

In [47]:
pitch, yaw = gaze_detector(img)

In [7]:
idx_tensor = torch.arange(bins, device=device, dtype=torch.float32)

pitch_predicted, yaw_predicted = F.softmax(pitch, dim=1), F.softmax(yaw, dim=1)
pitch_predicted = torch.sum(pitch_predicted * idx_tensor, dim=1) * binwidth - angle
yaw_predicted = torch.sum(yaw_predicted * idx_tensor, dim=1) * binwidth - angle

In [8]:
pitch_predicted

tensor([21.0265], device='cuda:0', grad_fn=<SubBackward0>)

In [12]:
face_detector

RetinaFace({})

In [23]:
retinaface.pre_trained_models.get_model("resnet50_2020-07-20", max_size=2048)

<retinaface.predict_single.Model at 0x7a81dc726810>

TypeError: get_model() missing 2 required positional arguments: 'model_name' and 'max_size'

In [32]:
import retinaface_pytorch

ModuleNotFoundError: No module named 'retinaface_pytorch'