In [26]:
import onnxruntime as ort
import cv2
import numpy as np

session = ort.InferenceSession("../data/models/yolov8n.onnx")
input_name = session.get_inputs()[0].name

image = cv2.imread("../data/test_images/common.jpg")

# Preprocess image for ONNX model
print("[INFO] Preprocessing image for face detection")
# Resize image to the required size for YOLO model
input_image = cv2.resize(image, (640, 640))
# Normalize pixel values to range [0, 1]
input_image = input_image.astype(np.float32) / 255.0
# Change image layout to channel-first format as required by ONNX model
input_image = np.transpose(input_image, (2, 0, 1))  # Channel first
# Add batch dimension (needed by the model)
input_image = np.expand_dims(input_image, axis=0)

# Perform inference on the provided image
print("[INFO] Running inference on the image")

output = session.run(
    output_names=None, 
    input_feed= {input_name: input_image}
)
outputs = output[0][0]

[INFO] Preprocessing image for face detection
[INFO] Running inference on the image


In [33]:
outputs[0][8300:8400]

array([ 22.25124 ,  63.748386,  87.80733 , 129.69324 , 174.27881 ,
       192.77115 , 213.32562 , 269.3759  , 316.48227 , 322.21863 ,
       320.0763  , 316.99564 , 318.22125 , 326.87085 , 433.69553 ,
       461.33926 , 492.1537  , 521.9314  , 574.0167  , 586.5244  ,
        22.237974,  64.08118 , 112.29887 , 154.78146 , 179.81332 ,
       190.1046  , 224.40012 , 279.797   , 318.0047  , 318.1396  ,
       320.30872 , 317.26102 , 318.1406  , 335.68372 , 430.21753 ,
       459.9945  , 492.00806 , 498.9185  , 567.9201  , 587.4642  ,
        22.73684 ,  68.41667 , 181.43848 , 173.88988 , 179.5802  ,
       191.43561 , 238.93619 , 288.25153 , 318.90552 , 317.70673 ,
       321.46643 , 318.6952  , 319.8886  , 353.45447 , 402.1404  ,
       459.18643 , 494.7537  , 490.04715 , 542.8994  , 588.4896  ,
        22.670723,  91.90434 , 216.26353 , 197.72499 , 181.47786 ,
       196.88611 , 270.8847  , 307.2757  , 320.03256 , 318.68646 ,
       323.23904 , 320.78094 , 317.70187 , 334.6641  , 363.420

In [None]:
faces = []

for detection in outputs:
    # Extract bounding box coordinates
    center_x, center_y, width, height = detection[:4]

    # Denormalize to original image size (assuming image size of 640x640)
    center_x *= 640
    center_y *= 640
    width *= 640
    height *= 640

    # Convert to (x1, y1, x2, y2)
    x1 = int(center_x - width / 2)
    y1 = int(center_y - height / 2)
    x2 = int(center_x + width / 2)
    y2 = int(center_y + height / 2)

    # Extract objectness score
    objectness_score = detection[4]
    confidence_threshold = 0.65
    if objectness_score < confidence_threshold:
        continue

    # Extract class scores and determine the class with highest confidence
    class_scores = detection[5:]
    class_id = np.argmax(class_scores)
    class_confidence = class_scores[class_id]

    # Set a class confidence threshold
    if class_confidence > 0.5:
        faces.append([x1, y1, x2, y2, class_id, class_confidence])
        # Optionally draw the bounding box on the image
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)


In [34]:
import onnxruntime as ort
import cv2
import numpy as np
import torch

# Load the model
session = ort.InferenceSession("../data/models/yolov8n.onnx")
input_name = session.get_inputs()[0].name

# Load and preprocess the image
image = cv2.imread("../data/test_images/common.jpg")
input_image = cv2.resize(image, (640, 640))
input_image = input_image.astype(np.float32) / 255.0
input_image = np.transpose(input_image, (2, 0, 1))  # Channel first
input_image = np.expand_dims(input_image, axis=0)

# Run inference
output = session.run(None, {input_name: input_image})

# Process the outputs
outputs = output[0]
outputs = outputs.transpose(0, 2, 1)
outputs = outputs[0]

# Extract boxes and scores
boxes = outputs[:, :4]
scores = outputs[:, 4:]

# Apply activation functions
class_probs = 1 / (1 + np.exp(-scores))

# Decode bounding boxes
boxes[:, 0] = boxes[:, 0] - boxes[:, 2] / 2  # x1
boxes[:, 1] = boxes[:, 1] - boxes[:, 3] / 2  # y1
boxes[:, 2] = boxes[:, 0] + boxes[:, 2]      # x2
boxes[:, 3] = boxes[:, 1] + boxes[:, 3]      # y2

# Filter predictions
confidences = np.max(class_probs, axis=1)
class_ids = np.argmax(class_probs, axis=1)
conf_threshold = 0.5
mask = confidences > conf_threshold
boxes = boxes[mask]
confidences = confidences[mask]
class_ids = class_ids[mask]

# Rescale boxes
orig_height, orig_width = image.shape[:2]
scale_x = orig_width / 640
scale_y = orig_height / 640
boxes[:, [0, 2]] *= scale_x
boxes[:, [1, 3]] *= scale_y

# Apply NMS
boxes_tensor = torch.tensor(boxes)
confidences_tensor = torch.tensor(confidences)
indices = torch.ops.torchvision.nms(boxes_tensor, confidences_tensor, iou_threshold=0.5)
boxes = boxes[indices]
confidences = confidences[indices]
class_ids = class_ids[indices]

# Draw boxes on the image
for box, conf, class_id in zip(boxes, confidences, class_ids):
    x1, y1, x2, y2 = box.astype(int)
    label = f"Class {class_id}: {conf:.2f}"
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(image, label, (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

# Display the image
cv2.imshow("Detections", image)
cv2.waitKey(0)
cv2.destroyAllWindows()


AttributeError: '_OpNamespace' 'torchvision' object has no attribute 'nms'

In [8]:
from ultralytics import YOLO

model = YOLO("../data/models/yolov8n.onnx", task="detect")
results = model("../data/known_faces/elyor/front.jpg")

Loading ../data/models/yolov8n.onnx for ONNX Runtime inference...
Preferring ONNX Runtime AzureExecutionProvider

image 1/1 /home/el02/PiPresence/tests/../data/known_faces/elyor/front.jpg: 640x640 1 person, 33.7ms
Speed: 0.7ms preprocess, 33.7ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


In [9]:
from ultralytics import YOLO

# Load the model
model = YOLO('../data/models/yolov8n.pt')

# Get the model architecture (Pytorch model)
yolo_model = model.model

# Get the names of all layers
layer_names = [name for name, _ in yolo_model.named_modules()]

print("Layer names:")
print(layer_names)


Layer names:
['', 'model', 'model.0', 'model.0.conv', 'model.0.bn', 'model.0.act', 'model.1', 'model.1.conv', 'model.1.bn', 'model.2', 'model.2.cv1', 'model.2.cv1.conv', 'model.2.cv1.bn', 'model.2.cv2', 'model.2.cv2.conv', 'model.2.cv2.bn', 'model.2.m', 'model.2.m.0', 'model.2.m.0.cv1', 'model.2.m.0.cv1.conv', 'model.2.m.0.cv1.bn', 'model.2.m.0.cv2', 'model.2.m.0.cv2.conv', 'model.2.m.0.cv2.bn', 'model.3', 'model.3.conv', 'model.3.bn', 'model.4', 'model.4.cv1', 'model.4.cv1.conv', 'model.4.cv1.bn', 'model.4.cv2', 'model.4.cv2.conv', 'model.4.cv2.bn', 'model.4.m', 'model.4.m.0', 'model.4.m.0.cv1', 'model.4.m.0.cv1.conv', 'model.4.m.0.cv1.bn', 'model.4.m.0.cv2', 'model.4.m.0.cv2.conv', 'model.4.m.0.cv2.bn', 'model.4.m.1', 'model.4.m.1.cv1', 'model.4.m.1.cv1.conv', 'model.4.m.1.cv1.bn', 'model.4.m.1.cv2', 'model.4.m.1.cv2.conv', 'model.4.m.1.cv2.bn', 'model.5', 'model.5.conv', 'model.5.bn', 'model.6', 'model.6.cv1', 'model.6.cv1.conv', 'model.6.cv1.bn', 'model.6.cv2', 'model.6.cv2.conv', 

In [10]:
import torch

# Define a dummy input
dummy_input = torch.randn(1, 3, 640, 640)

# Export to ONNX
torch.onnx.export(yolo_model, dummy_input, "data/models/yolov8n.onnx",
                  input_names=['input'],
                  output_names=['output_boxes', 'output_classes'],
                  dynamic_axes={'input': {0: 'batch_size'}, 'output_boxes': {0: 'batch_size'}, 'output_classes': {0: 'batch_size'}})


  if self.format != "imx" and (self.dynamic or self.shape != shape):
  for i, stride in enumerate(strides):


In [2]:
import onnxruntime as ort
import cv2
import numpy as np

# Load the ONNX model
session = ort.InferenceSession("../data/models/yolov8n-face.onnx")

# Get the input name for ONNX model
input_name = session.get_inputs()[0].name

# Get the output names from ONNX model
output_names = [output.name for output in session.get_outputs()]

# Load and preprocess the image
image = cv2.imread("../data/test_images/friends_gathering.jpg")
print("[INFO] Preprocessing image for face detection")
input_image = cv2.resize(image, (640, 640))
input_image = input_image.astype(np.float32) / 255.0
input_image = np.transpose(input_image, (2, 0, 1))  # Channel first
input_image = np.expand_dims(input_image, axis=0)

# Perform inference on the provided image
print("[INFO] Running inference on the image")
outputs = session.run(output_names, {input_name: input_image})

# Output shape is likely (1, 84, 8400)
print("Output shape:", outputs[0].shape)

[INFO] Preprocessing image for face detection
[INFO] Running inference on the image
Output shape: (1, 5, 8400)


In [1]:
# Ultralytics YOLO 🚀, AGPL-3.0 license

import argparse

import cv2.dnn
import numpy as np

from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_yaml

CLASSES = yaml_load(check_yaml("coco8.yaml"))["names"]
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))


def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """
    Draws bounding boxes on the input image based on the provided arguments.

    Args:
        img (numpy.ndarray): The input image to draw the bounding box on.
        class_id (int): Class ID of the detected object.
        confidence (float): Confidence score of the detected object.
        x (int): X-coordinate of the top-left corner of the bounding box.
        y (int): Y-coordinate of the top-left corner of the bounding box.
        x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
        y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
    """
    label = f"{CLASSES[class_id]} ({confidence:.2f})"
    color = colors[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def main(onnx_model, input_image):
    """
    Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image.

    Args:
        onnx_model (str): Path to the ONNX model.
        input_image (str): Path to the input image.

    Returns:
        list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
    """
    # Load the ONNX model
    model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)

    # Read the input image
    original_image: np.ndarray = cv2.imread(input_image)
    [height, width, _] = original_image.shape

    # Prepare a square image for inference
    length = max((height, width))
    image = np.zeros((length, length, 3), np.uint8)
    image[0:height, 0:width] = original_image

    # Calculate scale factor
    scale = length / 640

    # Preprocess the image and prepare blob for model
    blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
    model.setInput(blob)

    # Perform inference
    outputs = model.forward()

    # Prepare output array
    outputs = np.array([cv2.transpose(outputs[0])])
    rows = outputs.shape[1]

    boxes = []
    scores = []
    class_ids = []

    # Iterate through output to collect bounding boxes, confidence scores, and class IDs
    for i in range(rows):
        classes_scores = outputs[0][i][4:]
        (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
        if maxScore >= 0.25:
            box = [
                outputs[0][i][0] - (0.5 * outputs[0][i][2]),
                outputs[0][i][1] - (0.5 * outputs[0][i][3]),
                outputs[0][i][2],
                outputs[0][i][3],
            ]
            boxes.append(box)
            scores.append(maxScore)
            class_ids.append(maxClassIndex)

    # Apply NMS (Non-maximum suppression)
    result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)

    detections = []

    # Iterate through NMS results to draw bounding boxes and labels
    for i in range(len(result_boxes)):
        index = result_boxes[i]
        box = boxes[index]
        detection = {
            "class_id": class_ids[index],
            "class_name": CLASSES[class_ids[index]],
            "confidence": scores[index],
            "box": box,
            "scale": scale,
        }
        detections.append(detection)
        draw_bounding_box(
            original_image,
            class_ids[index],
            scores[index],
            round(box[0] * scale),
            round(box[1] * scale),
            round((box[0] + box[2]) * scale),
            round((box[1] + box[3]) * scale),
        )

    # Display the image with bounding boxes
    cv2.imshow("image", original_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return detections

In [4]:
main("../data/models/yolov8n.onnx", "../data/test_images/common.jpg")

[{'class_id': 0,
  'class_name': 'person',
  'confidence': 0.9291252493858337,
  'box': [np.float32(72.9223),
   np.float32(2.3677368),
   np.float32(496.26514),
   np.float32(353.2938)],
  'scale': 1.875}]

In [11]:
from pipresence.detect_faces import FaceDetector
import cv2 

detector = FaceDetector("../data/models/yolov8n-face.onnx")
image = cv2.imread("../data/known_faces/elyor/right.jpg")
detections = detector.detect_faces(image)
detection = detections[0]

[INFO] Loading YOLOv8n face-model from ../data/models/yolov8n-face.onnx


In [12]:
print(detections)
bbox = detection["box"]
x = round(bbox[0] * detection["scale"])
y = round(bbox[1] * detection["scale"])
x_plus_w = round((bbox[0] + bbox[2]) * detection["scale"])
y_plus_h = round((bbox[1] + bbox[3]) * detection["scale"])
color = (200, 56, 159)
cv2.rectangle(image, (x, y), (x_plus_w, y_plus_h), color, 2)
cv2.putText(image, "Common", (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

while True:
    cv2.imshow("Example", image)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        print("[INFO] 'q' pressed, exiting the application")
        break

cv2.destroyAllWindows()


[{'class_id': 0, 'class_name': 'person', 'confidence': 0.3326736390590668, 'box': [np.float32(195.62439), np.float32(135.87161), np.float32(293.73694), np.float32(217.464)], 'scale': 1.0}]
[INFO] 'q' pressed, exiting the application


In [None]:
from pipresence.recognize_faces import FaceRecognizer
from pipresence.config import Config 
import cv2

Config.update_config(mobilefacenet_model_path="../data/models/mobilefacenet_fixed.onnx")
recognizer = FaceRecognizer()
image = cv2.imread("../data/known_faces/tom/front.jpg")
preprocessed_image = recognizer.preprocess(image)

# Run inference
outputs = recognizer.session.run(None, {recognizer.input_name: preprocessed_image})
outputs

[INFO] Loading MobileFaceNet model from ../data/models/mobilefacenet_fixed.onnx


InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input:0 for the following indices
 index: 1 Got: 3 Expected: 112
 index: 2 Got: 640 Expected: 112
 index: 3 Got: 640 Expected: 3
 Please fix either the inputs/outputs or the model.

In [3]:
from pipresence.recognize_faces import FaceRecognizer
import cv2
import os
import pickle

recognizer = FaceRecognizer("../data/models/mobilefacenet0.onnx")
embeddings_file = "../data/encodings/face_embeddings.pkl"
image = cv2.imread("../data/known_faces/elyor/right.jpg")
embedding = recognizer.recognize_face(detections[0])

[INFO] Loading MobileFaceNet model from ../data/models/mobilefacenet0.onnx
[ERROR] Face recognition failed: 'dict' object has no attribute 'shape'


In [None]:
if os.path.exists(embeddings_file):
    # Load existing embeddings from the file
    print(f"[INFO] Loading known face embeddings from {embeddings_file}")
    with open(embeddings_file, 'rb') as f:
        database = pickle.load(f)

# Compare detected face with known faces in the database
for name, known_embedding in database.items():
    if recognizer.compare_embeddings(embedding, known_embedding):
        print(f"[INFO] Recognized {name}")
        # Annotate the recognized face in the video feed
        cv2.putText(image, f"{name}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        # Display the video feed with annotations

cv2.imshow('PiPresence - Attendance Recognition', image)
# Exit loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
    print("[INFO] 'q' pressed, exiting the application")
    cv2.destroyAllWindows()

[INFO] Loading known face embeddings from ../data/encodings/face_embeddings.pkl


: 

In [8]:
from pipresence.preprocess import ImagePreprocessor
from pipresence.config import Config
Config.update_config(
    yolo_model_path = "../data/models/yolov8n-face.onnx",
    mobilefacenet_model_path = "../data/models/mobilefacenet_fixed.onnx",
    input_directory = "../data/images/",
    output_directory = "../data/known_faces"
)
processor = ImagePreprocessor()
processor.process_database_images()

[INFO] Loading YOLOv8n face-model from ../data/models/yolov8n-face.onnx
[INFO] Loading MobileFaceNet model from ../data/models/mobilefacenet_fixed.onnx
[INFO] Processing ../data/images/tom/left.jpg
[ERROR] At least one dimension is smaller than 640
[ERROR] Failed to process ../data/images/tom/left.jpg
[INFO] Processing ../data/images/tom/front.jpg
[ERROR] Face recognition failed: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input:0 for the following indices
 index: 1 Got: 3 Expected: 112
 index: 3 Got: 112 Expected: 3
 Please fix either the inputs/outputs or the model.
[INFO] Saved processed face to ../data/known_faces/tom/front.jpg
[INFO] Processing ../data/images/tom/right.jpg
[ERROR] Face recognition failed: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input:0 for the following indices
 index: 1 Got: 3 Expected: 112
 index: 3 Got: 112 Expected: 3
 Please fix either the inputs/outputs or the model.
[INFO] Saved process

TypeError: unsupported operand type(s) for +: 'NoneType' and 'NoneType'

In [3]:
def func(**kwargs):
    print(kwargs)

func(you="you", me="me", he="he")

def func1(*args):
    print(args)

func1(["you", "me", "she"], {1:"1", 2: "2", 3: "3"})

{'you': 'you', 'me': 'me', 'he': 'he'}
(['you', 'me', 'she'], {1: '1', 2: '2', 3: '3'})


In [22]:
class Person:
    eyes = "cute"
    hair = "long"
    height = 176

    @classmethod
    def update_vars(cls, **kwargs):
        print(kwargs)
        print(kwargs.items())
        for key, value in kwargs.items():
            if hasattr(cls, key):
                setattr(cls, key, value)
            else:
                print(f"No such variable as {key}")

    @classmethod
    def display_vars(cls):
        print(f"Eyes are {cls.eyes}")
        print(f"Hair is {cls.hair}")
        print(f"Height is {cls.height}")

Person.display_vars()
Person.update_vars(**{
        "eyes": "sharp",
        "hair": "short",
        "height": 173
    }
)
Person.display_vars()

Eyes are cute
Hair is long
Height is 176
{'eyes': 'sharp', 'hair': 'short', 'height': 173}
dict_items([('eyes', 'sharp'), ('hair', 'short'), ('height', 173)])
Eyes are sharp
Hair is short
Height is 173
