## Running multiple models with warm-up

This notebook demonstrates how to load and run multiple AI models using DeGirum PySDK on a Hailo-8 or Hailo-8L device.

It showcases the model warm-up technique, which involves running a single dummy inference on each model after loading. This step ensures all runtime resources and tensor buffers are initialized, which avoids latency spikes during the first real inference.

In [None]:
# Install the notebook's dependencies. %pip (instead of !pip) makes sure the
# packages land in the environment of the kernel that is running this notebook.
%pip install -q degirum degirum_tools numpy opencv-python

#### Necessary imports and loading models

In [None]:
import cv2, numpy as np, degirum as dg, degirum_tools, time
from PIL import Image

# ------------------------------------------------------------------
# 1. SETUP
# ------------------------------------------------------------------
# Names of the three models loaded later in this notebook.
face_vec_model_name = "arcface_mobilefacenet--112x112_quant_hailort_hailo8l_1"
face_model_name = "scrfd_500m--640x640_quant_hailort_hailo8l_1"
pose_model_name = "yolov8n_relu6_coco_pose--640x640_quant_hailort_hailo8l_1"

# Connection settings handed to every dg.load_model() call:
#   host        -> inference_host_address
#   zoo         -> zoo_url
#   device_type -> device_type
#   token       -> token (left empty here)
token = ""
device_type = "HAILORT/HAILO8L"
zoo = "degirum/hailo"
host = "@local"


#### Comparing latency with and without warm-up

In [None]:
# Load the pose model; this single instance is timed twice below, once on its
# very first inference and once after a warm-up call.
model = dg.load_model(
    model_name=pose_model_name,
    inference_host_address=host,
    token=token,
    zoo_url=zoo,
    device_type=device_type
)

# All-zero 640x640x3 uint8 frame used as a stand-in input (size matches the
# 640x640 in the model name).
dummy_input = np.zeros((640,640,3), dtype=np.uint8)

# Elapsed time is taken with time.perf_counter(): it is a monotonic,
# high-resolution clock intended for timing short intervals, whereas the
# wall clock (time.time()) can be coarse or be adjusted mid-measurement.

# --- Inference WITHOUT warm-up ---
start = time.perf_counter()
_ = model(dummy_input)
t1 = time.perf_counter() - start
print(f"First inference (no warm-up): {t1*1000:.1f} ms")

# --- Inference WITH warm-up ---
_ = model(dummy_input)  # warm-up step

start = time.perf_counter()
_ = model(dummy_input)
t2 = time.perf_counter() - start
print(f"Subsequent inference (warmed up): {t2*1000:.1f} ms")


## Multi-model inference pipeline with warm-up


We run pose detection continuously on every frame. If a dummy condition is met (e.g., more than one person is detected), we run the face detection model to localize faces and then run the face embedding (vector) model on each detected face. This showcases how a dummy warm-up inference reduces latency when switching between models.

#### Loading models and running warm-up inference

In [None]:

print("Loading models...")

# Every model is loaded with the same host / zoo / token / device settings;
# only the model name differs between the three calls.
shared_load_kwargs = dict(
    inference_host_address=host,
    zoo_url=zoo,
    token=token,
    device_type=device_type,
)
PoseModel = dg.load_model(model_name=pose_model_name, **shared_load_kwargs)
FaceModel = dg.load_model(model_name=face_model_name, **shared_load_kwargs)
FaceVectorModel = dg.load_model(model_name=face_vec_model_name, **shared_load_kwargs)

# All-zero uint8 frames that serve purely as warm-up inputs:
# 640x640 for the pose and face detectors, 112x112 for the face-vector model.
dummy_pose_img = np.zeros(shape=(640, 640, 3), dtype=np.uint8)
dummy_face_img = np.zeros(shape=(640, 640, 3), dtype=np.uint8)
dummy_face_crop = np.zeros(shape=(112, 112, 3), dtype=np.uint8)

print("Warming up models...")
# One throw-away inference per model, in the order pose -> face -> face vector.
for warm_model, warm_frame in (
    (PoseModel, dummy_pose_img),
    (FaceModel, dummy_face_img),
    (FaceVectorModel, dummy_face_crop),
):
    warm_model(warm_frame)

print("Warm-up complete. Models are ready for real-time inference.")

#### Running inference

In [None]:
# Run the pose and face models together on video source 0 and, for every
# detected face, compute an embedding with the face-vector model.
combined_model = degirum_tools.CombiningCompoundModel(PoseModel, FaceModel)
inference_stream = degirum_tools.predict_stream(combined_model, 0)

with degirum_tools.Display("Results") as display:
    for inference_result in inference_stream:
        # Faces are cropped from the original frame, not from the overlay:
        # the overlay carries drawn results, and this loop also draws labels
        # and boxes onto it, which would leak into later crops and distort
        # the embedding input.
        source_frame = inference_result.image
        annotated_frame = inference_result.image_overlay.copy()  # copy to modify
        frame_h, frame_w = annotated_frame.shape[:2]

        for detection in inference_result.results:
            # Only face detections that carry a bounding box are processed.
            if detection.get("label") != "face" or "bbox" not in detection:
                continue

            print("\n ------- POSE + FACE DETECTION RESULT -------")
            print(f"- Detected: {detection['label']} at {detection.get('bbox', 'N/A')}")

            # Clamp the box to the frame: a negative coordinate used as a
            # slice index would wrap around and yield a wrong or empty crop.
            x1, y1, x2, y2 = map(int, detection["bbox"])
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)
            face_crop = source_frame[y1:y2, x1:x2]

            # Skip degenerate boxes (zero height or width after clamping).
            if face_crop.shape[0] == 0 or face_crop.shape[1] == 0:
                continue

            # The face-vector model is fed a 112x112 crop.
            face_resized = cv2.resize(face_crop, (112, 112))
            vec_result = FaceVectorModel(face_resized)
            embedding = np.asarray(vec_result.results[0]["data"]).flatten()

            # First embedding component and L2 norm, shown as a simple label.
            emb_id = embedding[0]
            emb_norm = np.linalg.norm(embedding)
            label_text = f"VecID: {emb_id:.2f}, Norm: {emb_norm:.2f}"

            # Print embedding debug info
            print("\n -------- FACE VECTOR RESULT ----------")
            print(f"Embedding Length: {len(embedding)}")
            print(f"First 5 Vector Values: {embedding[:5]}")
            print(f"Norm: {emb_norm:.2f}")

            # Draw label and box using OpenCV
            cv2.putText(annotated_frame, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (255, 255, 0), 1)

        display.show(annotated_frame)