Only keep YOLO detections whose confidence is at least 0.30 (30%).

This should apply for both classes (top = 0, side = 1).

If below threshold → ignore that detection.

Here’s the updated compact YOLO + HRNet pipeline with confidence check:


Output:
Top view (class 0) → only bbox.

Side view (class 1) → bbox + HRNet plucking point in red.

#### For Single Image Only

In [1]:
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
from train_hrnet_pluck import HRNet, preprocess_img

# -------------------------------
# CONFIG
# -------------------------------
yolo_model_path = "E:/Project_Work/2025/Saffron_Project/Github_Code/Saffron_Detection/YoloV8+HRNet/YoloV8_Result_Object_detection/Result_Weight/detect/weights/best.pt"   # YOLOv8 weights
hrnet_model_path = "hrnet_pluck_best.pth"              # HRNet weights
device = "cuda" if torch.cuda.is_available() else "cpu"
img_size = 640       # size argument handed to preprocess_img for HRNet crops
conf_thresh = 0.30   # confidence threshold (30%); detections below it are skipped

# -------------------------------
# LOAD MODELS
# -------------------------------
yolo = YOLO(yolo_model_path)

hrnet = HRNet(num_keypoints=1)
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
checkpoint = torch.load(hrnet_model_path, map_location=device)
# Accept either a bare state_dict or a training checkpoint that stores the
# weights under a "model" key.
if isinstance(checkpoint, dict):
    state_dict = checkpoint.get("model", checkpoint)
else:
    state_dict = checkpoint
hrnet.load_state_dict(state_dict)
hrnet.to(device).eval()

# -------------------------------
# HRNet PREDICTION FUNCTION
# -------------------------------
def predict_pluck_point(crop_img):
    """Run HRNet on one cropped detection and return its pluck point.

    Args:
        crop_img: Image crop handed to ``preprocess_img``.

    Returns:
        Tuple ``(px, py)`` taken from the first two values of the model
        output for the single batch item. Callers treat them as fractions
        of the crop width and height.
    """
    batch = preprocess_img(crop_img, img_size).to(device)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        coords = hrnet(batch).cpu().numpy()[0]
    return coords[0], coords[1]

# -------------------------------
# YOLO + HRNet PIPELINE
# -------------------------------
def run_pipeline(image_path):
    """Detect flowers in one image, mark pluck points, and display the result.

    Every YOLO detection with confidence of at least ``conf_thresh`` gets a
    green bounding box and a "top"/"side" label. For class 1 (side view) the
    box region is additionally passed to HRNet and the predicted pluck point
    is drawn in red. The annotated image is shown with matplotlib.

    Args:
        image_path: Path of the image file to process.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # HRNet crops are taken from this untouched copy so that boxes, labels and
    # points already drawn on `img` never leak into the model input.
    clean = img.copy()
    img_h, img_w = img.shape[:2]

    results = yolo.predict(image_path, device=device, verbose=False)[0]

    for box in results.boxes:
        conf = float(box.conf[0].item())
        if conf < conf_thresh:   # skip low confidence
            continue
        cls = int(box.cls[0].item())

        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())

        # Draw YOLO bbox
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f"{'top' if cls==0 else 'side'} ({conf:.2f})"
        cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX,
                    0.7, (0, 255, 0), 2)

        # Only side flowers (class 1) get a plucking point.
        if cls != 1:
            continue

        # Clamp the box to the image so negative coordinates cannot wrap
        # around when slicing.
        cx1, cy1 = max(x1, 0), max(y1, 0)
        cx2, cy2 = min(x2, img_w), min(y2, img_h)
        crop = clean[cy1:cy2, cx1:cx2]
        if crop.size == 0:
            # Degenerate box: nothing to feed to HRNet.
            continue

        px, py = predict_pluck_point(crop)

        # Convert crop-relative coords back to absolute coords on the image
        cx = int(cx1 + px * (cx2 - cx1))
        cy = int(cy1 + py * (cy2 - cy1))

        # Draw point
        cv2.circle(img, (cx, cy), 6, (0, 0, 255), -1)
        cv2.putText(img, "Pluck", (cx + 5, cy), cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)

    # Show final result (OpenCV images are BGR, matplotlib expects RGB)
    plt.figure(figsize=(8, 8))
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis("off")
    plt.show()

# -------------------------------
# RUN
# -------------------------------
# Run the YOLO + HRNet pipeline once on a single test image and display it.
run_pipeline("test.jpg")


  from .autonotebook import tqdm as notebook_tqdm


ImportError: cannot import name 'HRNet' from 'train_hrnet_pluck' (e:\Project_Work\2025\Saffron_Project\Github_Code\Saffron_Detection\YoloV8+HRNet\train_hrnet_pluck.py)

#### For Multiple Images 

In [5]:
import os
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as T
from torchvision.models import resnet18
from ultralytics import YOLO
import numpy as np

# -------------------------------
# HRNet (simplified resnet backbone)
# -------------------------------
class HRNet(nn.Module):
    """Keypoint regressor: truncated ResNet-18 features plus a small conv head.

    Despite its name, this variant uses ``resnet18`` as the backbone. The
    forward pass returns one value per keypoint, shape (B, num_keypoints).

    Args:
        num_keypoints: Number of output channels of the head.
    """

    def __init__(self, num_keypoints=1):
        super().__init__()
        resnet = resnet18(pretrained=True)
        # Keep every child module except the last two
        # (presumably the pooling and classifier layers - confirm).
        feature_layers = list(resnet.children())[:-2]
        self.backbone = nn.Sequential(*feature_layers)
        self.head = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, num_keypoints, kernel_size=1),
        )

    def forward(self, x):
        """Return the head output averaged over dims 2 and 3."""
        maps = self.head(self.backbone(x))
        # Collapse the two trailing dims -> (B, num_keypoints)
        return maps.mean(dim=(2, 3))

def preprocess_img(img, img_size=640):
    """Turn an image array into a single-item batch tensor for the model.

    Args:
        img: Image to convert (the callers pass OpenCV crops).
        img_size: Target height and width of the square resize.

    Returns:
        The converted image with a leading batch dimension added.
    """
    # NOTE(review): the callers pass BGR crops from cv2 - confirm this matches
    # the channel order used during training.
    pil_image = T.ToPILImage()(img)
    resized = T.Resize((img_size, img_size))(pil_image)
    tensor = T.ToTensor()(resized)
    return tensor.unsqueeze(0)

# -------------------------------
# CONFIG
# -------------------------------
yolo_model_path = "E:/Project_Work/2025/Saffron_Project/Github_Code/Saffron_Detection/YoloV8+HRNet/YoloV8_Result_Object_detection/Result_Weight/detect/weights/best.pt"
hrnet_model_path = "hrnet_pluck_best.pth"
input_folder = "E:/Project_Work/2025/Saffron_Project/Github_Code/Saffron_Detection/YoloV8+HRNet/YoloV8_Result_Object_detection/test/images_field"
output_folder = "output_results"
confidence_thresh = 0.30  # detections below this confidence are skipped

os.makedirs(output_folder, exist_ok=True)

# -------------------------------
# LOAD MODELS
# -------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load YOLO
yolo = YOLO(yolo_model_path)

# Load HRNet properly
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
checkpoint = torch.load(hrnet_model_path, map_location=device)
state_dict = checkpoint.get("model", checkpoint)  # extract model if checkpoint contains dict
hrnet = HRNet(num_keypoints=1).to(device)
# strict=False tolerates key mismatches, so report them explicitly: a
# checkpoint trained with a different architecture would otherwise leave the
# affected layers at their initial weights without any hint.
load_result = hrnet.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        "WARNING: HRNet checkpoint does not fully match the model: "
        f"{len(load_result.missing_keys)} missing key(s), "
        f"{len(load_result.unexpected_keys)} unexpected key(s). "
        "Predictions may come from uninitialised layers."
    )
hrnet.eval()

# -------------------------------
# INFERENCE LOOP
# -------------------------------
# For every image in input_folder: run YOLO, predict a pluck point with HRNet
# inside each confident box, draw both, and save the annotated image.
for file in os.listdir(input_folder):
    if not file.lower().endswith((".jpg", ".png", ".jpeg")):
        continue

    img_path = os.path.join(input_folder, file)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for unreadable files; say so instead of
        # dropping the image silently.
        print(f"Skipped (unreadable image): {file}")
        continue

    # HRNet crops are taken from this untouched copy so that annotations drawn
    # for earlier detections never end up in a later crop.
    clean = img.copy()
    img_h, img_w = img.shape[:2]

    # Run YOLO
    results = yolo(img)[0]
    # NOTE(review): a pluck point is predicted for every class here, while the
    # notes at the top of this notebook say top-view (class 0) detections
    # should only get a bbox - confirm which behaviour is wanted.
    for box in results.boxes:
        conf = float(box.conf[0])
        if conf < confidence_thresh:
            continue

        x1, y1, x2, y2 = map(int, box.xyxy[0])
        # Clamp to the image so negative coordinates cannot wrap when slicing.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, img_w), min(y2, img_h)
        crop = clean[y1:y2, x1:x2]
        if crop.size == 0:
            continue

        # Run HRNet on cropped region
        inp = preprocess_img(crop).to(device)
        with torch.no_grad():
            keypoints = hrnet(inp).cpu().numpy()[0]

        # The outputs are treated as fractions of the crop size. Use a
        # separate y value when the model provides one; a single-output model
        # falls back to reusing the same value for both axes.
        kx = keypoints[0]
        ky = keypoints[1] if keypoints.size > 1 else keypoints[0]
        cx = int(x1 + kx * (x2 - x1))
        cy = int(y1 + ky * (y2 - y1))

        # Draw bbox + keypoints
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, f"YOLO conf: {conf:.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(img, (cx, cy), 5, (0, 0, 255), -1)
        cv2.putText(img, "Pluck", (cx+5, cy), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 0, 255), 2)

    # Save result (cv2.imwrite reports failure through its return value)
    save_path = os.path.join(output_folder, file)
    if not cv2.imwrite(save_path, img):
        print(f"Failed to save: {save_path}")
        continue
    print(f"Processed: {file} -> {save_path}")

print("✅ All images processed.")


  checkpoint = torch.load(hrnet_model_path, map_location=device)



0: 480x640 2 Saffron_Sides, 114.4ms
Speed: 28.1ms preprocess, 114.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)
Processed: image_12.jpeg -> output_results\image_12.jpeg

0: 480x640 5 Saffron_Sides, 12.8ms
Speed: 1.3ms preprocess, 12.8ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)
Processed: image_13.jpeg -> output_results\image_13.jpeg

0: 480x640 6 Saffron_Sides, 13.0ms
Speed: 2.2ms preprocess, 13.0ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)
Processed: image_15.jpeg -> output_results\image_15.jpeg

0: 480x640 5 Saffron_Sides, 12.9ms
Speed: 1.3ms preprocess, 12.9ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)
Processed: image_17.jpeg -> output_results\image_17.jpeg

0: 640x480 6 Saffron_Sides, 56.3ms
Speed: 1.6ms preprocess, 56.3ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)
Processed: image_19.jpeg -> output_results\image_19.jpeg

0: 480x640 3 Saffron_Sides, 14.0ms
Speed: 1.7m

In [6]:
import os
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as T
from ultralytics import YOLO
import timm  # for HRNet-W18 backbone

# -------------------------------
# HRNet-W18 definition
# -------------------------------
class HRNet(nn.Module):
    """Keypoint regressor: timm backbone followed by a two-layer MLP head.

    Args:
        num_keypoints: Number of keypoints; the head emits two values each.
        backbone: Model name passed to ``timm.create_model``.
        head_hidden: Width of the hidden linear layer in the head.
    """

    def __init__(self, num_keypoints=1, backbone="hrnet_w18", head_hidden=512):
        super().__init__()
        # num_classes=0 removes the classifier
        self.backbone = timm.create_model(backbone, pretrained=True, num_classes=0)
        feature_dim = self.backbone.num_features
        output_dim = num_keypoints * 2  # x and y per keypoint
        self.head = nn.Sequential(
            nn.Linear(feature_dim, head_hidden),
            nn.ReLU(),
            nn.Linear(head_hidden, output_dim),
        )

    def forward(self, x):
        """Return sigmoid-squashed head outputs, i.e. values in the 0-1 range."""
        logits = self.head(self.backbone(x))
        return torch.sigmoid(logits)

def preprocess_img(img, img_size=640):
    """Convert an image array into a single-item batch tensor.

    Args:
        img: Image to convert (the inference loop passes OpenCV crops).
        img_size: Side length of the square the image is resized to.

    Returns:
        The converted image with a batch dimension inserted at position 0.
    """
    # NOTE(review): crops from cv2 are BGR - confirm the model was trained
    # with the same channel order.
    steps = (
        T.ToPILImage(),
        T.Resize((img_size, img_size)),
        T.ToTensor(),
    )
    out = img
    for step in steps:
        out = step(out)
    return out.unsqueeze(0)

# -------------------------------
# CONFIG
# -------------------------------
yolo_model_path = "E:/Project_Work/2025/Saffron_Project/Github_Code/Saffron_Detection/YoloV8+HRNet/YoloV8_Result_Object_detection/Result_Weight/detect/weights/best.pt"
hrnet_model_path = "hrnet_pluck_best.pth"
input_folder = "E:/Project_Work/2025/Saffron_Project/Github_Code/Saffron_Detection/YoloV8+HRNet/YoloV8_Result_Object_detection/test/images_field"
output_folder = "output_results"
confidence_thresh = 0.30  # detections below this confidence are skipped
img_size = 640            # size argument handed to preprocess_img for HRNet crops

os.makedirs(output_folder, exist_ok=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# -------------------------------
# LOAD MODELS
# -------------------------------
yolo = YOLO(yolo_model_path)

# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
checkpoint = torch.load(hrnet_model_path, map_location=device)
# Use the weights stored under "model" when present, else the object itself.
state_dict = checkpoint.get("model", checkpoint)

hrnet = HRNet(num_keypoints=1, backbone="hrnet_w18", head_hidden=512).to(device)
# strict=False tolerates key mismatches, so surface them: otherwise a
# checkpoint from a different architecture would leave layers at their
# initial weights without any warning.
incompatible = hrnet.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys or incompatible.unexpected_keys:
    print(
        "WARNING: HRNet checkpoint does not fully match the model: "
        f"{len(incompatible.missing_keys)} missing key(s), "
        f"{len(incompatible.unexpected_keys)} unexpected key(s). "
        "Predictions may come from layers that were not loaded."
    )
hrnet.eval()

# -------------------------------
# INFERENCE LOOP
# -------------------------------
# For every image in input_folder: detect boxes with YOLO, predict a
# normalized (x, y) pluck point inside each confident box with HRNet,
# draw both, and write the annotated image to output_folder.
for file in os.listdir(input_folder):
    if not file.lower().endswith((".jpg", ".png", ".jpeg")):
        continue

    img_path = os.path.join(input_folder, file)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None on failure; report it rather than skipping
        # the file without a trace.
        print(f"Skipped (unreadable image): {file}")
        continue

    # Pristine copy for HRNet crops: rectangles, labels and points drawn for
    # earlier detections must not appear in the input of later ones.
    pristine = img.copy()
    height, width = img.shape[:2]

    results = yolo(img)[0]
    # NOTE(review): every class gets a pluck point here, while the notes at the
    # top of this notebook reserve it for side-view (class 1) detections -
    # confirm the intended behaviour.
    for box in results.boxes:
        conf = float(box.conf[0])
        if conf < confidence_thresh:
            continue

        x1, y1, x2, y2 = map(int, box.xyxy[0])
        # Keep the box inside the image so slicing never wraps on negatives.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        crop = pristine[y1:y2, x1:x2]
        if crop.size == 0:
            continue

        inp = preprocess_img(crop, img_size=img_size).to(device)
        with torch.no_grad():
            kp = hrnet(inp).cpu().numpy()[0]  # shape [2] normalized x,y

        # Scale the normalized point by the box size and shift to image coords
        cx = int(x1 + kp[0] * (x2 - x1))
        cy = int(y1 + kp[1] * (y2 - y1))

        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, f"YOLO conf: {conf:.2f}", (x1, y1-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)
        cv2.circle(img, (cx, cy), 5, (0,0,255), -1)
        cv2.putText(img, "Pluck", (cx+5, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 2)

    save_path = os.path.join(output_folder, file)
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(save_path, img):
        print(f"Failed to save: {save_path}")
        continue
    print(f"Processed: {file} -> {save_path}")

print("✅ All images processed.")


  checkpoint = torch.load(hrnet_model_path, map_location=device)



0: 480x640 2 Saffron_Sides, 13.3ms
Speed: 6.8ms preprocess, 13.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Processed: image_12.jpeg -> output_results\image_12.jpeg

0: 480x640 5 Saffron_Sides, 12.9ms
Speed: 1.1ms preprocess, 12.9ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)
Processed: image_13.jpeg -> output_results\image_13.jpeg

0: 480x640 6 Saffron_Sides, 13.7ms
Speed: 1.3ms preprocess, 13.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Processed: image_15.jpeg -> output_results\image_15.jpeg

0: 480x640 5 Saffron_Sides, 11.7ms
Speed: 1.1ms preprocess, 11.7ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)
Processed: image_17.jpeg -> output_results\image_17.jpeg

0: 640x480 6 Saffron_Sides, 13.8ms
Speed: 1.3ms preprocess, 13.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 480)
Processed: image_19.jpeg -> output_results\image_19.jpeg

0: 480x640 3 Saffron_Sides, 12.4ms
Speed: 1.3ms p