In [3]:
import cv2
import numpy as np
import torch
import yaml
import sys

# Modify sys.path
# ** 패스 변경 필요
sys.path.insert(0, "./yolov5/") # yolov5 패스지정

# Now import attempt_load
from yolov5.models.experimental import attempt_load

# Load the "custom" YOLOv5 model
# ** 0813 모델: 0812 데이터로 학습, 크기: 640x640
model = attempt_load('./best.pt')

# Initialize the webcam capture
webcam_cap = cv2.VideoCapture(0)

# Load class names from data.yaml
with open('./yolov5/BATCAM-MX-Data-Labeling-1/data.yaml', 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)
    class_names = data['names']

while True:
    # Capture the webcam frame
    ret, webcam_frame = webcam_cap.read()
    
    # Check if frame read is valid
    if not ret:
        print("Failed to grab frame.")
        continue
    
    # Convert the webcam frame from BGR to RGB and reshape for model input
    img = cv2.cvtColor(webcam_frame, cv2.COLOR_BGR2RGB)
    img_tensor = torch.from_numpy(img).float().permute(2, 0, 1).unsqueeze(0) / 255.0
    
    # Pass the frame through the YOLOv5 model
    results = model(img_tensor)

    # Extract tensor from results tuple
    detections = results[0]

    # Assuming there's a confidence threshold you want to apply
    conf_thresh = 0.6

    # Use the confidence score to filter out weak detections
    mask = detections[0, :, 4] > conf_thresh

    # Extract the boxes, scores, and classes from the detections
    boxes = detections[0, mask, :4].cpu().numpy()
    scores = detections[0, mask, 4].cpu().numpy()
    classes = detections[0, mask, 5].cpu().numpy().astype(np.int32)

    # Draw the bounding boxes and labels on the frame
    for box, score, class_idx in zip(boxes, scores, classes):
        x1, y1, x2, y2 = map(int, box)
        class_name = class_names[class_idx]
        cv2.rectangle(webcam_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(webcam_frame, f"{class_name}: {score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        
        # Print the coordinates of the detected object
        print(f"{class_name} coordinates: ({x1}, {y1}), ({x2}, {y2})")

    # Display the frame on the screen
    cv2.imshow('Webcam Capture', webcam_frame)

    # Exit the program if the user presses the 'q' key
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close the window
webcam_cap.release()
cv2.destroyAllWindows()

Fusing layers... 
Model summary: 157 layers, 7026307 parameters, 0 gradients, 15.8 GFLOPs


Flange coordinates: (104, 228), (47, 50)
Flange coordinates: (102, 229), (52, 49)
Flange coordinates: (101, 231), (51, 47)
Flange coordinates: (102, 229), (49, 46)
Flange coordinates: (102, 230), (49, 47)
Flange coordinates: (364, 380), (79, 40)
Flange coordinates: (105, 230), (51, 50)
Flange coordinates: (365, 382), (84, 40)
Flange coordinates: (103, 230), (56, 48)
Flange coordinates: (364, 381), (83, 40)
Flange coordinates: (364, 379), (77, 37)
Flange coordinates: (364, 381), (93, 38)
Flange coordinates: (104, 230), (50, 41)
Flange coordinates: (104, 231), (50, 42)
Flange coordinates: (362, 382), (72, 38)
Flange coordinates: (363, 381), (72, 36)
Flange coordinates: (363, 380), (79, 39)
Flange coordinates: (105, 229), (50, 49)
Flange coordinates: (365, 383), (84, 39)
Flange coordinates: (362, 379), (81, 38)
Flange coordinates: (214, 216), (53, 47)
Flange coordinates: (103, 230), (55, 48)
Flange coordinates: (364, 382), (82, 39)
Flange coordinates: (364, 379), (77, 37)
Flange coordinat