In [None]:
# !pip install ultralytics

In [14]:
import torch

print(torch.__version__)

2.0.1+cu118


In [None]:
from ultralytics import YOLO
import cv2

# Create a YOLOv8 model
model = YOLO("yolov8n.pt")

# Start webcam
cap = cv2.VideoCapture(0)

# Open the file to save detection results
f = open("object_locations.txt", "w")

while True:
    # Read one frame from the webcam
    ret, frame = cap.read()
    if not ret:
        break

    # Predict using the YOLO model
    results = model.predict(frame)

    # Iterate over each detection
    for i in range(results.shape[1]):
        detection = results[0][i]
        
        # Here we are assuming that the first 4 elements are the bounding box coordinates,
        # the 5th element is the objectness score, and the rest are the class scores.
        bbox = detection[:4]
        objectness = detection[4]
        class_scores = detection[5:]
        
        # Find the class with the highest score
        class_id = np.argmax(class_scores)
        class_score = class_scores[class_id]

        # We will consider the detection valid if the confidence score is greater than 0.85
        if class_score > 0.85:
            # Write to file, draw bounding boxes, etc.
            # Note that the bbox coordinates will depend on how they are represented in the output.
            # For example, they could be [center_x, center_y, width, height] or [x1, y1, x2, y2].
            f.write(f"Class: {class_id}, BBox: {bbox}\n")

            # Draw the bounding box
            # Note that we are assuming the bbox coordinates are in the format [center_x, center_y, width, height].
            # Depending on the model, this might be different (e.g., [x1, y1, x2, y2]).
            x1, y1 = int(bbox[0] - bbox[2] / 2), int(bbox[1] - bbox[3] / 2)
            x2, y2 = int(bbox[0] + bbox[2] / 2), int(bbox[1] + bbox[3] / 2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Draw the class and confidence score
            label = f"Class: {class_id}, Confidence: {class_score:.2f}"
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("Frame", frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()
f.close()

In [None]:
from ultralytics import YOLO

model = YOLO("yolov8m.pt") 
model.export(format="onnx", imgsz=[480,640])

In [None]:
# !pip install onnxruntime

In [4]:
import onnxruntime
import numpy as np

# ONNX 모델 로드
ort_session = onnxruntime.InferenceSession("yolov8m.onnx")

# 입력 텐서 크기 설정
input_shape = (1, 3, 480, 640)
x = np.random.random(input_shape).astype(np.float32)

# ONNX 런타임에서 입력 및 출력 이름 가져오기
input_name = ort_session.get_inputs()[0].name
output_name = ort_session.get_outputs()[0].name

# ONNX 모델 실행
result = ort_session.run([output_name], {input_name: x})

# 결과 출력
print(result)

[array([[[5.1777692e+00, 1.0288152e+01, 1.7757238e+01, ...,
         5.3694403e+02, 5.7199719e+02, 5.8175946e+02],
        [3.7307391e+00, 3.7775056e+00, 4.2060409e+00, ...,
         4.2660059e+02, 4.1951974e+02, 4.0324100e+02],
        [1.0757503e+01, 2.0699392e+01, 3.4478863e+01, ...,
         2.6600211e+02, 2.8358252e+02, 2.6613248e+02],
        ...,
        [7.7486038e-07, 5.3644180e-07, 4.4703484e-07, ...,
         1.6391277e-06, 2.0861626e-06, 1.9073486e-06],
        [4.4703484e-07, 2.0861626e-07, 2.0861626e-07, ...,
         2.0265579e-06, 2.2649765e-06, 2.1755695e-06],
        [1.1920929e-06, 4.4703484e-07, 2.9802322e-07, ...,
         1.9073486e-06, 2.1457672e-06, 2.1457672e-06]]], dtype=float32)]


In [11]:
import cv2
import onnxruntime
import numpy as np
from PIL import Image
from torchvision import transforms

# ONNX 모델 로드
ort_session = onnxruntime.InferenceSession("yolov8m.onnx")

# ONNX 런타임에서 입력 및 출력 이름 가져오기
input_name = ort_session.get_inputs()[0].name
output_name = ort_session.get_outputs()[0].name

# 웹캠을 사용하거나 비디오 파일을 읽기 위한 VideoCapture 생성
cap = cv2.VideoCapture(0) # Use 0 for webcam, or replace with video file path

# Open the file to save detection results
f = open("object_locations.txt", "w")

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    # OpenCV는 BGR을 사용하지만, PyTorch는 RGB를 사용하므로 색상을 변환합니다.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # 이미지를 PIL 이미지로 변환합니다.
    pil_image = Image.fromarray(frame)
    # 이미지를 모델 입력에 맞게 전처리 합니다.
    # 모델 입력 크기에 따라 변경해야 합니다.
    preprocess = transforms.Compose([
        transforms.Resize((480, 640)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    img = preprocess(pil_image)
    img = img.unsqueeze(0).numpy()

    # ONNX 모델 실행
    # results = ort_session.run([output_name], {input_name: img})
    results = ort_session.run([output_name], {input_name: img})
    # 모델의 결과를 확인합니다.
    print(f"Model output shape: {results[0].shape}")
    print(f"First detection output: {results[0][0]}")

    # Iterate over each detection
    for detection in results[0]:
        if detection[4] > 0.85:  # If the confidence score is greater than 0.85
            f.write(f"Class: {int(detection[5])}, BBox: {detection[:4]}\n")

            # Draw the bounding box
            x1, y1, x2, y2 = map(int, detection[:4])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Draw the class and confidence score
            label = f"Class: {int(detection[5])}, Confidence: {detection[4]:.2f}"
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("Frame", frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()
f.close()


Model output shape: (1, 84, 6300)
First detection output: [[     3.9902      13.686      29.069 ...      538.46      558.88      561.68]
 [     4.1284      4.4876      4.6358 ...      389.38      389.11      397.88]
 [     9.2838      28.388      55.138 ...      202.39       161.7      165.67]
 ...
 [ 2.0862e-07  2.0862e-07  1.7881e-07 ...  2.6822e-06  8.8215e-06  1.3709e-06]
 [ 1.1921e-07  1.4901e-07  1.1921e-07 ...  3.0994e-06  4.8578e-06  1.9372e-06]
 [ 1.4901e-07  1.7881e-07  1.4901e-07 ...  1.6391e-06  2.7716e-06  1.2815e-06]]


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
import cv2

from yolov8 import YOLOv8

# Initialize the webcam
cap = cv2.VideoCapture(0)

# Initialize YOLOv7 object detector
model_path = "models/yolov8m.onnx"
yolov8_detector = YOLOv8(model_path, conf_thres=0.5, iou_thres=0.5)

cv2.namedWindow("Detected Objects", cv2.WINDOW_NORMAL)
while cap.isOpened():

    # Read frame from the video
    ret, frame = cap.read()

    if not ret:
        break

    # Update object localizer
    boxes, scores, class_ids = yolov8_detector(frame)

    combined_img = yolov8_detector.draw_detections(frame)
    cv2.imshow("Detected Objects", combined_img)

    # Press key q to stop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

## YOLOV5

In [4]:
print(torch.__version__)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

Using cache found in C:\Users\user/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-7-17 Python-3.9.17 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)

Fusing layers... 


2.0.1+cu118


YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [10]:
from ultralytics import YOLO
import matplotlib.pyplot as plt
import cv2,torch
# Check if CUDA is available and if so, use it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# # Load the model
# model = torch.hub.load('ultralytics/yolov5', 'yolov5s').to(device) # use the appropriate model

# # Prepare the dummy input
# dummy_input = torch.randn(1, 3, 640, 640).to(device)

# # Export the model to an ONNX file
# torch.onnx.export(model, dummy_input, "test.onnx", verbose=True, opset_version=16)
# Load the model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s').to(device)

# Prepare the dummy input
dummy_input = torch.randn(1, 3, 640, 640).to(device)

# Export the model to an ONNX file with do_constant_folding=False
torch.onnx.export(model, dummy_input, "test.onnx", verbose=True, opset_version=16, do_constant_folding=False)

Using cache found in C:\Users\user/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-7-17 Python-3.9.17 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:03<00:00, 4.89MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 
  y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
  if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:


verbose: False, log level: Level.ERROR

