In [11]:
import torch
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Single source of truth for the checkpoint so the processor and the model
# are always loaded from the same repository and cache directory.
CHECKPOINT = "PekingU/rtdetr_r101vd"
CACHE_DIR = "./hf-models"
# Use the GPU when one is present; otherwise fall back to the CPU instead of
# failing on the hard-coded "cuda" device.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoImageProcessor.from_pretrained(CHECKPOINT, cache_dir=CACHE_DIR)
model = AutoModelForObjectDetection.from_pretrained(CHECKPOINT, cache_dir=CACHE_DIR)
# Last expression of the cell: moves the weights to DEVICE and displays the
# module tree as the cell output.
model.to(DEVICE)

RTDetrForObjectDetection(
  (model): RTDetrModel(
    (backbone): RTDetrConvEncoder(
      (model): RTDetrResNetBackbone(
        (embedder): RTDetrResNetEmbeddings(
          (embedder): Sequential(
            (0): RTDetrResNetConvLayer(
              (convolution): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
              (normalization): RTDetrFrozenBatchNorm2d()
              (activation): ReLU()
            )
            (1): RTDetrResNetConvLayer(
              (convolution): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (normalization): RTDetrFrozenBatchNorm2d()
              (activation): ReLU()
            )
            (2): RTDetrResNetConvLayer(
              (convolution): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (normalization): RTDetrFrozenBatchNorm2d()
              (activation): ReLU()
            )
          )
          (pooler): MaxPool2d(

In [16]:
import torch
from PIL import Image, ImageDraw
import requests



# Normalise to 3-channel RGB: the backbone's first convolution takes 3 input
# channels and the red overlay drawn below needs an RGB canvas.
image = Image.open('./images/eva.jpg').convert("RGB")

# Place the inputs on whatever device the model currently lives on rather
# than assuming CUDA.
inputs = processor(images=image, return_tensors="pt").to(model.device)

model.eval()
# Inference only: disable autograd so no graph is built for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)

# PIL reports (width, height); the post-processor is given (height, width),
# hence the reversal.
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.5)

# Draw every detection that passed the threshold directly onto `image`.
draw = ImageDraw.Draw(image)
for result in results:
    for score, label_id, box in zip(result["scores"], result["labels"], result["boxes"]):
        score, label = score.item(), label_id.item()
        box = [round(i, 2) for i in box.tolist()]
        draw.rectangle(box, outline="red", width=3)
        label_text = model.config.id2label[label]
        # Caption is anchored at the first two box coordinates.
        draw.text((box[0], box[1]), f"{label_text}: {score:.2f}", fill="red")

# image

In [2]:
import torchvision

In [6]:
# Inspect which entries the image processor produced for the model call.
inputs.keys()

dict_keys(['pixel_values'])

In [3]:
import cv2
# Preview an IP-camera stream in a window until 'q' is pressed or a frame
# cannot be read.
cap = cv2.VideoCapture("http://192.168.253.2:4747/video")
# Optional: request a specific capture size.
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Also reached when the stream could not be opened at all.
            print("Error reading frame")
            break
        cv2.imshow("Network Camera", frame)
        # Mask to the low byte so the comparison with ord('q') also holds on
        # platforms where waitKey returns extra high bits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the stream and close the window, even if the loop raises
    # or the cell is interrupted.
    cap.release()
    cv2.destroyAllWindows()

Error reading frame


In [1]:
import cv2
from ultralytics import YOLO

# Pretrained YOLO11n weights and the picture to run them on.
weights_path = "yolo11n.pt"
image_path = "./images/eva.jpg"

# NOTE: `model` and `results` are also assigned by the RT-DETR cells of this
# notebook; running this cell rebinds both names.
model = YOLO(weights_path)
results = model(image_path)


image 1/1 c:\Users\hanma\Programming\nextjs-learn\ai-python\images\eva.jpg: 480x640 1 umbrella, 1 kite, 64.5ms
Speed: 6.6ms preprocess, 64.5ms inference, 157.1ms postprocess per image at shape (1, 3, 480, 640)


In [3]:
import cv2


# Report the frame size of the default camera (device index 0).
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Report the failure without calling exit(): inside a notebook that
        # would shut down the kernel instead of just ending this cell.
        print("Error: Could not open video device")
    else:
        # cap.get returns floats, hence the int() conversion when printing.
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

        print(f"摄像头分辨率: {int(width)}x{int(height)}")
finally:
    # Release the camera on every path.
    cap.release()

摄像头分辨率: 640x480


In [2]:
import cv2
def list_cameras(max_cameras=10):
    """Probe local capture devices and return the indices that can be opened.

    Args:
        max_cameras: Number of device indices to try, starting at 0.
            Indices ``0 .. max_cameras - 1`` are probed in order.

    Returns:
        list: Indices for which ``cv2.VideoCapture(i).isOpened()`` was true,
        in ascending order. Empty when ``max_cameras`` is 0 or negative.
    """
    available_cameras = []
    for i in range(max_cameras):
        cap = cv2.VideoCapture(i)
        try:
            if cap.isOpened():
                available_cameras.append(i)
        finally:
            # Release every probe handle, including the ones that failed to
            # open, so no capture object is left dangling.
            cap.release()
    return available_cameras

# List all available cameras
cameras = list_cameras()
print(f"Available cameras: {cameras}")

Available cameras: [0]


# OpenCV fonts

In [7]:
import cv2

# 创建一个黑色图像
# Load the base image that the font samples are drawn onto.
IMAGE_PATH = './images/eva.jpg'
img = cv2.imread(IMAGE_PATH)
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None
    # rather than raising, so fail here with a clear message.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# One row per sample: (text, font face, font scale, BGR colour).
font_samples = [
    ('Simplex', cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0)),
    ('Plain', cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255)),
    ('Duplex', cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 0)),
    ('Complex', cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 0)),
    ('Triplex', cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0)),
    ('Complex Small', cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 0)),
    ('Script Simplex', cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (255, 255, 0)),
    ('Script Complex', cv2.FONT_HERSHEY_SCRIPT_COMPLEX, 1, (255, 255, 0)),
    ('Italic', cv2.FONT_HERSHEY_SIMPLEX | cv2.FONT_ITALIC, 1, (255, 255, 0)),
]

# Draw the samples on the image, one every 30 pixels starting at y = 30.
for row, (text, font, scale, color) in enumerate(font_samples, start=1):
    cv2.putText(img, text, (10, 30 * row), font, scale, color, 1)

# Show the image and wait for any key press before closing the window.
cv2.imshow('Image with Text', img)
cv2.waitKey(0)
cv2.destroyAllWindows()