In [20]:
import torch
from torchvision.io import read_image
from torchvision.io.image import read_image
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.transforms.functional import to_pil_image
from torchvision.utils import draw_bounding_boxes

# Image fed to the ResNet-50 classification cell.
CLASSIFICATION_IMAGE_PATH = "lanternBearers.jpg"
img = read_image(CLASSIFICATION_IMAGE_PATH)

In [16]:
# Classify `img` with a pretrained ResNet-50 and print the top-1 category.
weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=weights)
model.eval()  # put the network in evaluation mode before inference

# Preprocessing pipeline that ships with the selected weights
preprocess = weights.transforms()

# unsqueeze(0) adds a leading batch dimension, so the single image becomes a batch of one
batch = preprocess(img).unsqueeze(0)

# Inference only: disable autograd so no gradient graph is built for the forward pass
with torch.no_grad():
    # squeeze(0) drops the batch dimension again; softmax(0) turns the scores into probabilities
    prediction = model(batch).squeeze(0).softmax(0)

classId = prediction.argmax().item()  # index of the most probable class, as a Python number

# Softmax probability the model assigns to the winning class
score = prediction[classId].item()

categoryName = weights.meta["categories"][classId]
print(f"{categoryName}: {100 * score:.1f}%")

rugby ball: 68.0%


In [31]:
# Detect objects in the image with Faster R-CNN and draw the resulting boxes.
img = read_image("LSupper.jpg")

# Step 1: Initialize model with the best available weights.
# box_score_thresh=0.9 is passed to the model constructor as the score threshold for returned boxes.
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms (the model is called with a list of images)
batch = [preprocess(img)]

# Step 4: Use the model and visualize the prediction.
# Inference only: disable autograd so no gradient graph is built for the forward pass.
with torch.no_grad():
    prediction = model(batch)[0]

# Convert label ids to plain Python ints before indexing the category list
categories = weights.meta["categories"]
labels = [categories[label_id] for label_id in prediction["labels"].tolist()]

# Boxes are drawn on the original (un-preprocessed) image tensor
box = draw_bounding_boxes(img, boxes=prediction["boxes"],
                          labels=labels,
                          colors="red",
                          width=4, font_size=30)

im = to_pil_image(box.detach())
# Leave the PIL image as the cell's last expression so it renders inline in the
# notebook; im.show() would hand it to an external OS viewer instead.
im

In [28]:
import torch

# Run the YOLOv5-small model from torch.hub on the same image.

# Model
# NOTE(review): torch.hub.load fetches the repository code at run time and no tag is
# pinned here (the recorded cache directory is "ultralytics_yolov5_master"), so results
# may change between runs -- consider pinning a release tag.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Images
imgs = ['LSupper.jpg']  # batch of images

# Inference
results = model(imgs)

# Results
results.print()
results.save()  # or .show()

# Keep both views of the first image's predictions in named variables; a bare
# expression in the middle of a cell is evaluated and then silently discarded.
detections_tensor = results.xyxy[0]  # img1 predictions (tensor)
detections_df = results.pandas().xyxy[0]  # img1 predictions (pandas)

# Last expression of the cell -> rendered as the cell output
detections_df

Using cache found in C:\Users\Ryan O'Mullan/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-10-24 Python-3.9.7 torch-2.1.0+cu118 CUDA:0 (NVIDIA GeForce GTX 1060 3GB, 3072MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
image 1/1: 320x640 11 persons
Speed: 13.0ms pre-process, 44.3ms inference, 4.0ms NMS per image at shape (1, 3, 320, 640)
Saved 1 image to runs\detect\exp2


Unnamed: 0,xmin,ymin,xmax,ymax,confidence,class,name
0,2.598087,115.20948,98.243042,301.753845,0.794459,0,person
1,267.703918,124.396439,369.134644,210.345383,0.767014,0,person
2,203.675827,129.155609,274.49762,208.546539,0.763258,0,person
3,341.361237,123.533417,436.656464,205.5896,0.70825,0,person
4,415.60141,118.371002,539.326172,207.589722,0.699998,0,person
5,159.63298,140.302322,210.648544,215.316971,0.696168,0,person
6,559.964783,127.92321,639.555115,297.294189,0.690447,0,person
7,102.467194,122.0961,167.768311,208.072571,0.62751,0,person
8,82.588486,120.440849,120.281998,205.197662,0.488871,0,person
9,404.433807,110.397285,469.572174,177.226196,0.473802,0,person
