In [1]:
!git clone https://github.com/ultralytics/yolov5
!pip install -qr yolov5/requirements.txt

Cloning into 'yolov5'...


In [2]:
import sys

# Put the cloned repository root on the import path (the imports in the next
# cell rely on it). The membership check keeps the cell idempotent:
# re-running it does not pile up duplicate entries in sys.path.
if 'yolov5' not in sys.path:
    sys.path.insert(0, 'yolov5')

In [3]:
import cv2
import torch
from pathlib import Path
from yolov5.models.experimental import attempt_load
from yolov5.utils.augmentations import letterbox
from yolov5.utils.general import non_max_suppression, scale_boxes
from yolov5.utils.torch_utils import select_device, time_sync
from yolov5.utils.plots import Annotator

def process_image(img, imgsz: int):
    """Convert a BGR image array into a normalised, batched float tensor.

    Steps, in order: resize/pad with ``letterbox`` to ``imgsz``, reverse the
    channel axis (BGR -> RGB), move channels first (HxWxC -> CxHxW), cast to
    float, scale 0-255 down to 0.0-1.0, and prepend a batch axis of size 1.

    Args:
        img: Image array in HxWxC layout with BGR channel order.
        imgsz: Target size forwarded to ``letterbox``.

    Returns:
        A float tensor shaped (1, C, H, W); H and W are whatever ``letterbox``
        produced for the given ``imgsz``.
    """
    boxed = letterbox(img, imgsz)[0]
    # The reversed slice has a negative stride, which torch.from_numpy rejects,
    # so materialise it with copy() before handing it to torch.
    rgb_hwc = boxed[:, :, ::-1].copy()
    chw = torch.from_numpy(rgb_hwc).permute(2, 0, 1)  # HxWxC -> CxHxW
    normalised = chw.float() / 255.0  # 0 - 255 to 0.0 - 1.0
    return normalised[None]  # add the leading batch dimension

def _class_label(names, cls_idx: int) -> str:
    """Return the display name for ``cls_idx``, or ``class<idx>`` if ``names`` has no entry for it."""
    try:
        return str(names[cls_idx])
    except (IndexError, KeyError, TypeError):
        # names is None, too short, or a mapping without this class index.
        return f'class{cls_idx}'


def detect_text(weights_path, image_path, imgsz=640, conf_thres=0.25, iou_thres=0.45, names=None,
                output_path="output.jpg"):
    """Run a YOLOv5 model on one image, draw the detections and save the result.

    Args:
        weights_path: Checkpoint passed to ``attempt_load``.
        image_path: Path of the image to read (``str`` or ``pathlib.Path``).
        imgsz: Inference size forwarded to ``process_image``.
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        iou_thres: IoU threshold passed to ``non_max_suppression``.
        names: Class-index -> label lookup (sequence or mapping). When ``None``
            the model's own ``names`` attribute is used if it has one. Indices
            without an entry are labelled ``class<idx>`` instead of raising.
        output_path: Where the annotated image is written.

    Returns:
        ``(detected_texts, image)``: the list of ``"<label> <conf>"`` strings,
        one per detection, and the annotated BGR image array.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
    """
    # Read the image first so that a bad path fails before the (potentially
    # large) checkpoint is downloaded and loaded.
    img0 = cv2.imread(str(image_path))  # BGR; cv2 returns None instead of raising
    if img0 is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    device = select_device('')  # empty string lets YOLOv5 pick the device
    model = attempt_load(weights_path, device)
    model.eval()

    if names is None:
        names = getattr(model, 'names', None)

    img = process_image(img0, imgsz).to(device)

    # no_grad (not inference_mode): the detections are edited in place below.
    with torch.no_grad():
        pred = model(img, augment=False)[0]
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=None)

    detected_texts = []
    annotated = img0
    for det in pred:  # one entry per image in the batch
        if not len(det):
            continue
        # Map boxes from the letterboxed tensor back to original image coordinates.
        det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], img0.shape).round()

        annotator = Annotator(annotated)
        for *xyxy, conf, cls in reversed(det):
            label = f'{_class_label(names, int(cls))} {conf:.2f}'
            detected_texts.append(label)
            annotator.box_label(xyxy, label)
        # result() returns the drawn image; Annotator has no im_show() method.
        annotated = annotator.result()

    if cv2.imwrite(output_path, annotated):
        print(f"Output image saved as {output_path}")
    else:
        print(f"Failed to write output image to {output_path}")

    return detected_texts, annotated

# Pre-trained checkpoint; available from https://github.com/ultralytics/yolov5/releases
weights = 'yolov5x.pt'
# Input image, addressed relative to the notebook's working directory.
image_path = '../../data/raw/1687408498051.jpg'
# Class-index -> label lookup used when drawing boxes.
# NOTE(review): this list has a single entry; confirm the checkpoint above is a
# one-class text model, otherwise class indices above 0 have no label here.
names = ['text']

detected_texts, img = detect_text(
    weights_path=weights,
    image_path=image_path,
    imgsz=1024,
    conf_thres=0.1,
    names=names,
)

print("Detected texts:", detected_texts)

YOLOv5  v7.0-187-g0004c74 Python-3.10.11 torch-2.0.1+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x.pt to yolov5x.pt...
100%|██████████| 166M/166M [00:06<00:00, 25.5MB/s] 

Fusing layers... 
YOLOv5x summary: 444 layers, 86705005 parameters, 0 gradients


Output image saved as output.jpg
Detected texts: []
