In [6]:
from ultralytics import YOLO
from mmocr.apis import MMOCRInferencer
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import torch
import cv2
import os

2023-10-14 09:11:21,902 - modelscope - INFO - PyTorch version 2.0.1+cu117 Found.
2023-10-14 09:11:21,905 - modelscope - INFO - TensorFlow version 2.14.0 Found.
2023-10-14 09:11:21,906 - modelscope - INFO - Loading ast index from C:\Users\Олег\.cache\modelscope\ast_indexer
2023-10-14 09:11:22,071 - modelscope - INFO - Loading done! Current index file version is 1.9.2, with md5 d900a8624d792d555ed3cef91c01c35b and a total number of 941 components indexed


In [39]:
"""
Models this notebook is run with:
    detection_model: ./models/custom_yolov8pt_25_orig.pt
    rec_model: damo/cv_convnextTiny_ocr-recognition-general_damo
    angle_rec_model: Aster
"""

# Directory prefix (trailing slash included) that NumberOcrModel concatenates
# with the image file name to locate the crop written by the detector.
DETECTION_SAVE_PATH = './yolo_detections/results/crops/number/'

# Maps the `bin_type` names accepted by NumberOcrModel.predict to the cv2
# constant of the same name.
BIN_TYPES = {
    flag_name: getattr(cv2, flag_name)
    for flag_name in (
        'ADAPTIVE_THRESH_GAUSSIAN_C',
        'ADAPTIVE_THRESH_MEAN_C',
        'THRESH_OTSU',
    )
}

class NumberOcrModel:
    """Detect a number region with YOLO and read it with two OCR recognizers.

    Pipeline run by :meth:`predict`:

    1. ``preprocess`` runs the YOLO detector with ``save_crop=True`` so the
       detected region is written to disk, locates that crop and optionally
       binarizes it in place.
    2. ``recognize`` feeds the crop file to both recognizers and returns
       their raw outputs.

    Attributes:
        detection_model: ``YOLO`` instance used for detection.
        rec_model: modelscope OCR-recognition pipeline.
        angle_rec_model: ``MMOCRInferencer`` recognizer.
        detection_result: return value of the last detector call, or None.
        img_path: path of the image passed to the last ``predict`` call.
        image_name: base file name of ``img_path``.
        bin_type: binarization mode of the last ``predict`` call (a key of
            ``BIN_TYPES``) or None.
        crop_path: path of the crop found by the last ``preprocess`` call,
            or None if no crop has been located yet.
    """

    def __init__(self, detection_model, rec_model, angle_rec_model):
        """Load the detector and both recognizers.

        Args:
            detection_model: weights path/name passed to ``YOLO``.
            rec_model: model id passed to the modelscope ``pipeline`` for
                ``Tasks.ocr_recognition``.
            angle_rec_model: recognizer name passed to
                ``MMOCRInferencer(rec=...)``.
        """
        self.detection_model = YOLO(detection_model)
        self.rec_model = pipeline(Tasks.ocr_recognition, model=rec_model)
        self.angle_rec_model = MMOCRInferencer(rec=angle_rec_model)

        self.detection_result = None
        self.img_path = None
        self.image_name = None
        self.bin_type = None
        self.crop_path = None

        # Must be called on `self`: referring to a notebook-global `model`
        # raises NameError on a fresh kernel, or silently prepares a stale
        # instance left over from a previous run.
        self.prepare_model()

    def prepare_model(self):
        """Move the detector to the GPU when CUDA is available."""
        if torch.cuda.is_available():
            self.detection_model.to('cuda')

    def _find_crop(self):
        """Return the path of the crop written by the last detector call.

        The run directory actually used by the detector is preferred over the
        fixed ``DETECTION_SAVE_PATH``, because Ultralytics may save into an
        incremented directory (``results2``, ``results3``, ...) and the fixed
        path would then point at a stale crop from an earlier run.

        Raises:
            FileNotFoundError: if no crop file exists, e.g. nothing was
                detected in the image.
        """
        predictor = getattr(self.detection_model, 'predictor', None)
        run_dir = getattr(predictor, 'save_dir', None)
        if run_dir is None:
            # Detector does not expose its run directory: use the fixed path.
            crop_dir = DETECTION_SAVE_PATH
        else:
            # Last component of DETECTION_SAVE_PATH is the detected class
            # folder ('number'); reuse it so both locations stay in agreement.
            class_dir = os.path.basename(os.path.normpath(DETECTION_SAVE_PATH))
            crop_dir = os.path.join(str(run_dir), 'crops', class_dir)

        # Crops appear to be written as '<stem>.jpg' whatever the input
        # extension is; the original file name is tried as a fallback.
        stem = os.path.splitext(self.image_name)[0]
        for file_name in (stem + '.jpg', self.image_name):
            candidate = os.path.join(crop_dir, file_name)
            if os.path.isfile(candidate):
                return candidate

        raise FileNotFoundError(
            'No detection crop found for %r in %r' % (self.image_name, crop_dir)
        )

    def _binarize_crop(self):
        """Binarize the crop at ``self.crop_path`` in place using ``self.bin_type``."""
        # Thresholding needs an 8-bit single-channel image, while a default
        # cv2.imread returns 3-channel BGR, so load as grayscale.
        gray_img = cv2.imread(self.crop_path, cv2.IMREAD_GRAYSCALE)
        if gray_img is None:
            raise FileNotFoundError('Cannot read detection crop %r' % self.crop_path)

        # NOTE(review): a 1x1 Gaussian kernel leaves the image effectively
        # unchanged; kept as in the original pipeline - confirm intended size.
        blur_img = cv2.GaussianBlur(gray_img, (1, 1), 0)

        method = BIN_TYPES[self.bin_type]
        if method == cv2.THRESH_OTSU:
            # Otsu is a global threshold flag, not an adaptive method, so it
            # has to go through cv2.threshold instead of adaptiveThreshold.
            _, bin_img = cv2.threshold(
                blur_img, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
            )
        else:
            bin_img = cv2.adaptiveThreshold(
                blur_img, 255, method, cv2.THRESH_BINARY_INV, 29, -4
            )
        cv2.imwrite(self.crop_path, bin_img)

    def preprocess(self):
        """Run detection on ``self.img_path`` and prepare the crop for OCR.

        Sets ``image_name``, ``detection_result`` and ``crop_path``; when
        ``bin_type`` is set the crop file is overwritten with its binarized
        version.

        Raises:
            FileNotFoundError: if the detector produced no readable crop.
        """
        self.image_name = os.path.basename(os.path.normpath(self.img_path))
        # Reset first so a failed run cannot leave the previous crop selected.
        self.crop_path = None

        result = self.detection_model.predict(
            self.img_path,
            save=True,
            save_crop=True,
            project='yolo_detections',
            name='results',
        )
        self.detection_result = result

        self.crop_path = self._find_crop()

        if self.bin_type:
            self._binarize_crop()

    def recognize(self):
        """Run both recognizers on the prepared crop.

        Returns:
            dict with the raw output of the modelscope pipeline under
            ``'Recognition_model'`` and of the MMOCR inferencer under
            ``'Angele_recognition_model'`` (key spelling kept for callers).

        Raises:
            RuntimeError: if no crop has been prepared by ``preprocess``.
        """
        if self.crop_path is None:
            raise RuntimeError('preprocess() must succeed before recognize()')

        rec_result = self.rec_model(self.crop_path)
        angle_rec_result = self.angle_rec_model(self.crop_path)

        result = {
            'Recognition_model': rec_result,
            'Angele_recognition_model': angle_rec_result,
        }

        return result

    def predict(self, img_path, bin_type = None):
        """Detect the number in ``img_path`` and recognize its text.

        Args:
            img_path: path of the input image.
            bin_type: optional key of ``BIN_TYPES`` selecting how the crop is
                binarized before recognition; a falsy value skips it.

        Returns:
            The dict produced by :meth:`recognize`.

        Raises:
            ValueError: if ``bin_type`` is not a key of ``BIN_TYPES``.
            FileNotFoundError: if the detector produced no crop.
        """
        # Validate before the (slow) detection step instead of failing with a
        # bare KeyError afterwards.
        if bin_type and bin_type not in BIN_TYPES:
            raise ValueError(
                'Unknown bin_type %r; expected one of %s'
                % (bin_type, sorted(BIN_TYPES))
            )

        self.img_path = img_path
        self.bin_type = bin_type

        self.preprocess()
        return self.recognize()


In [40]:
# Checkpoints/identifiers for the detector and the two recognizers.
ocr_model_kwargs = {
    'detection_model': './models/custom_yolov8pt_25_orig.pt',
    'rec_model': 'damo/cv_convnextTiny_ocr-recognition-general_damo',
    'angle_rec_model': 'Aster',
}
model = NumberOcrModel(**ocr_model_kwargs)

2023-10-14 09:42:11,916 - modelscope - INFO - Model revision not specified, use revision: v2.3.0
2023-10-14 09:42:12,483 - modelscope - INFO - initiate model from C:\Users\Олег\.cache\modelscope\hub\damo\cv_convnextTiny_ocr-recognition-general_damo
2023-10-14 09:42:12,485 - modelscope - INFO - initiate model from location C:\Users\Олег\.cache\modelscope\hub\damo\cv_convnextTiny_ocr-recognition-general_damo.
2023-10-14 09:42:12,492 - modelscope - INFO - initialize model from C:\Users\Олег\.cache\modelscope\hub\damo\cv_convnextTiny_ocr-recognition-general_damo
2023-10-14 09:42:12,716 - modelscope - INFO - loading model from dir C:\Users\Олег\.cache\modelscope\hub\damo\cv_convnextTiny_ocr-recognition-general_damo
2023-10-14 09:42:12,841 - modelscope - INFO - loading model done


Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textrecog/aster/aster_resnet45_6e_st_mj/aster_resnet45_6e_st_mj-cc56eca4.pth




In [41]:
# Run detection + recognition on a sample image; the returned dict is the cell output.
model.predict(img_path='./data/63098560.jpg')


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\data\63098560.jpg: 480x800 1 number, 807.0ms
Speed: 5.0ms preprocess, 807.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1myolo_detections\results3[0m


Output()

{'Recognition_model': {'text': ['63098560']},
 'Angele_recognition_model': {'predictions': [{'rec_texts': ['63098560'],
    'rec_scores': [0.9945330023765564]}],
  'visualization': []}}