In [88]:
from ultralytics import YOLO
from mmocr.apis import MMOCRInferencer
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import torch
import cv2
import os
import csv
from os import walk
import pandas as pd
import re

In [89]:
# Reference configuration used when the model is instantiated further down in this notebook.
'''
    detection_mode: ./models/custom_yolov8pt_25_orig.pt
    rec_model: damo/cv_convnextTiny_ocr-recognition-general_damo
    angle_rec_model: Aster
'''

# Directory the detector's crops of class "number" are expected in for the run named "results".
# NOTE(review): this path is fixed to the un-suffixed "results" run directory — confirm it is
# still the right location once several detection runs have been saved.
DETECTION_SAVE_PATH = './yolo_detections/results/crops/number/'
# CSV file the recognition results are written to and read back from.
MODEL_RESULT_PATH = './model_result/results.csv'

# Maps a binarization option name to the corresponding OpenCV constant.
# NOTE(review): THRESH_OTSU is a flag for cv2.threshold, not an adaptiveMethod for
# cv2.adaptiveThreshold — confirm how consumers of this mapping treat that entry.
BIN_TYPES = {
    'ADAPTIVE_THRESH_GAUSSIAN_C': cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    'ADAPTIVE_THRESH_MEAN_C': cv2.ADAPTIVE_THRESH_MEAN_C,
    'THRESH_OTSU': cv2.THRESH_OTSU
}

class NumberOcrModel:
    """Detect a number region with YOLO and read it with two OCR recognizers.

    Pipeline for one image:
      1. ``preprocess`` runs the YOLO detector, which saves a crop of every
         detected box, and optionally binarizes the saved crop in place.
      2. ``recognize`` feeds the saved crop to both recognizers, keeps only
         the digits of each prediction and validates them with ``is_valid``.
      3. ``predict`` chains the two steps and returns ``None`` when the
         detector found nothing.

    Attributes:
        detection_model: ``YOLO`` detector built from ``detection_model``.
        rec_model: ModelScope ``Tasks.ocr_recognition`` pipeline.
        angle_rec_model: ``MMOCRInferencer`` text recognizer.
        detection_result, img_path, image_name, bin_type, rec_result:
            state of the most recent ``predict`` call (``None`` before it).
    """

    # Location the detector is told to save its runs to. Every ``predict``
    # call creates a fresh run directory (results, results2, results3, ...).
    PROJECT_DIR = 'yolo_detections'
    RUN_NAME = 'results'
    # Name of the detected class; crops are stored under ``crops/<class>/``.
    CROP_CLASS = 'number'

    def __init__(self, detection_model, rec_model, angle_rec_model):
        """Load the detector and both recognizers.

        Args:
            detection_model: path to the YOLO weights.
            rec_model: ModelScope model id for the OCR recognition pipeline.
            angle_rec_model: MMOCR recognizer name passed as ``rec=``.
        """
        self.detection_model = YOLO(detection_model)
        self.rec_model = pipeline(Tasks.ocr_recognition, model=rec_model)
        self.angle_rec_model = MMOCRInferencer(rec=angle_rec_model)

        self.detection_result = None
        self.img_path = None
        self.image_name = None
        self.bin_type = None
        self.rec_result = None

        self.prepare_model()

    def prepare_model(self):
        """Move the detector to the GPU when CUDA is available."""
        if torch.cuda.is_available():
            self.detection_model.to('cuda')

    @staticmethod
    def _has_detections(detection_result):
        """Return True when the first result holds at least one box."""
        if not detection_result:
            return False
        boxes = getattr(detection_result[0], 'boxes', None)
        return boxes is not None and len(boxes) > 0

    @classmethod
    def _latest_run_dir(cls, project_dir):
        """Return the run directory with the highest numeric suffix.

        Only directories named ``<RUN_NAME>`` or ``<RUN_NAME><number>`` are
        considered, so unrelated folders cannot be picked up by accident.

        Raises:
            FileNotFoundError: if ``project_dir`` has no run directory.
        """
        pattern = re.compile(re.escape(cls.RUN_NAME) + r'(\d*)')
        best_name = None
        best_index = -1
        for entry in os.listdir(project_dir):
            match = pattern.fullmatch(entry)
            if match is None or not os.path.isdir(os.path.join(project_dir, entry)):
                continue
            # The un-suffixed directory is the first run.
            index = int(match.group(1) or 1)
            if index > best_index:
                best_name, best_index = entry, index
        if best_name is None:
            raise FileNotFoundError(f'No detection run directory found in {project_dir}')
        return os.path.join(project_dir, best_name)

    def _run_dir(self):
        """Return the directory the most recent detection run was saved to.

        Prefers the directory reported by the detector's predictor and falls
        back to scanning ``PROJECT_DIR`` when that is not available (for
        example when no prediction has been run in this session).
        """
        predictor = getattr(self.detection_model, 'predictor', None)
        save_dir = getattr(predictor, 'save_dir', None)
        if save_dir:
            return str(save_dir)
        return self._latest_run_dir(self.PROJECT_DIR)

    def _crop_path(self, image_name):
        """Return the path of the saved crop for ``image_name``.

        The detector stores crops as ``<stem>.jpg`` regardless of the input
        extension, so that name is tried first; the plain ``image_name`` is
        kept as a fallback.
        """
        crop_dir = os.path.join(self._run_dir(), 'crops', self.CROP_CLASS)
        jpg_candidate = os.path.join(crop_dir, os.path.splitext(image_name)[0] + '.jpg')
        if os.path.isfile(jpg_candidate):
            return jpg_candidate
        return os.path.join(crop_dir, image_name)

    @staticmethod
    def _binarize_crop(crop_path, bin_prep):
        """Binarize the crop at ``crop_path`` in place.

        Args:
            crop_path: image file to overwrite with its binarized version.
            bin_prep: key of ``BIN_TYPES`` selecting the thresholding method.

        Raises:
            KeyError: if ``bin_prep`` is not a key of ``BIN_TYPES``.
            FileNotFoundError: if the crop cannot be read.
        """
        method = BIN_TYPES[bin_prep]
        # Thresholding needs an 8-bit single-channel image, so load as grayscale.
        gray = cv2.imread(crop_path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            raise FileNotFoundError(f'Cropped detection not found or unreadable: {crop_path}')

        # NOTE(review): a 1x1 Gaussian kernel does not smooth; kept as in the original.
        blurred = cv2.GaussianBlur(gray, (1, 1), 0)
        if method == cv2.THRESH_OTSU:
            # Otsu is a global-threshold flag, not an adaptive method.
            _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        else:
            binary = cv2.adaptiveThreshold(blurred, 255, method, cv2.THRESH_BINARY_INV, 29, -4)
        cv2.imwrite(crop_path, binary)

    @staticmethod
    def _make_record(image_name, number, model_label):
        """Build one result row for ``number`` as read by ``model_label``."""
        return {
            'filename': image_name,
            'type': int(len(number) > 0),
            'number': number,
            'is_correct': is_valid(number),
            'model': model_label
        }

    def preprocess(self, image_path, image_name, bin_prep):
        """Run detection on ``image_path`` and optionally binarize the crop.

        Args:
            image_path: path of the image to run the detector on.
            image_name: file name of the image (used to locate its crop).
            bin_prep: key of ``BIN_TYPES`` or a falsy value to skip binarization.

        Returns:
            The detector's result list for the image.
        """
        detection_result = self.detection_model.predict(
            image_path,
            save=True,
            save_crop=True,
            project=self.PROJECT_DIR,
            name=self.RUN_NAME,
        )
        self.detection_result = detection_result

        # A crop only exists when something was detected; binarize the crop of
        # the run that was just saved so that ``recognize`` reads the same file.
        if bin_prep and self._has_detections(detection_result):
            self._binarize_crop(self._crop_path(image_name), bin_prep)

        return detection_result

    def recognize(self, image_name):
        """Read the saved crop of ``image_name`` with both recognizers.

        Returns:
            A list of two dicts (keys ``filename``, ``type``, ``number``,
            ``is_correct``, ``model``): the first for the recognition model,
            the second for the angle recognition model. ``number`` holds only
            the digits of the predicted text and ``type`` is 1 when at least
            one digit was read.
        """
        crop_img_path = self._crop_path(image_name)
        rec_result = self.rec_model(crop_img_path)
        angle_rec_result = self.angle_rec_model(crop_img_path)

        num_1 = re.sub(r'[^0-9]', '', rec_result['text'][0])
        num_2 = re.sub(r'[^0-9]', '', angle_rec_result['predictions'][0]['rec_texts'][0])

        result = [
            self._make_record(image_name, num_1, 'Recognition_model'),
            self._make_record(image_name, num_2, 'Angle_recognition_model'),
        ]
        self.rec_result = result
        return result

    def predict(self, img_path, bin_prep = None):
        """Detect and recognize the number on the image at ``img_path``.

        Args:
            img_path: path of the image to process.
            bin_prep: optional key of ``BIN_TYPES``; when given, the detected
                crop is binarized before recognition.

        Returns:
            The list produced by ``recognize``, or ``None`` when the detector
            found no box.
        """
        image_name = os.path.basename(os.path.normpath(img_path))
        self.img_path = img_path
        self.image_name = image_name
        self.bin_type = bin_prep
        self.rec_result = None

        detected_data = self.preprocess(img_path, image_name, bin_prep)

        if not self._has_detections(detected_data):
            return None
        return self.recognize(image_name)



In [90]:
# Build the shared pipeline instance used by the cells below: YOLO weights for
# detection, a ModelScope model id for recognition and an MMOCR recognizer name.
model = NumberOcrModel(
    detection_model='./models/custom_yolov8pt_25_orig.pt',
    rec_model='damo/cv_convnextTiny_ocr-recognition-general_damo',
    angle_rec_model='Aster'
)

2023-10-14 14:02:45,586 - modelscope - INFO - Model revision not specified, use revision: v2.3.0
2023-10-14 14:02:46,054 - modelscope - INFO - initiate model from C:\Users\Олег\.cache\modelscope\hub\damo\cv_convnextTiny_ocr-recognition-general_damo
2023-10-14 14:02:46,055 - modelscope - INFO - initiate model from location C:\Users\Олег\.cache\modelscope\hub\damo\cv_convnextTiny_ocr-recognition-general_damo.
2023-10-14 14:02:46,060 - modelscope - INFO - initialize model from C:\Users\Олег\.cache\modelscope\hub\damo\cv_convnextTiny_ocr-recognition-general_damo
2023-10-14 14:02:46,273 - modelscope - INFO - loading model from dir C:\Users\Олег\.cache\modelscope\hub\damo\cv_convnextTiny_ocr-recognition-general_damo
2023-10-14 14:02:46,405 - modelscope - INFO - loading model done


Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textrecog/aster/aster_resnet45_6e_st_mj/aster_resnet45_6e_st_mj-cc56eca4.pth




In [73]:
# test_res = model.predict('./data/42338186.jpg')
# print(test_res)


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\data\42338186.jpg: 800x736 1 number, 121.1ms
Speed: 5.0ms preprocess, 121.1ms inference, 1.0ms postprocess per image at shape (1, 3, 800, 736)
Results saved to [1myolo_detections\results15[0m


Output()

['results', 'results10', 'results11', 'results12', 'results13', 'results14', 'results15', 'results2', 'results3', 'results4', 'results5', 'results6', 'results7', 'results8', 'results9']


In [11]:
def is_valid(result):
    """Check an 8-digit number against its control (last) digit.

    The first seven digits are multiplied alternately by 2 and 1 (starting
    with 2) and the digits of every product are added up. The control digit
    is what is missing to reach the next multiple of 10, and 0 when the sum
    already is a multiple of 10.

    Args:
        result: the recognized number as a string.

    Returns:
        1 if ``result`` consists of exactly 8 decimal digits and its last
        digit equals the computed control digit, otherwise 0.
    """
    if len(result) != 8 or not result.isdecimal():
        return 0

    digits = [int(ch) for ch in result]
    cont_sum = 0
    for i, digit in enumerate(digits[:7]):
        product = digit * (1 if i % 2 == 1 else 2)
        # product is at most 18, so this is the sum of its decimal digits.
        cont_sum += product // 10 + product % 10

    # The outer "% 10" maps a sum that is already a multiple of 10 to control
    # digit 0 instead of the unreachable value 10.
    expected_control = (10 - cont_sum % 10) % 10
    return int(expected_control == digits[7])


def to_csv(results, path=None):
    """Write recognition results to a CSV file, replacing any existing file.

    Args:
        results: iterable of per-image results, each an iterable of row dicts
            with the keys ``filename``, ``type``, ``number``, ``is_correct``
            and ``model``.
        path: destination file; defaults to ``MODEL_RESULT_PATH``.
    """
    target = MODEL_RESULT_PATH if path is None else path

    # Make sure the destination directory exists before opening the file.
    target_dir = os.path.dirname(target)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    fields = ('filename', 'type', 'number', 'is_correct', 'model')
    # newline='' lets the csv writer control line endings on every platform.
    with open(target, 'w', encoding='UTF8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fields, lineterminator = '\n')
        writer.writeheader()
        for res in results:
            # One row per recognizer, however many rows a result carries.
            writer.writerows(res)

In [91]:
DATA_PATH = './test_images/'
# Only files with these extensions are handed to the detector.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')

# walk() descends into sub-directories, so every file name is joined with the
# directory it was found in; sorting keeps the processing order deterministic.
image_paths = sorted(
    os.path.join(dirpath, filename)
    for dirpath, _dirnames, filenames in walk(DATA_PATH)
    for filename in filenames
    if filename.lower().endswith(IMAGE_EXTENSIONS)
)

results = []

for image_path in image_paths:
    res = model.predict(image_path)
    if res:
        results.append(res)
    else:
        print(f'no number detected: {image_path}')

to_csv(results)

# Read "number" back as text so that leading zeros are preserved.
results_df = pd.read_csv(MODEL_RESULT_PATH, dtype={'number': str})
print(results_df)


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\28005312.jpg: 480x800 1 number, 85.0ms
Speed: 3.0ms preprocess, 85.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1myolo_detections\results13[0m


Output()


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\28008332.jpg: 480x800 1 number, 85.0ms
Speed: 3.0ms preprocess, 85.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1myolo_detections\results14[0m


Output()

append



image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\28025021.jpg: 480x800 1 number, 88.0ms
Speed: 4.0ms preprocess, 88.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1myolo_detections\results15[0m


Output()

append



image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\29025210.jpg: 480x800 (no detections), 88.0ms
Speed: 6.0ms preprocess, 88.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1myolo_detections\results16[0m



append
append


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\29029972.jpg: 480x800 1 number, 86.0ms
Speed: 5.0ms preprocess, 86.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1myolo_detections\results17[0m


Output()


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\29051091.jpg: 480x800 (no detections), 85.0ms
Speed: 4.0ms preprocess, 85.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1myolo_detections\results18[0m



append
append


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\42026633.jpg: 800x736 1 number, 123.0ms
Speed: 6.0ms preprocess, 123.0ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 736)
Results saved to [1myolo_detections\results19[0m


Output()


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\42026781.jpg: 480x800 1 number, 87.0ms
Speed: 3.0ms preprocess, 87.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1myolo_detections\results20[0m


Output()

append



image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\test_images\42026872.jpg: 800x736 1 number, 122.0ms
Speed: 5.0ms preprocess, 122.0ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 736)
Results saved to [1myolo_detections\results21[0m


Output()

append


append
        filename  type     number  is_correct                    model
0   28005312.jpg     1   28005312           1        Recognition_model
1   28005312.jpg     1  280015312           0  Angle_recognition_model
2   28008332.jpg     1   28008332           1        Recognition_model
3   28008332.jpg     1   28006332           0  Angle_recognition_model
4   28025021.jpg     1   28025021           1        Recognition_model
5   28025021.jpg     1   28025021           1  Angle_recognition_model
6   29029972.jpg     1   29029972           1        Recognition_model
7   29029972.jpg     1   29029972           1  Angle_recognition_model
8   42026633.jpg     1   42026633           1        Recognition_model
9   42026633.jpg     1   42026633           1  Angle_recognition_model
10  42026781.jpg     1   42028781           0        Recognition_model
11  42026781.jpg     1   42026701           0  Angle_recognition_model
12  42026872.jpg     1   42026872           1        Recognition_model