In [57]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [58]:
import easyocr
import cv2
import numpy as np
from PIL import Image
from ultralytics import YOLO
import pytesseract
from pytesseract import Output

In [3]:
reader = easyocr.Reader(['en'])

In [53]:
grey_im_path = 'data/grey_numer.png'
color_im_path = 'data/color_number.jpg'
bin_grey_im_path = 'data/bin_grey_number.png'
bin_color_im_path = 'data/bin_color_number.png'

grey_im = cv2.imread(grey_im_path)
color_im = cv2.imread(color_im_path)

In [54]:
th_1, g_im_th = cv2.threshold(grey_im, 128, 255, cv2.THRESH_BINARY)
th_2, c_im_th = cv2.threshold(color_im, 128, 255, cv2.THRESH_BINARY)

cv2.imwrite(bin_grey_im_path, g_im_th)
cv2.imwrite(bin_color_im_path, c_im_th)

True

In [55]:
grey_im_result = reader.readtext(grey_im_path, allowlist ='0123456789')
color_im_result = reader.readtext(color_im_path, allowlist ='0123456789')

bin_grey_im_result = reader.readtext(bin_grey_im_path, allowlist ='0123456789')
bin_color_im_result = reader.readtext(bin_color_im_path, allowlist ='0123456789')

In [10]:
def print_boxes(ocr_data, frame):
    for box in ocr_data:
        x1 = box[0][0][0]
        y1 = box[0][0][1]
        x2 = box[0][2][0]
        y2 = box[0][2][1]
        label = box[1]
        frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (0,0,255))
        frame = cv2.putText(frame, label, (x1, y1+15), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,255), 2)

    return frame

In [59]:
detection_model = YOLO('./models/custom_yolov8pt_25_orig.pt')
detection_model.to('cuda')

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 80, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(80, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(80, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(160, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(400, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_r

In [75]:
# Yolo detection for images
result = detection_model.predict('./data/63139984.jpg', save = True, save_crop=True, project='custom_runs')


image 1/1 C:\Users\\PycharmProjects\Wagon_number_OCR\data\63139984.jpg: 480x800 1 number, 288.0ms
Speed: 6.0ms preprocess, 288.0ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 800)
Results saved to [1mcustom_runs\custom_runs[0m


In [59]:
grey_im.shape

(240, 320, 3)

In [60]:
color_im.shape

(720, 1280, 3)

In [140]:
#  Image preprocessing: cv2.THRESH_BINARY, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.ADAPTIVE_THRESH_MEAN_C
crop_test = 'runs/detect/predict8/crops/number/42030106.jpg'
bin_save_path = './temp/bin_img.jpg'
gauss_save_path = './temp/adapt_gauss_bin_img.jpg'
mean_save_path = './temp/adapt_mean_bin_img.jpg'
otsu_save_path = './temp/otsu_img.jpg'

img = cv2.imread(crop_test, cv2.IMREAD_GRAYSCALE)
ret1, bin_img = cv2.threshold(img, 127,255, cv2.THRESH_BINARY)

gauss_blur = cv2.GaussianBlur(img,(1,1),0)
ret3, otsu_img = cv2.threshold(gauss_blur,0,255,cv2.THRESH_BINARY + cv2.THRESH_OTSU)

blur_img = cv2.medianBlur(img, 5)

# 49, 2 / 69, -5
gauss_bin_img = cv2.adaptiveThreshold(blur_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 29, -4)
mean_bin_img = cv2.adaptiveThreshold(blur_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 29, -4)

cv2.imwrite(bin_save_path, bin_img)
cv2.imwrite(gauss_save_path, gauss_bin_img)
cv2.imwrite(mean_save_path, mean_bin_img)
cv2.imwrite(otsu_save_path, otsu_img)

True

In [6]:
# Testing easyocr for img with different preprocessings

origin_img_res = reader.readtext(crop_test, allowlist ='0123456789')
bin_img_res = reader.readtext(bin_save_path, allowlist ='0123456789')  # cv2.THRESH_BINARY
gauss_img_res = reader.readtext(gauss_save_path, allowlist ='0123456789')  # cv2.ADAPTIVE_THRESH_GAUSSIAN_C + cv2.THRESH_BINARY_INV
mean_img_res = reader.readtext(mean_save_path, allowlist ='0123456789')  # cv2.ADAPTIVE_THRESH_MEAN_C + cv2.THRESH_BINARY_INV

frame = cv2.imread(crop_test)
annotated_frame = print_boxes(origin_img_res, frame)
cv2.imwrite('./results/origin_color.jpg', annotated_frame)

frame = cv2.imread(bin_save_path)
annotated_frame = print_boxes(bin_img_res, frame)
cv2.imwrite('./results/bin_color_res.jpg', annotated_frame)

frame = cv2.imread(gauss_save_path)
annotated_frame = print_boxes(gauss_img_res, frame)
cv2.imwrite('./results/gauss_color_res.jpg', annotated_frame)

frame = cv2.imread(mean_save_path)
annotated_frame = print_boxes(mean_img_res, frame)
cv2.imwrite('./results/mean_color_res.jpg', annotated_frame)

print(f'Origin color: {origin_img_res[0][1], origin_img_res[0][2]}')
if bin_img_res:
    print(f'Bin color: {bin_img_res[0][1], bin_img_res[0][2]}')
if gauss_img_res:
    print(f'Gauss color: {gauss_img_res[0][1], gauss_img_res[0][2]}')
if mean_img_res:
    print(f'Mean color: {mean_img_res[0][1], mean_img_res[0][2]}')

NameError: name 'reader' is not defined

In [64]:
bin_color_res

[]

In [None]:
origin_grey_res = reader.readtext(grey_crop_test, allowlist ='0123456789')

frame = cv2.imread(grey_crop_test)
annotated_frame = print_boxes(origin_grey_res, frame)
cv2.imwrite('./results/origin_grey.jpg', annotated_frame)

In [None]:
img = cv2.imread('./.jpg')
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
d = pytesseract.image_to_string(img)
print(d)

In [1]:
from mmocr.apis import MMOCRInferencer

In [47]:
ocr = MMOCRInferencer(det=None, rec='Aster')  # 'DBNet'

KeyboardInterrupt: 

In [67]:
ocr

NameError: name 'get_atter' is not defined

In [71]:
res = ocr('runs/detect/predict8/crops/number/_2338558.jpg')

Output()

In [72]:
res

{'predictions': [{'rec_texts': ['23385582'],
   'rec_scores': [0.8660839647054672]}],
 'visualization': []}

In [41]:
f = open('train_data/full/labels')


KeyboardInterrupt



In [4]:
import os
os.listdir('train_data/full/labels')

['24252710.json',
 '24295479.json',
 '24353013.json',
 '24424020.json',
 '24432064.json',
 '24529281.json',
 '24530545.json',
 '26237099.json',
 '28005312.json',
 '28008332.json',
 '28025021.json',
 '28027647.json',
 '28047389.json',
 '28079838.json',
 '28822880.json',
 '28865640.json',
 '29025210.json',
 '29029972.json',
 '29051091.json',
 '30603179.json',
 '30639017.json',
 '30639124.json',
 '42012526.json',
 '42026278.json',
 '42026633.json',
 '42026781.json',
 '42026872.json',
 '42030098.json',
 '42030106.json',
 '42030189.json',
 '42030239.json',
 '42030312.json',
 '42030361.json',
 '42030510.json',
 '42030973.json',
 '42031922.json',
 '42031971.json',
 '42032045.json',
 '42032110.json',
 '42032250.json',
 '42038323.json',
 '42038448.json',
 '42038455.json',
 '42038497.json',
 '42039958.json',
 '42040188.json',
 '42040550.json',
 '42040634.json',
 '42040741.json',
 '42070607.json',
 '42071019.json',
 '42071043.json',
 '42071373.json',
 '42091595.json',
 '42099309.json',
 '42099358

In [6]:
files = [f for f in os.listdir('train_data/full/labels') if os.path.isfile(f)]

In [7]:
files

[]

In [15]:
from os import walk

f = []
layer = 1
w = walk('train_data/full/labels')

for (dirpath, dirnames, filenames) in w:


['24252710.json', '24295479.json', '24353013.json', '24424020.json', '24432064.json', '24529281.json', '24530545.json', '26237099.json', '28005312.json', '28008332.json', '28025021.json', '28027647.json', '28047389.json', '28079838.json', '28822880.json', '28865640.json', '29025210.json', '29029972.json', '29051091.json', '30603179.json', '30639017.json', '30639124.json', '42012526.json', '42026278.json', '42026633.json', '42026781.json', '42026872.json', '42030098.json', '42030106.json', '42030189.json', '42030239.json', '42030312.json', '42030361.json', '42030510.json', '42030973.json', '42031922.json', '42031971.json', '42032045.json', '42032110.json', '42032250.json', '42038323.json', '42038448.json', '42038455.json', '42038497.json', '42039958.json', '42040188.json', '42040550.json', '42040634.json', '42040741.json', '42070607.json', '42071019.json', '42071043.json', '42071373.json', '42091595.json', '42099309.json', '42099358.json', '42110247.json', '42115840.json', '42148205.jso

In [16]:
files = [f for f in os.listdir('train_data/full/labels') if os.path.isfile(f)]

In [17]:
files

[]

In [78]:
import cv2
img = cv2.imread('./train_data/full/images/24252710.jpg')

In [110]:
dir_path = 'train_data/full/labels/'
dir_path_2 = 'train_data/part/labels/'

# list to store files
res = []
res_2 = []

# Iterate directory
for file_path in os.listdir(dir_path):
    # check if current file_path is a file
    if os.path.isfile(os.path.join(dir_path, file_path)):
        # add filename to list
        res.append(file_path)

for file_path in os.listdir(dir_path_2):
    # check if current file_path is a file
    if os.path.isfile(os.path.join(dir_path_2, file_path)):
        # add filename to list
        res_2.append(file_path)

In [111]:
import json

for file in res:
    with open(dir_path + file) as f:
        a = json.loads(f.read())

        img = cv2.imread('train_data/full/images/' + file[:-5]+'.jpg')

        x = a[0]['annotations'][0]['coordinates']['x'] / img.shape[1]
        y = a[0]['annotations'][0]['coordinates']['y'] / img.shape[0]
        width = a[0]['annotations'][0]['coordinates']['width'] / img.shape[1]
        height = a[0]['annotations'][0]['coordinates']['height'] / img.shape[0]

        data = f'number {x} {y} {width} {height}'

        with open(f'new_labels/{file[:-4]}txt', 'w') as m:
            m.write(data)


for file in res_2:
    with open(dir_path_2 + file) as d:
        a = json.loads(d.read())

        img = cv2.imread('train_data/part/images/' + file[:-5]+'.jpg')

        x = a[0]['annotations'][0]['coordinates']['x'] / img.shape[1]
        y = a[0]['annotations'][0]['coordinates']['y'] / img.shape[0]
        width = a[0]['annotations'][0]['coordinates']['width'] / img.shape[1]
        height = a[0]['annotations'][0]['coordinates']['height'] / img.shape[0]

        data = f'number {x} {y} {width} {height}'

        with open(f'new_labels/{file[:-4]}txt', 'a') as k:
            k.write(data)

In [73]:
import json
import csv

labels = []

for file in res:
    labels.append([file[:-4] + 'jpg', file[:-5]])

for file in res_2:
    labels.append([file[:-4] + 'jpg', file[:-5]])

with open('easy_labels.csv', 'w', encoding='UTF8') as f:
    fields = ('filename', 'words')
    writer = csv.DictWriter(f, fieldnames=fields, lineterminator = '\n')
    for row in labels:
        writer.writerow({'filename': row[0], 'words': row[1].replace("_","")})


In [77]:
import cv2
img = cv2.imread('./train_data/full/images/24252710.jpg')

In [84]:
a.shape[0]

AttributeError: 'list' object has no attribute 'shape'