In [1]:
import os 
import json
import cv2
import numpy as np
import math
from PIL import Image, ImageDraw

In [9]:
class Rotate(object):
    """Cut a quadrilateral region out of an image and rotate it upright.

    The region is described by four corner points. Everything outside the
    quadrilateral is made transparent, the image is rotated so that the
    region's top edge (left_top -> right_top) becomes horizontal, and the
    result is cropped to the remaining opaque area.

    Args:
        image: Source Pillow image. It is converted to RGB (a copy), so the
            caller's image object is not modified.
        coordinate: Mapping with the keys ``'left_top'``, ``'right_top'``,
            ``'right_bottom'`` and ``'left_bottom'``; each value is an
            ``(x, y)`` pair in pixel coordinates of ``image``.

    Raises:
        KeyError: If one of the four corner keys is missing.
    """

    # Order in which the corners are read from ``coordinate``; this is also
    # the vertex order of the mask polygon.
    _CORNER_KEYS = ('left_top', 'right_top', 'right_bottom', 'left_bottom')

    def __init__(self, image: 'Image.Image', coordinate):
        self.image = image.convert('RGB')
        self.coordinate = coordinate
        self.xy = [tuple(self.coordinate[k]) for k in self._CORNER_KEYS]
        self._mask = None
        # putalpha() turns the RGB copy into RGBA in place: opaque inside
        # the quadrilateral, transparent everywhere else.
        self.image.putalpha(self.mask)

    @property
    def mask(self):
        """Lazily built 'L' mask: 255 inside the quadrilateral, 0 outside."""
        # Compare against None explicitly instead of relying on the
        # truthiness of an Image object.
        if self._mask is None:
            mask = Image.new('L', self.image.size, 0)
            ImageDraw.Draw(mask, 'L').polygon(self.xy, fill=255)
            self._mask = mask
        return self._mask

    def run(self):
        """Return the deskewed region, cropped to its opaque bounding box."""
        image = self.rotation_angle()
        # Take the bounding box from the alpha channel only, so transparent
        # pixels that still carry RGB data never enlarge the crop, whatever
        # the installed Pillow's default for getbbox() on RGBA images is.
        box = image.getchannel('A').getbbox()
        # box is None when nothing is opaque; it is passed through to
        # crop() unchanged, exactly as the original code did.
        return image.crop(box)

    def rotation_angle(self):
        """Rotate the masked image so the region's top edge is horizontal.

        Returns:
            The rotated RGBA image (canvas expanded to fit the rotation).
        """
        (x1, y1), (x2, y2) = self.xy[0], self.xy[1]
        # Signed tilt of the top edge in image coordinates (y grows
        # downwards): positive when the edge descends to the right.
        # Image.rotate() turns the picture counter-clockwise for positive
        # angles, which is exactly what levels such an edge, so the signed
        # value is passed as is. Using the unsigned included angle here
        # would always rotate clockwise and double the skew of regions
        # tilted the other way.
        tilt = math.degrees(math.atan2(y2 - y1, x2 - x1))
        return self.image.rotate(tilt, expand=True)

    def angle(self, v1, v2):
        """Return the unsigned included angle between two segments.

        Args:
            v1: First segment as ``[x_start, y_start, x_end, y_end]``.
            v2: Second segment, same layout.

        Returns:
            Included angle in whole degrees, in the range 0..180. Each
            direction is truncated to an integer before the comparison.
        """
        dx1 = v1[2] - v1[0]
        dy1 = v1[3] - v1[1]
        dx2 = v2[2] - v2[0]
        dy2 = v2[3] - v2[1]
        angle1 = int(math.atan2(dy1, dx1) * 180 / math.pi)
        angle2 = int(math.atan2(dy2, dx2) * 180 / math.pi)
        if angle1 * angle2 >= 0:
            # Same half-plane (or one direction is 0): plain difference.
            return abs(angle1 - angle2)
        # Opposite signs: add the magnitudes and take the shorter way round.
        included_angle = abs(angle1) + abs(angle2)
        if included_angle > 180:
            included_angle = 360 - included_angle
        return included_angle

In [4]:
# Build the recognition dataset: for every annotated region listed in
# ./M2026/Label.txt, save a deskewed crop under M2026_crop/ and record
# "<crop file name>\t<transcription>" in the train or eval label file.
SRC_LABEL_FILE = './M2026/Label.txt'
CROP_DIR = 'M2026_crop'
EVAL_EVERY = 5  # source images whose index is a multiple of 5 go to eval

# The crops and both label files are written here; create it up front so a
# fresh checkout does not fail on the first open()/save().
os.makedirs(CROP_DIR, exist_ok=True)

with open(SRC_LABEL_FILE, 'r', encoding='utf8') as fp:
    # One record per line: "<image path>\t<JSON list of annotations>".
    # rstrip('\n') (rather than dropping the last character) keeps the
    # final line intact when the file does not end with a newline.
    records = [line.rstrip('\n').split('\t', 1) for line in fp if line.strip()]

with open(os.path.join(CROP_DIR, 'rec_gt_train.txt'), 'w', encoding='utf-8') as f_train, \
        open(os.path.join(CROP_DIR, 'rec_gt_eval.txt'), 'w', encoding='utf-8') as f_eval:
    for image_index, (path, anno_json) in enumerate(records):
        annotations = json.loads(anno_json)
        # File name without directory and extension, e.g. 'M2026/a.jpg' -> 'a'.
        stem = os.path.splitext(os.path.basename(path))[0]
        # The split is decided per source image, so all crops of one image
        # end up in the same label file.
        target = f_eval if image_index % EVAL_EVERY == 0 else f_train
        # Close each source image as soon as its crops are written.
        with Image.open(path) as image:
            for j, entry in enumerate(annotations):
                label = entry['transcription']
                roi = entry['points']
                # Only the first four points are used, in the order
                # left_top, right_top, right_bottom, left_bottom.
                left_top, right_top, right_bottom, left_bottom = roi[:4]
                coordinate = {
                    'left_top': left_top,
                    'right_top': right_top,
                    'right_bottom': right_bottom,
                    'left_bottom': left_bottom,
                }
                print(roi, label)
                crop_name = stem + '_' + str(j) + '.jpg'
                # Save the crop inside the crop directory.
                crop = Rotate(image, coordinate).run().convert('RGB')
                crop.save(os.path.join(CROP_DIR, crop_name))
                # The label file stores the crop name without the directory.
                target.write(crop_name + '\t' + label + '\n')

[[24, 51], [55, 51], [55, 103], [24, 103]] A
[[88, 50], [116, 50], [116, 101], [88, 101]] B
[[147, 51], [178, 51], [178, 102], [147, 102]] C
[[214, 52], [241, 52], [241, 101], [214, 101]] D
[[272, 52], [306, 52], [306, 102], [272, 102]] E
[[339, 51], [366, 51], [366, 100], [339, 100]] F
[[405, 52], [430, 52], [430, 102], [405, 102]] G
[[468, 53], [495, 53], [495, 103], [468, 103]] H
[[532, 53], [559, 53], [559, 102], [532, 102]] I
[[595, 53], [624, 53], [624, 104], [595, 104]] J
[[26, 158], [51, 158], [51, 209], [26, 209]] K
[[88, 159], [111, 159], [111, 211], [88, 211]] L
[[149, 158], [178, 158], [178, 209], [149, 209]] M
[[215, 158], [242, 158], [242, 208], [215, 208]] N
[[280, 158], [305, 158], [305, 211], [280, 211]] O
[[341, 162], [368, 162], [368, 202], [341, 202]] P
[[406, 158], [431, 158], [431, 207], [406, 207]] Q
[[470, 160], [494, 160], [494, 208], [470, 208]] R
[[536, 160], [557, 160], [557, 208], [536, 208]] S
[[596, 158], [623, 158], [623, 207], [596, 207]] T
[[24, 266], 

In [7]:
# Run tools/train.py with the configuration en_PP-OCRv3_rec_train/config.yml.
# NOTE(review): presumably this config points at the label files written to
# M2026_crop/ by the cell above - confirm in config.yml.
!python tools/train.py -c en_PP-OCRv3_rec_train/config.yml

[2023/06/21 17:29:15] ppocr INFO: Architecture : 
[2023/06/21 17:29:15] ppocr INFO:     Backbone : 
[2023/06/21 17:29:15] ppocr INFO:         last_conv_stride : [1, 2]
[2023/06/21 17:29:15] ppocr INFO:         last_pool_type : avg
[2023/06/21 17:29:15] ppocr INFO:         name : MobileNetV1Enhance
[2023/06/21 17:29:15] ppocr INFO:         scale : 0.5
[2023/06/21 17:29:15] ppocr INFO:     Head : 
[2023/06/21 17:29:15] ppocr INFO:         head_list : 
[2023/06/21 17:29:15] ppocr INFO:             CTCHead : 
[2023/06/21 17:29:15] ppocr INFO:                 Head : 
[2023/06/21 17:29:15] ppocr INFO:                     fc_decay : 1e-05
[2023/06/21 17:29:15] ppocr INFO:                 Neck : 
[2023/06/21 17:29:15] ppocr INFO:                     depth : 2
[2023/06/21 17:29:15] ppocr INFO:                     dims : 64
[2023/06/21 17:29:15] ppocr INFO:                     hidden_dims : 120
[2023/06/21 17:29:15] ppocr INFO:                     name : svtr
[2023/06/21 17:29:15] ppocr INFO:   

In [9]:
# Export the checkpoint output/v3_en_mobile2/best_accuracy as an inference
# model into ./transrec0620/ (same config as the training cell).
# NOTE(review): the captured log of this cell is dated 2023/06/20, one day
# before the training log above (2023/06/21) - re-run the export after
# training so the exported weights match the latest run.
!python tools/export_model.py -c en_PP-OCRv3_rec_train/config.yml -o Global.pretrained_model=output/v3_en_mobile2/best_accuracy Global.save_inference_dir=./transrec0620/

W0620 10:48:39.911726 79796 gpu_context.cc:278] Please NOTE: device: 0, GPU Compute Capability: 8.6, Driver API Version: 12.1, Runtime API Version: 11.2
W0620 10:48:39.914041 79796 gpu_context.cc:306] device: 0, cuDNN Version: 8.1.
[2023/06/20 10:48:41] ppocr INFO: load pretrain successful from output/v3_en_mobile2/best_accuracy
[2023/06/20 10:48:42] ppocr INFO: inference model is saved to ./transrec0620/inference
