In [1]:
import json
import numpy as np
from PIL import Image, ImageDraw

# Transcription values that load_ppocrlabel discards: any label whose
# "transcription" is exactly one of these strings is dropped and counted as invalid.
# "(한자)" translates to "(Chinese characters)"; presumably these are annotator
# placeholders for text that was not transcribed.
# NOTE(review): "(일본아)" looks like a typo for "(일본어)" ("(Japanese)") — confirm
# against the actual label files before changing it, since matching is exact.
REMOVE_LABEL = ["(한자)", "(일본아)"]


def crop_by_polygon(image, polygon):
    """Crop ``image`` to the area covered by ``polygon``.

    Pixels outside the polygon are set to zero, then the result is cropped to
    the bounding box of the rasterized polygon.

    Args:
        image (PIL.Image): Source image.
        polygon (list): Polygon vertices ``[(x1, y1), (x2, y2), ..., (xn, yn)]``.
            Each vertex may be a tuple or a list.

    Returns:
        PIL.Image: New image containing only the polygon area, cropped to the
        polygon's bounding box.
    """
    # Flatten to [x1, y1, x2, y2, ...]. A comprehension (rather than
    # ``sum(polygon, [])``) accepts tuple vertices as well as list vertices.
    flat_polygon = [coordinate for vertex in polygon for coordinate in vertex]

    # Rasterize the polygon into a single-channel mask: 1 inside, 0 outside.
    mask = Image.new('L', image.size, 0)
    ImageDraw.Draw(mask).polygon(flat_polygon, outline=1, fill=1)

    # Copy only the masked pixels onto an all-zero canvas of the same shape.
    inside = np.array(mask) == 1
    image_np = np.array(image)
    result = np.zeros_like(image_np)
    result[inside] = image_np[inside]

    # Crop to the bounding box of the non-zero mask pixels.
    return Image.fromarray(result).crop(mask.getbbox())

def is_point_inside_polygon(point, polygon):
    """Test whether ``point`` lies inside ``polygon`` using ray casting.

    A horizontal ray is cast from the point towards increasing x and the
    number of polygon edges it crosses is counted; an odd count means inside.
    Points lying exactly on the boundary are not classified consistently
    (for an axis-aligned square, a point on the right edge counts as inside
    while a point on the left edge counts as outside).

    Args:
        point (list): The point ``(x, y)`` to test.
        polygon (list): Polygon vertices ``[(x1, y1), (x2, y2), ..., (xn, yn)]``.

    Returns:
        bool: True if the point is inside the polygon. An empty polygon
        contains no point, so the result is False.
    """
    vertex_count = len(polygon)
    if vertex_count == 0:
        return False

    x, y = point
    inside = False

    for index in range(vertex_count):
        p1x, p1y = polygon[index]
        # Wrap around so the last vertex connects back to the first.
        p2x, p2y = polygon[(index + 1) % vertex_count]

        # Half-open y-range: a vertex shared by two edges is counted once, and
        # horizontal edges (p1y == p2y) can never satisfy it, so the division
        # below is safe.
        if min(p1y, p2y) < y <= max(p1y, p2y) and x <= max(p1x, p2x):
            xinters = (y - p1y) * (p2x - p1x) / (p2y - p1y) + p1x
            if p1x == p2x or x <= xinters:
                inside = not inside

    return inside

def is_polygon_inside_polygon(polygon1, polygon2):
    """Check whether every vertex of ``polygon1`` lies inside ``polygon2``.

    Only the vertices of ``polygon1`` are tested, not its edges, so for a
    concave ``polygon2`` the result can be True even though part of an edge of
    ``polygon1`` leaves ``polygon2``.

    Args:
        polygon1 (list): Candidate inner polygon ``[(x1, y1), ..., (xn, yn)]``.
        polygon2 (list): Candidate enclosing polygon ``[(x1, y1), ..., (xn, yn)]``.

    Returns:
        bool: True if all vertices of ``polygon1`` are inside ``polygon2``
        (True for an empty ``polygon1``).
    """
    # Generator instead of a list so the scan stops at the first outside vertex.
    return all(is_point_inside_polygon(point, polygon2) for point in polygon1)

def load_ppocrlabel(path, check_text_is_in_sign=True):
    """Load a PPOCRLabel label file and group its text labels by sign.

    Each non-blank line of the file is ``<image path><TAB><JSON list of labels>``.
    Labels are split into three groups by their ``"transcription"``:

    * values listed in ``REMOVE_LABEL`` are dropped (and reported),
    * ``"@@@"`` marks a sign polygon,
    * everything else is a text label.

    Args:
        path (str): Path of the label file produced with PPOCRLabel.
        check_text_is_in_sign (bool): If True, an image is skipped (and
            reported) unless every text label lies inside at least one sign.

    Returns:
        dict: Maps each image path to a list of per-sign entries::

            result = {image_path: [image_label, ...], ...}
            image_label = {"sign": label, "text": [label, ...]}
            label = {"transcription": str, "points": polygon, ...}
            polygon = [(x1, y1), (x2, y2), ..., (xn, yn)]
    """
    invalid_label_num = 0
    invalid_image_num = 0

    # The file holds Korean transcriptions; decode explicitly instead of
    # relying on the platform's default encoding.
    with open(path, encoding="utf-8") as f:
        lines = [line.rstrip("\n") for line in f]

    data = {}
    for line in lines:
        # Tolerate blank lines (e.g. a trailing empty line at end of file).
        if not line.strip():
            continue

        # Split on the first tab only: the path comes first, the rest is JSON.
        image_path, labels = line.split("\t", 1)
        sign_labels, text_labels = [], []

        for label in json.loads(labels):
            text = label["transcription"]
            if text in REMOVE_LABEL:
                invalid_label_num += 1
                print(f"제거됨 {label}")
            elif text == "@@@":
                sign_labels.append(label)
            else:
                text_labels.append(label)

        # Is every text label contained in at least one sign?
        if check_text_is_in_sign:
            every_text_in_a_sign = all(
                any(
                    is_polygon_inside_polygon(text_label["points"], sign_label["points"])
                    for sign_label in sign_labels
                )
                for text_label in text_labels
            )
            if not every_text_in_a_sign:
                invalid_image_num += 1
                print(f"유효하지 않아 제거됨: {image_path}")
                continue

        # Attach to each sign the text labels located inside it.
        final_label = []
        for sign_label in sign_labels:
            sign_text_labels = [
                text_label
                for text_label in text_labels
                if is_polygon_inside_polygon(text_label["points"], sign_label["points"])
            ]
            final_label.append({"sign": sign_label, "text": sign_text_labels})
        data[image_path] = final_label

    print(f"invalid_label_num: {invalid_label_num}")
    print(f"invalid_image_num: {invalid_image_num}")
    return data

In [2]:

# image_path = './00fcf6af-91fb-4594-b245-481bd0483162-aligned.jpg'
# image = Image.open(image_path)
# polygon = [[1422, 1145], [1418, 1285], [1588, 1278], [1595, 1232], [2123, 1218], [2123, 1122]]

# cropped_image = crop_by_polygon(image, polygon)
# cropped_image.show()

In [3]:
from pathlib import Path

def make_and_save_detection_dataset(data, save_dir, dir_size=1000):
    """Export one cropped image per sign together with its text annotations.

    Any existing ``save_dir`` is deleted first. Crops are numbered from 1 and
    stored as ``save_dir/<bucket>/<n>.png`` with ``dir_size`` images per bucket
    directory. ``save_dir/label.txt`` receives one line per crop:
    ``<crop path><TAB><JSON list of text labels>``.

    The text polygons are written relative to the cropped image, i.e. shifted
    by the top-left corner of the sign's bounding box, so they line up with
    the saved crop rather than with the original image.

    Args:
        data (dict): Mapping ``{image_path: [{"sign": label, "text": [label, ...]}, ...]}``
            as returned by ``load_ppocrlabel``.
        save_dir (str | Path): Output directory (removed and recreated).
        dir_size (int): Maximum number of crops per bucket directory.
    """
    import shutil  # local import: only needed to clear a previous run's output

    save_dir = Path(save_dir)
    if save_dir.exists():
        # Path.rmdir() only removes empty directories, which made every rerun
        # fail; remove the whole previous output tree instead.
        shutil.rmtree(save_dir)
    save_dir.mkdir(parents=True)
    label_path = save_dir / "label.txt"
    image_idx = 0

    with open(label_path, "w", encoding="utf-8") as label_file:
        for source_path, labels in data.items():
            with Image.open(source_path) as image:
                for label in labels:
                    sign_points = label["sign"]["points"]
                    cropped_image = crop_by_polygon(image, sign_points)

                    # crop_by_polygon crops to the bounding box of the sign
                    # mask, whose top-left corner is the polygon's minimum x/y
                    # clamped at the image edge. Exact for integer coordinates;
                    # assumes the sign polygon overlaps the image.
                    left = max(0, min(x for x, _ in sign_points))
                    top = max(0, min(y for _, y in sign_points))

                    # Build shifted copies so the caller's ``data`` is not mutated.
                    shifted_text_labels = [
                        {
                            **text_label,
                            "points": [[x - left, y - top] for x, y in text_label["points"]],
                        }
                        for text_label in label["text"]
                    ]

                    crop_path = save_dir / f"{(image_idx // dir_size + 1)}" / f"{image_idx + 1}.png"
                    crop_path.parent.mkdir(parents=True, exist_ok=True)
                    cropped_image.save(crop_path)
                    label_file.write(f"{str(crop_path)}\t{json.dumps(shifted_text_labels)}\n")

                    image_idx += 1

def make_and_save_recognition_dataset(data, save_dir, dir_size=1000):
    """Export one cropped image per text label together with its transcription.

    Any existing ``save_dir`` is deleted first. Crops are numbered from 1 and
    stored as ``save_dir/<bucket>/<n>.png`` with ``dir_size`` images per bucket
    directory. ``save_dir/label.txt`` receives one line per crop:
    ``<crop path><TAB><transcription>``.

    Args:
        data (dict): Mapping ``{image_path: [{"sign": label, "text": [label, ...]}, ...]}``
            as returned by ``load_ppocrlabel``.
        save_dir (str | Path): Output directory (removed and recreated).
        dir_size (int): Maximum number of crops per bucket directory.
    """
    import shutil  # local import: only needed to clear a previous run's output

    save_dir = Path(save_dir)
    if save_dir.exists():
        # Path.rmdir() only removes empty directories, which made every rerun
        # fail; remove the whole previous output tree instead.
        shutil.rmtree(save_dir)
    save_dir.mkdir(parents=True)
    label_path = save_dir / "label.txt"
    image_idx = 0

    # Transcriptions are written verbatim (Korean text), so fix the encoding.
    with open(label_path, "w", encoding="utf-8") as label_file:
        for source_path, labels in data.items():
            with Image.open(source_path) as image:
                for label in labels:
                    for text_label in label["text"]:
                        cropped_image = crop_by_polygon(image, text_label["points"])
                        crop_path = save_dir / f"{(image_idx // dir_size + 1)}" / f"{image_idx + 1}.png"
                        crop_path.parent.mkdir(parents=True, exist_ok=True)
                        cropped_image.save(crop_path)
                        label_file.write(f"{str(crop_path)}\t{text_label['transcription']}\n")

                        # Advance once per text crop; advancing once per sign
                        # made every text in a sign overwrite the same file.
                        image_idx += 1
 

# Driver: parse the PPOCRLabel annotation file, then export both datasets.
# Image paths stored in the label file are opened as-is, so they resolve
# relative to the current working directory.
label_file_path = "./sample/Label.txt"
# With the default check_text_is_in_sign=True, images whose text labels are not
# all inside a sign polygon are skipped.
data = load_ppocrlabel(label_file_path)

# Detection set: one crop per sign plus that sign's text labels.
make_and_save_detection_dataset(data, "./result/det")
# Recognition set: one crop per text label plus its transcription.
make_and_save_recognition_dataset(data, "./result/rec")

제거됨 {'transcription': '(한자)', 'points': [[1365, 868], [1370, 1103], [2420, 1130], [2395, 900]], 'difficult': False}
유효하지 않아 제거됨: sample/0aafd4c1-8538-4555-a8be-0583dfb82718-aligned.jpg
유효하지 않아 제거됨: sample/0b16eb5c-154b-462b-bad9-8c7ee9ed8d5b-aligned.jpg
제거됨 {'transcription': '(한자)', 'points': [[514, 2360], [505, 2388], [512, 2403], [581, 2408], [585, 2366]], 'difficult': False}
제거됨 {'transcription': '(한자)', 'points': [[470, 1883], [467, 1901], [481, 1914], [498, 1904], [492, 1884]], 'difficult': False}
제거됨 {'transcription': '(한자)', 'points': [[455, 1974], [483, 1973], [485, 1962], [477, 1949], [459, 1955]], 'difficult': False}
유효하지 않아 제거됨: sample/0bdf2fcd-557d-4e74-ae65-54d9455ada6c-aligned.jpg
invalid_label_num: 4
invalid_image_num: 3


[True, True, True, True]


True