In [2]:
import json
from pathlib import Path
import glob
import cv2
import numpy as np
import shutil

# Languages to process; each one maps to a "<language>_receipt" dataset
# directory under the data root.
LANGUAGE_LIST = ['japanese', 'chinese', 'thai', 'vietnamese']

def read_json(filename):
    """Parse the UTF-8 encoded JSON file at *filename* and return its content.

    Args:
        filename: Path (str or path-like) of the JSON file to read.

    Returns:
        The decoded JSON document (dict, list, str, number, bool or None).
    """
    raw_text = Path(filename).read_text(encoding='utf8')
    return json.loads(raw_text)

def convert_and_save_prepros(image_path, save_path):
    """Preprocess one image and write the result to *save_path*.

    The image is converted to grayscale, a background estimate is built by
    repeatedly dilating the grayscale image, and the absolute difference
    between the two is stretched to the 0-255 range and inverted before it
    is saved.

    Args:
        image_path: Path of the image to read.
        save_path: Destination path of the preprocessed image.

    Returns:
        True when the image was read, processed and written successfully.
        False when the image could not be read or the result could not be
        written; a message is printed in both failure cases.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread reports an unreadable file by returning None.
        print(f"이미지를 읽을 수 없습니다: {image_path}")
        return False
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Dilation takes the local maximum, so ten passes of a 3x3 kernel spread
    # bright pixels over thin dark strokes and leave a background estimate
    # (presumably dark text on lighter paper -- TODO confirm for all inputs).
    background = cv2.dilate(gray, np.ones((3, 3), np.uint8), iterations=10)

    # What differs from the background estimate is the foreground detail.
    diff = cv2.absdiff(gray, background)
    diff = cv2.normalize(diff, None, alpha=0, beta=255,
                         norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)

    # Invert so that the extracted detail is dark on a light background.
    inverted_diff = cv2.bitwise_not(diff)

    # cv2.imwrite returns False when the file could not be written; surface
    # that instead of unconditionally reporting success to the caller.
    if not cv2.imwrite(str(save_path), inverted_diff):
        print(f"이미지를 저장할 수 없습니다: {save_path}")
        return False
    return True

# Root of the source datasets and root of the preprocessed copy.
base_dir = Path("../data")
gray_base_dir = base_dir / "prepros2_data"

for language in LANGUAGE_LIST:
    dataset_name = f"{language}_receipt"
    receipt_dir = base_dir / dataset_name
    gray_receipt_dir = gray_base_dir / dataset_name

    # Make sure both output image folders exist before anything is written.
    for subset in ['train', 'test']:
        out_dir = gray_receipt_dir / 'img' / subset
        out_dir.mkdir(parents=True, exist_ok=True)

    # The 'ufo' directory is copied over unchanged.
    shutil.copytree(receipt_dir / 'ufo', gray_receipt_dir / 'ufo', dirs_exist_ok=True)

    # Preprocess every entry of each subset, keeping the original file name.
    for subset in ['train', 'test']:
        img_dir = receipt_dir / 'img' / subset
        gray_img_dir = gray_receipt_dir / 'img' / subset

        for img_path in img_dir.glob('*'):
            gray_path = gray_img_dir / img_path.name
            converted = convert_and_save_prepros(img_path, gray_path)
            if not converted:
                continue
            print(f"전처리 변환 및 저장 완료: {gray_path}")

print("모든 처리가 완료되었습니다.")

전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000923_page0001.jpg
전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000677_page0001.jpg
전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000029_page0001.jpg
전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000242_page0001.jpg
전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000710_page0001.jpg
전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000174_page0001.jpg
전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000498_page0001.jpg
전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000783_page0001.jpg
전처리 변환 및 저장 완료: ../data/prepros2_data/japanese_receipt/img/train/extractor.ja.in_house.appen_000