In [None]:
"""
OCR -> MASK 생성 -> ClipDrop Cleanup(Inpainting) 호출 파이프라인

필수 라이브러리:
    pip install paddleocr paddlepaddle==2.5.0
    pip install opencv-python pillow requests python-dotenv

환경변수:
    CLIPDROP_API_KEY=your_api_key_here
"""

import os
from typing import List, Dict, Any
import json
import cv2
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image
import requests
from dotenv import load_dotenv


# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()
# ClipDrop API key; None when the environment variable is not set.
CLIPDROP_API_KEY = os.getenv("CLIPDROP_API_KEY")


# ==============================
# 0. OCR engine setup
# ==============================
# Korean + English recognition
OCR_ENGINE = PaddleOCR(
    lang="korean",  # per the original author's note, English is recognized as well
)




In [32]:
def run_ocr_boxes_only(
    image_path: str,
    min_area: int = 100,    # drops tiny noise detections
) -> List[Dict[str, Any]]:
    """
    Run OCR on one image and return only the text-region polygons.

    The first result yielded by the PaddleOCR 3.x ``.ocr()`` call is unwrapped
    to a dict and its polygons are taken from ``rec_polys``, falling back to
    ``dt_polys`` and then ``rec_boxes``.

    Args:
        image_path: Path of the image handed to the OCR engine.
        min_area: Polygons whose contour area is below this value are dropped.

    Returns:
        One dict per kept polygon, e.g.::

            [
                {
                    "box": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
                    "area": 1234.5,
                },
                ...
            ]

        An empty list is returned when the result is not a dict or carries
        none of the polygon keys.
    """
    result_iter = OCR_ENGINE.ocr(image_path)

    boxes: List[Dict[str, Any]] = []
    total_raw = 0

    for res in result_iter:
        # 1) Unwrap the pipeline Result object (or wrapper dict) to the data dict.
        if hasattr(res, "res"):
            data = res.res
        elif isinstance(res, dict) and "res" in res:
            data = res["res"]
        else:
            data = res

        if not isinstance(data, dict):
            print("[OCR] unexpected result type:", type(data))
            return []

        # 2) Polygon candidates, in priority order: rec_polys > dt_polys > rec_boxes.
        polys = data.get("rec_polys", None)
        if polys is None:
            polys = data.get("dt_polys", None)
        if polys is None:
            polys = data.get("rec_boxes", None)

        if polys is None:
            print("[OCR] no polys found, keys:", list(data.keys()))
            return []

        polys = np.array(polys)
        total_raw = polys.shape[0]

        # Flattened quads of shape (N, 8) become (N, 4, 2).
        if polys.ndim == 2 and polys.shape[1] == 8:
            polys = polys.reshape(-1, 4, 2)

        for poly in polys:
            pts = np.array(poly, dtype=np.float32)

            if pts.ndim == 1:
                if pts.size == 4:
                    # rec_boxes rows are [x_min, y_min, x_max, y_max]; expand
                    # them to four corners, otherwise they would collapse to
                    # two points and be discarded by the check below.
                    x_min, y_min, x_max, y_max = pts.tolist()
                    pts = np.array(
                        [
                            [x_min, y_min],
                            [x_max, y_min],
                            [x_max, y_max],
                            [x_min, y_max],
                        ],
                        dtype=np.float32,
                    )
                elif pts.size % 2 != 0:
                    # An odd number of values cannot form (x, y) pairs.
                    continue
                else:
                    # Flat [x1, y1, x2, y2, ...] -> (-1, 2)
                    pts = pts.reshape(-1, 2)

            # 2D but not (N, 2): try to force it into (x, y) pairs.
            elif pts.ndim == 2 and pts.shape[1] != 2:
                try:
                    pts = pts.reshape(-1, 2)
                except ValueError:
                    continue

            # A polygon needs at least three vertices.
            if pts.shape[0] < 3:
                continue

            try:
                area = cv2.contourArea(pts)
            except cv2.error:
                # Shapes OpenCV cannot interpret as a contour are skipped.
                continue

            if area < min_area:
                continue

            boxes.append(
                {
                    "box": pts.tolist(),
                    "area": float(area),
                }
            )

        # Only the first result (one image) is processed.
        break

    print(f"[OCR] raw polys: {total_raw}, kept after filters: {len(boxes)}")
    return boxes


In [33]:
def debug_ocr_with_text(
    image_path: str,
    min_score: float = 0.0,
    min_area: int = 0,
) -> List[Dict[str, Any]]:
    """
    Debug helper: list [box + recognized text + score] for one image.

    Reads the first result of the PaddleOCR 3.x ``.ocr()`` call, prints one
    line per kept detection to the console and also returns them as a list.

    Args:
        image_path: Path of the image handed to the OCR engine.
        min_score: Detections with a score below this value are dropped.
        min_area: Detections whose polygon area is below this value are dropped.

    Returns:
        One dict per kept detection, e.g.::

            [
                {
                    "index": 1,
                    "text": "2025.09.26~",
                    "score": 0.999,
                    "bbox": [x, y, w, h],
                    "poly": [[x1, y1], ...]
                },
                ...
            ]

        An empty list is returned when the result is not a dict or carries
        none of the polygon keys.
    """
    result_iter = OCR_ENGINE.ocr(image_path)

    outputs: List[Dict[str, Any]] = []

    for res in result_iter:
        # 1) Unwrap the actual data dict from the result object.
        if hasattr(res, "res"):
            data = res.res
        elif isinstance(res, dict) and "res" in res:
            data = res["res"]
        else:
            data = res

        if not isinstance(data, dict):
            print("[OCR] unexpected result type:", type(data))
            return []

        # 2) Polygons (boxes): rec_polys > dt_polys > rec_boxes.
        polys = data.get("rec_polys", None)
        if polys is None:
            polys = data.get("dt_polys", None)
        if polys is None:
            polys = data.get("rec_boxes", None)
        if polys is None:
            print("[OCR] no polys found, keys:", list(data.keys()))
            return []

        polys = np.array(polys)

        # Flattened quads of shape (N, 8) become (N, 4, 2).
        if polys.ndim == 2 and polys.shape[1] == 8:
            polys = polys.reshape(-1, 4, 2)

        # 3) Texts / scores (plural key first, singular key as fallback).
        texts = data.get("rec_texts", None)
        if texts is None:
            texts = data.get("rec_text", [])

        scores = data.get("rec_scores", None)
        if scores is None:
            scores = data.get("rec_score", [])

        # numpy -> list
        if isinstance(texts, np.ndarray):
            texts = texts.tolist()
        if isinstance(scores, np.ndarray):
            scores = scores.tolist()

        # scalar -> list
        if not isinstance(texts, (list, tuple)):
            texts = [texts]
        if not isinstance(scores, (list, tuple)):
            scores = [scores] * len(texts)

        # Only indices present in all three sequences can be paired up.
        n = min(polys.shape[0], len(texts), len(scores))

        for i in range(n):
            poly = polys[i]
            txt = str(texts[i])
            try:
                sc = float(scores[i])
            except (TypeError, ValueError):
                # Unparseable score: keep the detection, treat it as fully confident.
                sc = 1.0

            if sc < min_score:
                continue

            pts = np.array(poly, dtype=np.float32)
            if pts.ndim == 1:
                if pts.size == 4:
                    # rec_boxes rows are [x_min, y_min, x_max, y_max]; expand
                    # them to four corners, otherwise they would collapse to
                    # two points and be discarded by the check below.
                    x_min, y_min, x_max, y_max = pts.tolist()
                    pts = np.array(
                        [
                            [x_min, y_min],
                            [x_max, y_min],
                            [x_max, y_max],
                            [x_min, y_max],
                        ],
                        dtype=np.float32,
                    )
                elif pts.size % 2 != 0:
                    # An odd number of values cannot form (x, y) pairs; skip
                    # instead of handing a 1D array to OpenCV.
                    continue
                else:
                    pts = pts.reshape(-1, 2)
            elif pts.ndim == 2 and pts.shape[1] != 2:
                try:
                    pts = pts.reshape(-1, 2)
                except ValueError:
                    continue

            # A polygon needs at least three vertices.
            if pts.shape[0] < 3:
                continue

            try:
                area = cv2.contourArea(pts)
            except cv2.error:
                # Shapes OpenCV cannot interpret as a contour are skipped.
                continue
            if area < min_area:
                continue

            x, y, w, h = cv2.boundingRect(pts.astype(np.int32))

            info = {
                "index": len(outputs) + 1,
                "text": txt,
                "score": sc,
                "bbox": [x, y, w, h],
                "poly": pts.tolist(),
            }
            outputs.append(info)

            print(
                f"[{info['index']}] text='{txt}'  "
                f"score={sc:.3f}  bbox(x,y,w,h)={x},{y},{w},{h}"
            )

        # Only the first result (one image) is processed.
        break

    print(f"[DEBUG] total detections: {len(outputs)}")
    return outputs


In [34]:
def save_debug_ocr_image(
    image_path: str,
    ocr_boxes: List[Dict[str, Any]],
    output_path: str,
) -> None:
    """
    Draw the OCR boxes on the original image as red polygons and save it.

    Each polygon is labelled with its 1-based position in ``ocr_boxes``.

    Args:
        image_path: Path of the source image.
        ocr_boxes: Items carrying a ``"box"`` entry with the polygon points.
        output_path: Where the annotated image is written.

    Raises:
        FileNotFoundError: If the source image cannot be read.
        OSError: If OpenCV reports that the annotated image was not written.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")

    for label, b in enumerate(ocr_boxes, start=1):
        pts = np.array(b["box"], dtype=np.int32)

        # Draw the polygon outline as-is (red in BGR).
        cv2.polylines(img, [pts], isClosed=True, color=(0, 0, 255), thickness=3)

        # Number label just above the polygon, clamped to the top edge.
        x, y, _, _ = cv2.boundingRect(pts)
        cv2.putText(
            img,
            str(label),
            (x, max(0, y - 10)),
            cv2.FONT_HERSHEY_SIMPLEX,
            1.0,
            (0, 0, 255),
            2,
        )

    # imwrite signals failure through its return value rather than raising,
    # so check it before claiming the file was saved.
    if not cv2.imwrite(output_path, img):
        raise OSError(f"Cannot write image: {output_path}")
    print(f"[DEBUG] OCR box overlay saved to: {output_path}")


In [35]:
def export_ocr_for_gpt(
    image_path: str,
    out_json_path: str,
    min_score: float = 0.2,
    min_area: int = 100,
) -> Dict[str, Any]:
    """
    Save the OCR results as JSON in a shape that is easy to hand to GPT.

    Args:
        image_path: Path of the image to analyse.
        out_json_path: Destination of the JSON file (written as UTF-8).
        min_score: Forwarded to ``debug_ocr_with_text``.
        min_area: Forwarded to ``debug_ocr_with_text``.

    Returns:
        The dict that was written, e.g.::

            {
              "image_size": {"width": 4096, "height": 1024},
              "ocr_results": [
                {"id": 1, "text": "2025.09.26~", "score": 0.999, "bbox": [x,y,w,h]},
                ...
              ]
            }

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    # The image is loaded only to report its pixel dimensions.
    height, width = image.shape[:2]

    detections = debug_ocr_with_text(
        image_path, min_score=min_score, min_area=min_area
    )

    # Keep only the fields meant for export (the polygon is left out).
    ocr_results = []
    for detection in detections:
        ocr_results.append(
            {
                "id": detection["index"],
                "text": detection["text"],
                "score": float(detection["score"]),
                "bbox": detection["bbox"],
            }
        )

    payload = {
        "image_size": {"width": width, "height": height},
        "ocr_results": ocr_results,
    }

    # ensure_ascii=False keeps non-ASCII text readable in the file.
    with open(out_json_path, "w", encoding="utf-8") as json_file:
        json.dump(payload, json_file, ensure_ascii=False, indent=2)

    print(f"[EXPORT] OCR for GPT saved to: {out_json_path}")
    return payload


In [36]:
def call_clipdrop_remove_text(
    image_path: str,
    output_image_path: str,
    timeout: float = 120.0,
) -> None:
    """
    Send an image to the ClipDrop remove-text endpoint and save the response.

    Args:
        image_path: Path of the image to upload.
        output_image_path: Where the response body is written on success.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block forever.

    Raises:
        RuntimeError: If ``CLIPDROP_API_KEY`` is empty or unset.
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    if not CLIPDROP_API_KEY:
        raise RuntimeError("CLIPDROP_API_KEY 비어있음")

    url = "https://clipdrop-api.co/remove-text/v1"
    headers = {"x-api-key": CLIPDROP_API_KEY}

    # Declare the upload's real content type based on its filename; fall back
    # to PNG (the previously hard-coded value) when it cannot be guessed.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/png"

    with open(image_path, "rb") as image_file_object:
        files = {
            "image_file": (
                os.path.basename(image_path),
                image_file_object,
                mime_type,
            )
        }

        # The file must still be open while requests streams the upload.
        r = requests.post(url, files=files, headers=headers, timeout=timeout)

    if r.ok:
        with open(output_image_path, "wb") as out:
            out.write(r.content)
        print(f"[CLIPDROP] remove-text saved to: {output_image_path}")
    else:
        # Show the API's error body before raising, since HTTPError omits it.
        print("[CLIPDROP ERROR]", r.status_code, r.text)
        r.raise_for_status()

In [37]:
def process_poster(
    image_path: str,
    out_dir: str = "./output",
    prefix: str = "poster",
) -> Dict[str, str]:
    """
    Run the full pipeline for one poster image.

    Steps:
        1. Detect OCR polygons.
        2. Write the debug overlay image.
        3. Write the OCR JSON intended for GPT.
        4. Write the text-removed image produced by ClipDrop.

    Args:
        image_path: Path of the input image.
        out_dir: Output directory; created if it does not exist.
        prefix: Filename prefix shared by all generated files.

    Returns:
        Paths keyed by ``"original"``, ``"debug_overlay"``, ``"ocr_json"``
        and ``"cleaned"``.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Every artifact path is derived up front; this dict is also the result.
    paths = {
        "original": image_path,
        "debug_overlay": os.path.join(out_dir, f"{prefix}_ocr_debug.png"),
        "ocr_json": os.path.join(out_dir, f"{prefix}_ocr_results.json"),
        "cleaned": os.path.join(out_dir, f"{prefix}_cleaned.png"),
    }

    # 1) Polygons only (for layout)
    print("[STEP 1] Running OCR (boxes only)...")
    text_boxes = run_ocr_boxes_only(image_path)
    print(f"[STEP 1] detected text boxes: {len(text_boxes)}")

    # 2) Polygon debug image
    save_debug_ocr_image(image_path, text_boxes, paths["debug_overlay"])

    # 3) JSON for GPT (text + bbox)
    export_ocr_for_gpt(image_path, paths["ocr_json"])

    # 4) Text-removed image
    call_clipdrop_remove_text(image_path, paths["cleaned"])

    return paths


In [38]:
# Script entry point: run the whole pipeline on one sample image.
if __name__ == "__main__":
    # Path of the actual banner image to process
    INPUT_IMAGE = r"C:\final_project\ACC\acc-ai\app\data\editor\test6.png"
    

    # Outputs are written to ./output with the "sample" filename prefix.
    result_paths = process_poster(INPUT_IMAGE, out_dir="./output", prefix="sample")
    print(result_paths)


[STEP 1] Running OCR (boxes only)...


  result_iter = OCR_ENGINE.ocr(image_path)
[33mResized image size (1024x4096) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[OCR] raw polys: 6, kept after filters: 6
[STEP 1] detected text boxes: 6
[DEBUG] OCR box overlay saved to: ./output\sample_ocr_debug.png


  result_iter = OCR_ENGINE.ocr(image_path)
[33mResized image size (1024x4096) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


[1] text='2025.05.03N'  score=0.980  bbox(x,y,w,h)=1586,362,613,148
[2] text='2025.05.06'  score=1.000  bbox(x,y,w,h)=2198,392,475,81
[3] text='A15B'  score=0.998  bbox(x,y,w,h)=803,455,940,302
[4] text='CD'  score=0.988  bbox(x,y,w,h)=1618,470,591,261
[5] text='EFGH IJ'  score=0.946  bbox(x,y,w,h)=2200,467,1114,272
[6] text='BCD EFG HIJKLM NO'  score=0.955  bbox(x,y,w,h)=753,728,2603,282
[DEBUG] total detections: 6
[EXPORT] OCR for GPT saved to: ./output\sample_ocr_results.json
[CLIPDROP] remove-text saved to: ./output\sample_cleaned.png
{'original': 'C:\\final_project\\ACC\\acc-ai\\app\\data\\editor\\test6.png', 'debug_overlay': './output\\sample_ocr_debug.png', 'ocr_json': './output\\sample_ocr_results.json', 'cleaned': './output\\sample_cleaned.png'}
