# OCR 서비스 활용

## 1. Google Cloud Vision API 준비하기

1. [Google Cloud Console](https://console.cloud.google.com/) 에서 프로젝트를 생성/선택합니다.
2. `Cloud Vision API` 를 활성화합니다.
3. `Service Account`를 만들고, `JSON` 형식의 키를 다운로드합니다.
4. 환경 변수 `GOOGLE_APPLICATION_CREDENTIALS` 로 해당 키 파일의 경로를 등록합니다.

In [None]:
%pip install --upgrade google-cloud-vision

In [None]:
from dotenv import load_dotenv
from google.cloud import vision

load_dotenv()

# Create the Vision client. Per the setup steps, credentials are located via
# the GOOGLE_APPLICATION_CREDENTIALS environment variable, which load_dotenv()
# can supply from a local .env file.
client = vision.ImageAnnotatorClient()

IMAGE_PATH = '실습용 이미지 경로 설정'  # TODO: replace with the path to the practice image

# The API expects the raw image bytes, so read the file in binary mode.
with open(IMAGE_PATH, 'rb') as img_file:
    content = img_file.read()

image = vision.Image(content=content)
response = client.text_detection(image=image)

# Check for an API-level error BEFORE touching the result; otherwise a failed
# request would first be reported as "no text recognized", which is misleading.
if response.error.message:
    raise Exception(f'API Error: {response.error.message}')

texts = response.text_annotations

print('--- 인식된 텍스트 ---')
if texts:
    # Per the Vision API docs, the first annotation carries the full detected
    # text; the remaining entries are the individual pieces.
    print(texts[0].description)
else:
    print('텍스트가 인식되지 않았습니다.')

## 2. Naver CLOVA OCR API 준비하기

1. [Naver Cloud Console](https://www.ncloud.com/) 에서 프로젝트를 생성/선택합니다.
2. `AI·ML > CLOVA OCR` 서비스를 활성화합니다.
3. `CLOVA OCR` 서비스 계정을 생성하고, `Secret Key` 와 `Invoke URL` 을 확인합니다.

In [None]:
# 이미지 결과 ROI 도식화를 위한 라이브러리 설치
%pip install opencv-python
# %pip install -U matplotlib

In [None]:
from dotenv import load_dotenv
import os
import base64
import json
import time
import uuid
import requests
import cv2

load_dotenv()

# Environment variable names cannot contain spaces, so the invoke URL must be
# looked up with underscores throughout.
API_URL = os.getenv('CLOVA_OCR_INVOKE_URL')
SECRET_KEY = os.getenv('CLOVA_OCR_API_KEY')
IMAGE_PATH = '이미지 경로 설정'

# Fail early with a clear message instead of letting requests choke on None.
if not API_URL or not SECRET_KEY:
    raise RuntimeError(
        'CLOVA_OCR_INVOKE_URL / CLOVA_OCR_API_KEY 환경 변수가 설정되지 않았습니다.'
    )

# The image is sent inline in the JSON body as base64 text.
with open(IMAGE_PATH, 'rb') as f:
    image_data = base64.b64encode(f.read()).decode()

payload = {
    'version': 'V2',
    'requestId': str(uuid.uuid4()),          # unique id per request
    'timestamp': int(time.time() * 1000),    # current time in milliseconds
    'images': [
        {
            'name': 'sample',
            # NOTE(review): hard-coded; presumably this must match the actual
            # image type of IMAGE_PATH - confirm when using non-JPEG files.
            'format': 'jpg',
            'data': image_data,
        }
    ],
}

headers = {
    'X-OCR-SECRET': SECRET_KEY,
    'Content-Type': 'application/json',
}

# A timeout keeps the notebook cell from hanging forever if the service
# does not answer.
response = requests.post(
    API_URL,
    headers=headers,
    data=json.dumps(payload),
    timeout=30,
)
result = response.json()

# 'images' missing or empty means the service did not return OCR output;
# show the raw body so the cause is visible.
if result.get('images'):
    fields = result['images'][0].get('fields', [])
    print('--- 인식된 텍스트 ---')
    # Separate the recognized pieces with a space so words do not run together.
    print(' '.join(field['inferText'] for field in fields))
else:
    print('응답 형식이 예상과 다릅니다:', result)

In [None]:
from matplotlib import pyplot as plt

def _to_rgb(img):
    """Convert an OpenCV image (2-D grayscale or BGR) to RGB for matplotlib."""
    if len(img.shape) < 3:
        return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


def plt_imshow(title='image', img=None, figsize=(8, 5)):
    """Display one image, or a list of images side by side, with matplotlib.

    Args:
        title: Title string. When ``img`` is a list this may also be a list
            with one title per image; a single string is reused for every
            image.
        img: An OpenCV image array, or a list of such arrays. Required.
        figsize: Figure size forwarded to ``plt.figure``.

    Raises:
        ValueError: If ``img`` is not provided.
    """
    if img is None:
        raise ValueError('img must be an image array or a list of image arrays')

    plt.figure(figsize=figsize)

    if isinstance(img, list):
        # Normalize the title argument to one title per image.
        titles = title if isinstance(title, list) else [title] * len(img)

        # Lay the images out in a single row of subplots.
        for i, one_img in enumerate(img):
            plt.subplot(1, len(img), i + 1)
            plt.imshow(_to_rgb(one_img))
            plt.title(titles[i])
            # Hide axis ticks; they carry no meaning for an image preview.
            plt.xticks([])
            plt.yticks([])
    else:
        plt.imshow(_to_rgb(img))
        plt.title(title)
        plt.xticks([])
        plt.yticks([])

    plt.show()

In [None]:
import numpy as np
import platform
from PIL import ImageFont, ImageDraw, Image

def put_text(image, text, x, y, color=(0, 255, 0), font_size=22):
    """Draw ``text`` onto an image with a TrueType font and return a BGR array.

    OpenCV's own text drawing is bypassed in favour of Pillow so that a
    platform font can be used for the label.

    Args:
        image: A NumPy array in OpenCV BGR order, or a Pillow image.
        text: The string to draw.
        x: X coordinate of the text anchor passed to Pillow.
        y: Y coordinate of the text anchor passed to Pillow.
        color: Fill colour handed to Pillow; the drawing happens on the
            RGB-converted image.
        font_size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The annotated image as a NumPy array converted back to BGR order.
    """
    if isinstance(image, np.ndarray):
        # Pillow works in RGB, OpenCV arrays are BGR.
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Pick a font file per platform. Any other system (e.g. Linux) previously
    # left the font name undefined; try a commonly installed Korean font there.
    font_by_platform = {
        'Darwin': 'AppleGothic.ttf',
        'Windows': 'malgun.ttf',
    }
    font_name = font_by_platform.get(platform.system(), 'NanumGothic.ttf')

    try:
        image_font = ImageFont.truetype(font_name, font_size)
    except OSError:
        # Font file not found: fall back to Pillow's built-in font rather than
        # aborting. NOTE(review): the built-in font may not cover Korean
        # glyphs - install one of the fonts above for proper rendering.
        image_font = ImageFont.load_default()

    draw = ImageDraw.Draw(image)
    draw.text((x, y), text, font=image_font, fill=color)

    # Convert back to the BGR array layout the OpenCV callers expect.
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

In [None]:
# cv2.imread does not raise on failure; it returns None, so check explicitly.
img = cv2.imread(IMAGE_PATH)
if img is None:
    raise FileNotFoundError(f'이미지를 읽을 수 없습니다: {IMAGE_PATH}')
roi_img = img.copy()

BOX_COLOR = (0, 255, 0)

# Draw the bounding polygon and the recognized text for every OCR field.
# .get() keeps this consistent with the request cell when no fields came back.
for field in result['images'][0].get('fields', []):
    text = field['inferText']

    # Read the coordinates by key instead of relying on dict value order,
    # and use integer tuples, which is the point format OpenCV expects.
    corners = [
        (int(vertex['x']), int(vertex['y']))
        for vertex in field['boundingPoly']['vertices']
    ]

    # Connect each corner to the next one, wrapping around to close the box.
    for start, end in zip(corners, corners[1:] + corners[:1]):
        cv2.line(roi_img, start, end, BOX_COLOR, 2)

    # Place the label slightly above the first vertex of the polygon.
    first_x, first_y = corners[0]
    roi_img = put_text(roi_img, text, first_x, first_y - 10, font_size=30)

    print(text)

plt_imshow(["Original", "ROI"], [img, roi_img], figsize=(16, 10))