## TODO
1) General OCR + 공통기능
 - ~~Craft 추가~~
 - ~~util 함수 추가~~
 - 재활용 가능한 모듈 구현 (preprocessing, pipeline evaluation)
 - Serving (API 만들기)
 - hard example에 대한 인식성능 향상

# OCR PIPELINE

In [None]:
from detector import Detector
from recognizer import Recognizer

from utility import draw_bbox

import matplotlib.pyplot as plt
import cv2
import numpy as np

from glob import glob
import string

## Pipeline Config 정의

In [None]:
# Settings for both pipeline stages: the text detector and the text recognizer.
pipeline_config = {
    "detector": {
        # Supported back-ends: 'pan', 'craft'.
        "name": "craft",
        # Matching checkpoints:
        #   craft -> './detector/CRAFT/craft_ic15_20k.pth'
        #   pan   -> './detector/PAN/PANNet_latest.pth'
        "model_path": "./detector/CRAFT/craft_ic15_20k.pth",
        "craft_options": {
            "canvas_size": 1280,
            "mag_ratio": 1.5,
            "text_threshold": 0.7,
            "link_threshold": 0.4,
            "low_text": 0.4,
        },
    },
    "recognizer": {
        "model": {
            "transform": "TPS",
            "extraction": "ResNet",
            "sequence": "BiLSTM",  # one of: None | BiLSTM | Transformer
            "prediction": "Attn",  # one of: CTC | Attn | Transformer
            "input_channel": 1,
            "output_channel": 512,
            "hidden_size": 256,
            "saved_model": "./recognizer/pretrained_model/TPS-ResNet-BiLSTM-Attn-case-sensitive.pth",
        },
        "transform": {
            "num_fiducial": 20,
        },
        "training": {
            # Placeholder only: the value is recomputed right after this
            # literal (CTC ends up with one class fewer than the other
            # heads). For the case-sensitive model the result is 96.
            "num_class": 96,
            "batch_max_length": 25,
        },
        "dataset": {
            # Case-insensitive alternative (initialized after loading the dataset):
            # "characters": '0123456789abcdefghijklmnopqrstuvwxyz',
            # Character set for the case-sensitive model.
            "characters": string.printable[:-6],
            "imgH": 32,
            "imgW": 100,
        },
    },
}

# Assign the number of classes; it differs between the CTC head and the
# other prediction heads, which get one class more.
n_characters = len(pipeline_config["recognizer"]["dataset"]["characters"]) + 1
if pipeline_config["recognizer"]["model"]["prediction"] == "CTC":
    pipeline_config["recognizer"]["training"]["num_class"] = n_characters
else:
    pipeline_config["recognizer"]["training"]["num_class"] = n_characters + 1

In [None]:
# Load the text detection model from the 'detector' section of the config.
print("Load Text Detection Model")
# NOTE(review): gpu_id=None presumably selects the CPU -- confirm in Detector.
gpu_id = None
text_detector = Detector(pipeline_config["detector"], gpu_id=gpu_id)

In [None]:
# Load the text recognition model from the 'recognizer' section of the config.
# The text recognizer only runs on a GPU, hence the explicit device id.
print("Load Text Recognition Model")
gpu_id = 3
text_recognizer = Recognizer(pipeline_config["recognizer"], gpu_id=gpu_id)

## Image Test

In [None]:
# Load the sample image and convert it from OpenCV's BGR order to RGB.
img_path = './samples/sample_2.jpg'
img = cv2.imread(img_path, cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None;
    # fail here with the path instead of an opaque cv2.cvtColor assertion.
    raise FileNotFoundError("Could not read image: {}".format(img_path))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Detect text regions in `img`, crop each region as a gray-scale patch and
# run the text recognizer on the patches.

# Text detection prediction. The two back-ends use different method names and
# return the boxes at different positions of the result tuple.
detector_name = pipeline_config['detector']['name']
if detector_name == 'pan':
    preds, boxes_list, _ = text_detector.detector.predict(img)
elif detector_name == 'craft':
    boxes_list, ret_score_text, _ = text_detector.detector.prediction(img)
else:
    # Without this branch an unknown name only surfaced later as a NameError
    # on `boxes_list`.
    raise ValueError(
        "Unsupported detector {!r}; expected 'pan' or 'craft'".format(detector_name)
    )

img_h, img_w = img.shape[:2]
text_images = []
for box in boxes_list:
    # Axis-aligned bounding rectangle of the box (column 0 is x, column 1 is
    # y), clamped to the image: a negative start index would otherwise wrap
    # around when slicing and yield a wrong or empty crop.
    x_min = max(int(min(box[:, 0])), 0)
    x_max = min(int(max(box[:, 0])), img_w)
    y_min = max(int(min(box[:, 1])), 0)
    y_max = min(int(max(box[:, 1])), img_h)

    if x_max <= x_min or y_max <= y_min:
        # Degenerate or fully out-of-image box: the crop would be empty and
        # cv2.cvtColor cannot convert an empty image.
        continue

    # The text recognition model takes gray-scale images as input.
    text_images.append(
        cv2.cvtColor(img[y_min:y_max, x_min:x_max, :], cv2.COLOR_RGB2GRAY)
    )

# Image with the detected boxes drawn on it, kept as an ndarray for plotting.
img_result = np.asarray(draw_bbox(img, boxes_list))

# Text recognition prediction. With no usable crops the recognizer is not
# called (its behaviour on an empty batch is not visible here); the empty
# result keeps the (images, texts) pair layout that is indexed as
# predictions[0] / predictions[1] when the results are visualized.
predictions = text_recognizer.prediction(text_images) if text_images else ([], [])

In [None]:
# Visualize the results of both pipeline stages.

# Text detection result: the image with the detected boxes drawn on it.
fig = plt.figure()
plt.imshow(img_result)
plt.show()

# Text recognition result: one figure per detected region, titled with the
# recognized text.
recognized_areas = predictions[0]
recognized_texts = predictions[1]
for detected_area, text in zip(recognized_areas, recognized_texts):
    plt.figure()
    plt.imshow(detected_area, cmap='gray')
    plt.title(text)
    plt.show()