In [None]:
!pip install easyocr

In [None]:
import numpy as np
import imutils
import cv2
import pandas as pd
from easyocr import Reader

In [None]:
# Detection regions, one per form field, as pixel coordinates [x1, y1, x2, y2]
# (columns x1:x2 and rows y1:y2 of the aligned image are cropped for OCR).
positions = (
    [927, 163, 1003, 239],
    [1044, 165, 1093, 214],
    [1137, 170, 1178, 211],
    [792, 475, 869, 552],
    [793, 1306, 878, 1391],
    [794, 1225, 878, 1292],
    [795, 1585, 875, 1665],
    [242, 737, 327, 822],
    [242, 609, 321, 688],
    [243, 479, 324, 560],
    [240, 546, 321, 627],
    [795, 1116, 874, 1195],
    [796, 1522, 874, 1600],
    [795, 570, 866, 641],
    [240, 1184, 299, 1243],
    [240, 1120, 301, 1181],
    [795, 1455, 875, 1535],
    [795, 988, 856, 1049],
    [794, 920, 854, 980],
    [795, 857, 854, 916],
    [793, 726, 851, 784],
    [794, 791, 852, 849],
    [239, 1251, 301, 1313],
    [243, 869, 322, 948],
    [243, 802, 325, 884],
)

In [None]:
# Alignment function definition

def align_images(image, template, maxFeatures=500, keepPercent=0.2, debug=False):
    """Warp ``image`` so that it lines up with ``template``.

    ORB keypoints are detected in grayscale versions of both images and
    matched by brute-force Hamming distance. The best ``keepPercent`` of the
    matches are used to estimate a homography with RANSAC, and ``image`` is
    warped with it.

    Args:
        image: BGR image to be aligned.
        template: BGR reference image; the output has its width and height.
        maxFeatures: value passed to ``cv2.ORB_create``.
        keepPercent: fraction of the distance-sorted matches that is kept.
        debug: when True, the kept matches are drawn, shown in a window and
            the call blocks until a key is pressed.

    Returns:
        ``image`` warped by the estimated homography, sized like ``template``.

    Raises:
        ValueError: if no descriptors are found in either image, fewer than
            four matches remain, or the homography cannot be estimated.
    """
    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    orb = cv2.ORB_create(maxFeatures)
    (kpsA, descsA) = orb.detectAndCompute(imageGray, None)
    (kpsB, descsB) = orb.detectAndCompute(templateGray, None)

    # detectAndCompute yields no descriptors for a featureless image; fail
    # here with a clear message instead of inside the matcher.
    if descsA is None or descsB is None:
        raise ValueError("align_images: no ORB features found in image or template")

    method = cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING
    matcher = cv2.DescriptorMatcher_create(method)
    matches = matcher.match(descsA, descsB, None)

    # Smaller distance means a better match, so the best ones come first.
    matches = sorted(matches, key=lambda x: x.distance)

    keep = int(len(matches) * keepPercent)
    matches = matches[:keep]

    if debug:
        matchedVis = cv2.drawMatches(image, kpsA, template, kpsB,
            matches, None)
        matchedVis = imutils.resize(matchedVis, width=1000)
        cv2.imshow("Matched Keypoints", matchedVis)
        cv2.waitKey(0)

    # A homography has 8 degrees of freedom and needs at least 4 point pairs.
    if len(matches) < 4:
        raise ValueError(
            "align_images: only %d matches kept; at least 4 are required"
            % len(matches)
        )

    ptsA = np.zeros((len(matches), 2), dtype="float")
    ptsB = np.zeros((len(matches), 2), dtype="float")

    # queryIdx indexes the image keypoints, trainIdx the template keypoints.
    for (i, m) in enumerate(matches):
        ptsA[i] = kpsA[m.queryIdx].pt
        ptsB[i] = kpsB[m.trainIdx].pt

    (H, mask) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)
    if H is None:
        raise ValueError("align_images: homography estimation failed")

    (h, w) = template.shape[:2]
    aligned = cv2.warpPerspective(image, H, (w, h))

    return aligned

In [None]:
# Run the alignment

image = cv2.imread("hs.jpeg")  # scanned image to be read
template = cv2.imread("template_final.jpeg")  # reference template

# cv2.imread signals an unreadable file by returning None rather than raising,
# so check here instead of failing later inside cvtColor.
if image is None:
    raise FileNotFoundError("could not read image file: hs.jpeg")
if template is None:
    raise FileNotFoundError("could not read template file: template_final.jpeg")

aligned_1 = align_images(image, template, debug=True)  # first alignment pass
aligned_2 = align_images(aligned_1, template, debug=True)  # second pass on the first result

# First-pass result next to the template, for visual comparison.
stacked = np.hstack([aligned_1, template])

# Copy of the template; not used further in this cell.
overlay = template.copy()

# The twice-aligned image; later cells crop the OCR regions out of it.
output = aligned_2.copy()

# show the two output image alignment visualizations
cv2.imshow("Image Alignment Stacked", stacked)
cv2.imshow("Image Alignment Overlay", output)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# EasyOCR configuration: English and Korean, with GPU use requested.

langs = ['en', 'ko']
reader = Reader(gpu=True, lang_list=langs)

In [None]:
# Text recognition: crop every configured region out of the aligned image
# and run the OCR reader on the crop.

roi = []
result = []

for (x1, y1, x2, y2) in positions:
    crop = output[y1:y2, x1:x2]
    roi.append(crop)
    # The reader's result is stored unchanged, including an empty list.
    result.append(reader.readtext(crop))

In [None]:
# For each region keep element [0][1] of its OCR result (the value the later
# cells clean up); a region with an empty result becomes ''.
km = [detections[0][1] if detections != [] else '' for detections in result]

# Post-processing

In [None]:
# Replace every '' element of km with the placeholder 'nan'.
for position, value in enumerate(km):
    if value == '':
        km[position] = 'nan'

In [None]:
# For every item except name, sex and age (indices 0-2): if any of the
# characters 미 / 시 / 행 is detected, the value is replaced with 미시행.
# The loop starts at index 3 so that it matches that rule; it previously
# started at 4 and skipped index 3.
for i in range(3, len(km)):
    t = km[i]
    if isinstance(t, str) and any(ch in t for ch in ('미', '시', '행')):
        km[i] = '미시행'

# Save the two wider fields before the 3-character truncation below, then
# restore them with their own widths (index 16: 4 chars, index 4: 5 chars).
bmi = str(km[16])
rate = str(km[4])
km = [str(value)[:3] for value in km]
km[16] = bmi[:4]
km[4] = rate[:5]

In [None]:
# Hand-picked corrections for characters that are commonly misrecognised.
# No replacement output is itself a replaced character, so one translation
# table gives the same result as applying the substitutions one by one.
misread_table = str.maketrans({
    'g': '9',
    'ㅇ': '0',
    'o': '0',
    'O': '0',
    'G': '6',
    'b': '6',
    ',': '.',
    'I': '7',
})

for position, value in enumerate(km):
    if type(value) is str:
        km[position] = value.translate(misread_table)

In [None]:
import re

# Reduce each measurement to its digits. Indices 0-2 and 5 are left alone.
# Indices 4, 11 and 16 keep their digit groups joined by '.'; all other
# indices keep the bare digits only.
for i in [*range(3, 5), *range(6, len(km))]:
    if km[i] in ['미시행', 'nan']:
        continue
    if i in [4, 11, 16]:
        numbers = re.findall(r'\d+', km[i])
        km[i] = '.'.join(numbers)
    else:
        numbers = re.findall(r'\d', km[i])
        km[i] = ''.join(numbers)
    if not numbers:
        # No digit was recognised: use the same placeholder as an empty OCR
        # result instead of leaving '' (which used to raise IndexError on
        # km[i][0] below).
        km[i] = 'nan'
        continue
    # A three-character value starting with '7' gets a leading '1' instead
    # (presumably a misread '1' -- confirm against sample scans).
    if len(km[i]) == 3 and km[i][0] == '7':
        km[i] = '1' + km[i][1:]

In [None]:
# Display the cleaned values for inspection.
km

In [None]:
# Index 11: a value above 5 is divided by ten
# (presumably restoring a lost decimal point -- confirm).
if km[11] != '미시행' and float(km[11]) > 5:
    km[11] = str(float(km[11]) / 10)

# Index 13: a value of 100 or more keeps only its first character; after
# that, a value above 20 is divided by ten.
if not km[13] == '미시행':
    reading = km[13]
    if float(reading) >= 100:
        reading = str(reading[:1])
    if float(reading) > 20:
        reading = str(float(reading) / 10)
    km[13] = reading

In [None]:
import os

# Row labels, in the same order as the OCR regions / the values in km.
name = [
    '이름', '성별', '나이', '공복혈당', '사구체여과율',
    '요단백(정상/악양성/+1/+2/+3/+4)', '중성지방', '총콜레스테롤',
    '최고-최저 혈압', '수축기혈압', '이완기혈압', '크레아티닌', '허리둘레',
    'HbA1c', 'ALT', 'AST', 'BMI', 'FEV1/FVC', 'FEV1(L)', 'FEV1(%)',
    'FVC(%)', 'FVC(L)', 'GTP', 'HDL', 'LDL',
]
# Sequence numbers: 1-3 followed by 13-34.
idx = [*range(1, 4), *range(13, 35)]

df_final = pd.DataFrame({'순번': idx, '항목': name, '수치': km})

# Append without a header when the file already exists; otherwise create it
# with a header row.
csv_path = '/Users/daeun/Desktop/test_6.csv'
if os.path.exists(csv_path):
    df_final.to_csv(csv_path, mode='a', encoding='utf-8-sig', header=False)
else:
    df_final.to_csv(csv_path, mode='w', encoding='utf-8-sig')