In [25]:
import os
import cv2
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter, ImageOps
import matplotlib.pyplot as plt
import pandas as pd
import easyocr
reader = easyocr.Reader(['ko'])  # 'ko'는 한국어를 위해, 'en'은 영어를 위해

# Function to display image using matplotlib
def show_image(img, title="Image"):
    plt.figure(figsize=(10, 6))
    plt.imshow(img, cmap='gray')
    plt.title(title)
    plt.axis('off')
    plt.show()

def process_image_turn(image_path, file):
    img = Image.open(image_path)
    # show_image(img)
    
       # padding 해줘서 돌릴 때 짤리지 않게
    img = ImageOps.expand(img, border=100, fill='white')
    # show_image(img)
    img = np.array(img)  # Convert the PIL image to a NumPy array
    
    ##################### rotate
    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Apply edge detection
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Find lines using HoughLines
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    if lines is None:
        return pd.DataFrame(columns=['ID', 'OCR', 'target'])

    # Calculate the angle of the skew
    angles = []
    for line in lines:
        rho, theta = line[0]
        angle = np.degrees(theta) - 90
        angles.append(angle)


    # Compute the median angle
    median_angle = np.median(angles)

    # Rotate the image to correct the skew if necessary
    (h, w) = img.shape[:2]
    center = (w // 2, h // 2)

    # print(median_angle)
    if abs(median_angle) > 12.0:  # Adjust this threshold as necessary
        M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
        rotated = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    else:
        rotated = img  # Use the original image if no rotation is needed

    

    all_results = []

    # Convert back to PIL image to display
    rotated_pil = Image.fromarray(cv2.cvtColor(rotated, cv2.COLOR_BGR2RGB))
    
    
    
    gray_image = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
    average_brightness = np.mean(gray_image)
    # print(average_brightness)
    if average_brightness<160:
        # 밝기 조절
        enhancer = ImageEnhance.Brightness(rotated_pil)
        rotated_pil = enhancer.enhance(2.0)  # 밝기를 증가, 이것보다 높게 하면 자동회전이 잘못되서 위치 바꿈

        # 대비 조절
        enhancer = ImageEnhance.Contrast(rotated_pil)
        rotated_pil = enhancer.enhance(3.0)  # 대비를 증가, 이것보다 높게 하면 자동회전이 잘못되서 위치 바꿈
    

    def process_and_append(image, description):
        #print(description)
        # show_image(image)
        df = process_image_with_ocr(image, file)
        #print(df['OCR'])
        if not df.empty:
            print('*****************',file)
            # print(description, df['OCR'], df['target'])
            all_results.append(df)
            return True
        return False

    if process_and_append(rotated_pil, "rotated_pil"):
        return all_results
    
    rotated_270 = rotated_pil.rotate(270, expand=True)
    if process_and_append(rotated_270, "rotated_270"):
        return all_results
    
    rotated_180 = rotated_pil.rotate(180, expand=True)
    if process_and_append(rotated_180, "rotated_180"):
        return all_results
    
    rotated_90 = rotated_pil.rotate(90, expand=True)
    if process_and_append(rotated_90, "rotated_90"):
        return all_results
    
    

    mirrored_pil1 = ImageOps.mirror(rotated_pil)
    if process_and_append(mirrored_pil1, "mirrored_pil1"):
        return all_results
    
    mirrored_rotated_270 = mirrored_pil1.rotate(270, expand=True)
    if process_and_append(mirrored_rotated_270, "mirrored_rotated_270"):
        return all_results
    
    mirrored_rotated_180 = mirrored_pil1.rotate(180, expand=True)
    if process_and_append(mirrored_rotated_180, "mirrored_rotated_180"):
        return all_results
    
    mirrored_rotated_90 = mirrored_pil1.rotate(90, expand=True)
    if process_and_append(mirrored_rotated_90, "mirrored_rotated_90"):
        return all_results


    return all_results

# Function to process an image with OCR
def process_image_with_ocr(img, image_name):
    if img is None:
        return pd.DataFrame(columns=['ID', 'OCR', 'target'])

    image_width = img.width  # 이미지의 전체 너비
    image_height = img.height  # 이미지의 전체 높이
    crop_height = image_height // 3  # 이미지 높이의 1/5

    crop_area = (0, 0, image_width, crop_height)
    img_cropped = img.crop(crop_area)

    # Convert the cropped image to a format suitable for OpenCV
    # img = np.array(img)
    # img_cv = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    
    img_cropped = np.array(img_cropped)
    img_cropped_cv = cv2.cvtColor(img_cropped, cv2.COLOR_RGB2BGR)
    img_cropped_cv = cv2.cvtColor(img_cropped_cv, cv2.COLOR_BGR2GRAY)
    img_cropped_cv = cv2.fastNlMeansDenoising(img_cropped_cv, h=3)
    
    # show_image(img_cropped_cv)
    
    # EasyOCR로 텍스트 추출
    # results1 = reader.readtext(img_cv, detail=0)  # detail=0 returns only text
    # extracted_text1 = ' '.join(results1).replace(" ", "")
    # print(extracted_text1)
    
    # EasyOCR로 텍스트 추출
    results2 = reader.readtext(img_cropped_cv, detail=0)  # detail=0 returns only text
    extracted_text2 = ' '.join(results2).replace(" ", "")
    # print(extracted_text2)

  
    
    
    # 7,3 에 대해서만 판단하는 것이 맞는 듯. submission 94% 정확도가 올라가면 pytesseract ocr 정확도가 따라가지 못함. 
    keywords_4 = ['진단서','전단서']
    keywords_5= ['운전면허증']
    keywords_7 = ['통원','동원']
    keywords_10 = ['납입','남입','남임','남원','입확인서','임확인서','입확원서','임확원서','임화인서']
    keywords_3 = ['입원','퇴원','원확인서','완확원서','완확원서','진료확인서']
    keywords_8= ['주민등록증','주민등록중']
    keywords_9= ['대한민국','여권']    
    keywords_11= ['약제비계산서영수증','영수증','복약','조제약']
    keywords_13= ['이력서','자소서']
    keywords_14 = ['소견서']
    keywords_15= ['자동차등록증','자동차등록중','자동자등록증']
    keywords_12= ['처방전','처방']
    keywords_1= ['건강보험','임신','출산']
    keywords_6 = ['외래', '계산서']
    
    keywords_dict = {
        
        # 순서가 중요!!!! ocr에 검출되는 단어들이 중복되서 나오기 때문.
        4: keywords_4,
        5: keywords_5,
        
        7: keywords_7,
        10: keywords_10,
        3: keywords_3,
        8: keywords_8,
        9: keywords_9,
        
        11: keywords_11,
        
        13: keywords_13,
        14: keywords_14,
        15: keywords_15,
        12: keywords_12,
        1: keywords_1,
        6: keywords_6,
    }

    
    matched_results = []
    for number, keywords in keywords_dict.items():
        if any(keyword in extracted_text2 for keyword in keywords):
            matched_results.append((image_name, extracted_text2, number))
            break
        # elif any(keyword in extracted_text1 for keyword in keywords):
        #     matched_results.append((image_name, extracted_text1, number))
        #     break

    df = pd.DataFrame(matched_results, columns=['ID', 'OCR', 'target'])
    return df

pd.set_option('display.max_colwidth', None)

# Function to process all images in a directory
def process_directory(directory):
    all_results = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                image_path = os.path.join(root, file)
                results = process_image_turn(image_path, file)
                if len(results) > 0:
                    all_results.extend(results)
                              
    if all_results:
        final_df = pd.concat(all_results, ignore_index=True)
    else:
        final_df = pd.DataFrame(columns=['ID', 'OCR', 'target'])
    return final_df


# 특정 디렉토리 테스트용
# Directory containing the images
# directory = '/data/ephemeral/home/datasets_fin/ocr_test3'

# # Process all images in the directory
# result_df = process_directory(directory)

# # Save the result to a CSV file
# #result_df.to_csv(r'/data/ephemeral/home/notebook/ocr_output_3_7.csv', index=False)

# # Display the final result DataFrame
# print("Final Result DataFrame:")
# print(result_df)



# 다른 submission과 ensemble 작업할 때 사용
output = pd.read_csv(r'/data/ephemeral/home/notebook/output.csv')
filtered_ids = output[(output['target'].isin([3,7]))]['ID']
print(len(filtered_ids))



# 디렉토리 내 이미지를 처리
def process_directory2(directory, valid_ids):
    all_results = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file in valid_ids.values:
                image_path = os.path.join(root, file)
                # print("Processing:", image_path)  # 경로 확인용 출력
                results = process_image_turn(image_path, file)  # 각 파일에 대해 이미지 처리 함수 호출
                if len(results) > 0:
                    all_results.extend(results)  # 결과 합치기

    if all_results:
        final_df = pd.concat(all_results, ignore_index=True)
    else:
        final_df = pd.DataFrame(columns=['ID', 'OCR', 'target'])
    return final_df


# 이미지 처리
directory = '/data/ephemeral/home/datasets_fin/test'
final_df = process_directory2(directory, filtered_ids)
final_df.to_csv(r'/data/ephemeral/home/notebook/EASY_ocr_output_3_7_text2.csv', index=False)

print(final_df)


408
***************** 457d4a0ed1647ebb.jpg
***************** 8538d5326f92f9fb.jpg
***************** f95c84c901644f43.jpg
***************** 8979f2734e1e5f33.jpg
***************** b2a1a2dc8491cd35.jpg
***************** afa31e08d4f6249c.jpg
***************** 5a79e12b2b95d779.jpg
***************** e7f2c8600531b584.jpg
***************** 15d6890b32a3491a.jpg
***************** cb3179f4bfdf1878.jpg
***************** d2a9e0d2faa42d67.jpg
***************** 5d778edcf3564010.jpg
***************** a1a719520384a188.jpg
***************** 3a144769653b13e6.jpg
***************** 2adc963a2a9727a0.jpg
***************** 2d075b49d57a56c7.jpg
***************** 00e15da96484eb94.jpg
***************** 06fb9852b074f191.jpg
***************** 18912bd404188282.jpg
***************** bddaf431af26a850.jpg
***************** f0db66f90ef8b655.jpg
***************** 7f3311659138e1c1.jpg
***************** 33b3c8e82347cbb4.jpg
***************** 5f2ea6fb8d52e684.jpg
***************** 6d31a57776ef3b85.jpg
***************** dcc

In [26]:
print(len(filtered_ids))

408


In [28]:
import pandas as pd

pred_df = pd.read_csv("/data/ephemeral/home/notebook/output.csv")
ocr_df = pd.read_csv("/data/ephemeral/home/notebook/EASY_ocr_output_3_7_text2.csv")

merged_df = pred_df.merge(ocr_df[['ID', 'target']], on='ID', how='left', suffixes=('', '_3_7_ocr'))

# # target 값 대체 (replace)
merged_df['target'] = merged_df['target_3_7_ocr'].combine_first(merged_df['target'])

# # 필요 없는 'target_ocr' 컬럼 삭제
merged_df2 = merged_df.drop(columns=['target_3_7_ocr'])

# # 결과 저장
merged_df2.to_csv("/data/ephemeral/home/notebook/EASY_pred_ocr_0803_3_7_text2_ocr.csv", index=False)

In [32]:
merged_df2

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2.0
1,00091bffdffd83de.jpg,12.0
2,00396fbc1f6cc21d.jpg,5.0
3,00471f8038d9c4b6.jpg,12.0
4,00901f504008d884.jpg,2.0
...,...,...
3135,ffb4b6f619fb60ea.jpg,6.0
3136,ffb54299b1ad4159.jpg,10.0
3137,ffc2c91dff8cf2c0.jpg,8.0
3138,ffc4e330a5353a2a.jpg,0.0


In [33]:
df = pd.merge(pred_df, merged_df2, on='ID', how='left', suffixes=('', '_3_7_ocr'))
df[df['target']!=df['target_3_7_ocr']]
len(df[df['target']!=df['target_3_7_ocr']])

57

In [5]:
pred_df = pd.read_csv("/data/ephemeral/home/notebook/output.csv")
ocr_output_3_7 = pd.read_csv("/data/ephemeral/home/notebook/ocr_output_3_7.csv")
ocr_output_4_14 = pd.read_csv("/data/ephemeral/home/notebook/ocr_output_4_14.csv")


merged_df3 = pred_df.merge(ocr_output_3_7[['ID', 'target']], on='ID', how='left', suffixes=('', '3_7_ocr'))
merged_df4 = merged_df3.merge(ocr_output_4_14[['ID', 'target']], on='ID', how='left', suffixes=('', '4_14_ocr'))

In [6]:
merged_df4

Unnamed: 0,ID,target,target3_7_ocr,target4_14_ocr
0,0008fdb22ddce0ce.jpg,2,,
1,00091bffdffd83de.jpg,12,,
2,00396fbc1f6cc21d.jpg,5,,
3,00471f8038d9c4b6.jpg,12,,
4,00901f504008d884.jpg,2,,
...,...,...,...,...
3135,ffb4b6f619fb60ea.jpg,6,,
3136,ffb54299b1ad4159.jpg,10,,
3137,ffc2c91dff8cf2c0.jpg,8,,
3138,ffc4e330a5353a2a.jpg,0,,


In [8]:
df = merged_df4[merged_df4['target4_14_ocr'].isna()==False]
df[df['target']!=df['target4_14_ocr']]

Unnamed: 0,ID,target,target3_7_ocr,target4_14_ocr
451,2788f05973ca8a3c.jpg,4,,7.0
679,39c97e8c3e07a970.jpg,4,,3.0
1322,6e87683611244cc9.jpg,14,,4.0
1581,845d51149dd90a37.jpg,14,,3.0
1909,9e95f18106634063.jpg,4,,3.0
2029,a89b258856f1e224.jpg,4,,6.0
2323,bed3afcc44c426c3.jpg,14,,4.0
2334,c05bdf0dfc422bf0.jpg,4,,7.0
2349,c21908abd08920af.jpg,4,,3.0
