In [4]:
import cv2
import pandas as pd
import numpy as np
import os
import math
import json

from pdf2image import convert_from_path
from PIL import Image
from matplotlib import pyplot as plt

# origin_pdf_dir = './scan/datas/240718_좌표용_testPDF/300dpi'   # PDF 파일이 있는 폴더 경로
# pdf_to_divied_jpg_dir = origin_pdf_dir + "/divided"              # pdf를 n개 쪽의 jpg로 저장할(된) 디렉토리
# divied_jpg_crop_from_vertex_dir = origin_pdf_dir + "/square"    # n개쪽의 jpg들을 꼭지점 네모를 기준으로 잘라 저장할(된) 디렉토리
# draw_sertors_and_circles_dir = origin_pdf_dir + "/circles"      # 동그라미 검출 이미지를 저장할 폴더 경로

# Root folder (relative to the notebook) under which the scans of every
# project are stored.
origin_data_dir = './scan/datas/240718_분석테스트용'

# Project number (kept as a string key) -> project name.
project_num_dict = {
    '5537': 'MIT중',
    '5539': 'MIT(고등)',
    '5541': 'CLS(초등)',
    '5543': 'CLS중고등',
    '5545': 'POWER',
    '5547': 'FIT',
}

In [41]:
def _load_json(path):
    """Read the JSON file at ``path`` and return the decoded object.

    The file is always decoded as UTF-8. The stored templates contain
    non-ASCII (Korean) text, so relying on the platform-default encoding
    would make the result depend on the machine the notebook runs on.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _int_keys(mapping):
    """Return a new dict with every key of ``mapping`` converted to ``int``.

    JSON object keys are always strings; the rest of the notebook indexes
    these tables with integer page / sector numbers.
    """
    return {int(key): value for key, value in mapping.items()}


# outer key -> {page (int) -> value}
rects_point_dict = {
    outer_key: _int_keys(inner_dict)
    for outer_key, inner_dict in _load_json('./scan/datas/json/rects_point_dict.json').items()
}

# outer key -> {page (int) -> {sector (int) -> value}}
circle_point_dict = {
    outer_key: {int(middle_key): _int_keys(inner_dict) for middle_key, inner_dict in middle_dict.items()}
    for outer_key, middle_dict in _load_json('./scan/datas/json/circle_point_dict.json').items()
}

# Loaded as-is (no key conversion).
project_dict = _load_json('./scan/datas/json/project_dict.json')

# outer key -> {page (int) -> value}; each value is unpacked as
# (width, height) by the processing loop.
templete_size_dict = {
    outer_key: _int_keys(inner_dict)
    for outer_key, inner_dict in _load_json('./scan/datas/json/templete_size_dict.json').items()
}

In [125]:
# Project whose scans are processed in this run.
project_name = 'CLS중고등'

# Folder (relative to origin_data_dir) that holds the scanned pages of this run.
datas_dir = f'{project_name}/240412101_동탄목동중2_cls/'

# Reverse lookup: project name -> project number. Raises IndexError when the
# name is not present in project_num_dict.
project_num = [num for num, name in project_num_dict.items() if name == project_name][0]
project_size_dict = templete_size_dict[project_num]

test_dir = os.path.join(origin_data_dir, datas_dir)
save_dir = os.path.join(origin_data_dir, f'{project_name}/test/')

# Every entry of the scan folder, in sorted order.
listdir = sorted(os.listdir(test_dir))

##### Used for: test sheets only
def chunk_list(lst, chunk_size):
    """Split ``lst`` into consecutive slices of at most ``chunk_size`` items.

    The last slice is shorter when ``len(lst)`` is not a multiple of
    ``chunk_size``. An empty ``lst`` yields an empty list.
    """
    chunks = []
    for start in range(0, len(lst), chunk_size):
        stop = start + chunk_size
        chunks.append(lst[start:stop])
    return chunks

##### Used for: coordinate template + test sheet
def cover_edges_with_white(image, border_size):
    """Return a copy of ``image`` whose outer frame is painted white (255).

    Parameters
    ----------
    image : array-like
        Image data; converted with ``np.array`` (which copies), so the
        caller's object is never modified.
    border_size : int
        Thickness, in pixels, of the frame painted on each of the four
        sides. Values <= 0 leave the copy untouched.

    Returns
    -------
    numpy.ndarray
        The painted copy, same shape as the input.
    """
    image_np = np.array(image)

    # Nothing to paint; a negative size would otherwise turn into
    # from-the-end slices and whiten unrelated regions.
    if border_size <= 0:
        return image_np

    height, width = image_np.shape[:2]

    # A scalar fill broadcasts over any channel layout (grayscale, BGR, BGRA),
    # whereas a 3-element list only fits 3-channel images.
    white = 255

    # Top and bottom strips.
    image_np[:border_size, :] = white
    image_np[max(height - border_size, 0):, :] = white
    # Left and right strips.
    image_np[:, :border_size] = white
    image_np[:, max(width - border_size, 0):] = white

    return image_np

##### Used for: coordinate template + test sheet
def is_square(cnt, min_area, max_area):
    """Tell whether the area of contour ``cnt`` lies in ``[min_area, max_area]``.

    Despite the name, only the contour area is examined; the shape of the
    contour is not checked. Both bounds are inclusive.
    """
    contour_area = cv2.contourArea(cnt)
    out_of_range = contour_area < min_area or contour_area > max_area
    return not out_of_range

##### Used for: coordinate template + test sheet
def add_white_border(image, border_size):
    """Return ``image`` surrounded by a white margin of ``border_size`` pixels.

    The input is first converted to a numpy array; the margin is then added
    on all four sides with ``cv2.copyMakeBorder`` using a constant
    ``[255, 255, 255]`` fill, so the result is ``2 * border_size`` larger in
    both dimensions.
    """
    pixels = np.array(image)
    white = [255, 255, 255]
    # Same margin for top, bottom, left and right.
    margins = (border_size,) * 4
    return cv2.copyMakeBorder(pixels, *margins, cv2.BORDER_CONSTANT, value=white)

##### Used for: test sheets only
def find_contour(image):
    """Return the external contours of the dark regions of a BGR ``image``.

    Pipeline: grayscale conversion -> 9x9 Gaussian blur (noise reduction) ->
    inverted binary threshold at 150 (pixels at or below 150 become
    foreground) -> ``cv2.findContours`` with ``RETR_EXTERNAL`` and
    ``CHAIN_APPROX_SIMPLE``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (9, 9), 0)

    # threshold() returns (threshold used, image); only the image is needed.
    _, inverted_mask = cv2.threshold(smoothed, 150, 255, cv2.THRESH_BINARY_INV)

    # findContours() returns (contours, hierarchy); the hierarchy is discarded.
    found, _ = cv2.findContours(inverted_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return found

##### Used for: test sheets only
def is_in_circle(shifted_cnt, cdx, page, rects_point_dict, where_in_real_circle_dict):
    """Check whether a contour's enclosing circle fits inside a sector rectangle.

    Parameters
    ----------
    shifted_cnt : contour passed to ``cv2.minEnclosingCircle``.
    cdx : key of the current chunk in ``where_in_real_circle_dict``.
    page : page number; converted with ``int()``.
    rects_point_dict : sequence of rectangles for this page; the first four
        items of each entry are read as ``left, top, right, bottom``.
    where_in_real_circle_dict : dict updated in place. On a match,
        ``[rect_point]`` is appended to
        ``where_in_real_circle_dict[cdx][page][sector]``, where ``sector`` is
        the 1-based position of the rectangle in ``rects_point_dict``.

    Returns
    -------
    bool
        True when the circle lies completely inside (edges touching allowed)
        one of the rectangles; only the first matching rectangle is recorded.
        False otherwise, in which case the dict is left untouched.
    """
    (center_x, center_y), radius = cv2.minEnclosingCircle(shifted_cnt)
    page = int(page)

    # Axis-aligned extent of the enclosing circle.
    circle_left, circle_right = center_x - radius, center_x + radius
    circle_top, circle_bottom = center_y - radius, center_y + radius

    for sector, rect_point in enumerate(rects_point_dict, start=1):
        left, top, right, bottom = rect_point[:4]

        fits_horizontally = circle_left >= left and circle_right <= right
        fits_vertically = circle_top >= top and circle_bottom <= bottom
        if not (fits_horizontally and fits_vertically):
            continue

        # Create the nested chunk -> page -> sector levels on demand.
        per_page = where_in_real_circle_dict.setdefault(cdx, {}).setdefault(page, {})
        per_page.setdefault(sector, []).append([rect_point])
        return True

    return False

################################################################################
# Main loop: for every scanned page, rectify it onto the template size, detect
# the marks that fall inside a sector rectangle, and save an annotated image.

# Number of scanned pages that make up one respondent's sheet set. Should
# eventually come in as an input when the coordinates are first captured.
page_per_one = 4
if len(listdir) % page_per_one != 0:
    print('페이지 수 안맞음')

# Group the files by the configured page count (previously a second,
# independent hard-coded 4 was used here).
chunked_list = chunk_list(listdir, page_per_one)

# cv2.imwrite() returns False instead of raising when the target folder is
# missing, so make sure it exists before the first write.
os.makedirs(save_dir, exist_ok=True)

for cdx, c_list in enumerate(chunked_list):
    where_in_real_circle_dict = dict()

    # Keep the first file and reverse the remaining ones, e.g. file order
    # (a, b, c, d) becomes (a, d, c, b).
    # NOTE(review): presumably the scanner emits pages as 1, 4, 3, 2 - confirm.
    c_list[1:] = c_list[:0:-1]

    for page, jpg in enumerate(c_list, start=1):
        page_rect_dict = rects_point_dict[project_name][page]
        width, height = project_size_dict[page]
        image_path = os.path.join(test_dir, jpg)

        image = cv2.imread(image_path)
        if image is None:
            # imread() signals failure (missing file, non-image entry) with None.
            print(f'Skipping unreadable image: {image_path}')
            continue
        image = cover_edges_with_white(image, 15)

        # --- Locate the corner markers -------------------------------------
        min_area = 10
        max_area = 150
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (1, 1), 0)  # 1x1 kernel: no real smoothing
        edges = cv2.Canny(blurred, 50, 20)  # an earlier variant used (50, 100)

        contours, _ = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        rectangles = [cnt for cnt in contours if is_square(cnt, min_area, max_area)]
        # Only the first ten and last ten candidates take part in the corner
        # search (entries repeat when fewer than twenty exist).
        rectangles = rectangles[:10] + rectangles[-10:]
        if not rectangles:
            # Without any candidate the corners would stay at +/-infinity and
            # the perspective transform would be meaningless.
            print(f'Skipping page without corner markers: {image_path}')
            continue

        # All bounding-box corners of the candidates, in contour order.
        corner_points = []
        for contour in rectangles:
            x, y, w, h = cv2.boundingRect(contour)
            corner_points.extend([[x, y], [x + w, y], [x, y + h], [x + w, y + h]])

        # Extreme points along the two diagonals; ties resolve to the first
        # point encountered.
        top_left = min(corner_points, key=lambda p: p[0] + p[1])
        top_right = max(corner_points, key=lambda p: p[0] - p[1])
        bottom_left = min(corner_points, key=lambda p: p[0] - p[1])
        bottom_right = max(corner_points, key=lambda p: p[0] + p[1])

        # --- Rectify onto the template size --------------------------------
        original_points = np.float32([top_left, top_right, bottom_left, bottom_right])
        dst_points = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
        matrix = cv2.getPerspectiveTransform(original_points, dst_points)
        transformed_image = cv2.warpPerspective(image, matrix, (width, height))

        bordered_image = add_white_border(transformed_image, 30)

        # --- Paint red pixels white ----------------------------------------
        # Two hue bands are needed because red sits at both ends of OpenCV's
        # hue axis.
        hsv = cv2.cvtColor(bordered_image, cv2.COLOR_BGR2HSV)
        lower_red1 = np.array([0, 150, 150])
        upper_red1 = np.array([10, 255, 255])
        lower_red2 = np.array([170, 150, 150])
        upper_red2 = np.array([180, 255, 255])
        mask1 = cv2.inRange(hsv, lower_red1, upper_red1)
        mask2 = cv2.inRange(hsv, lower_red2, upper_red2)
        mask = cv2.bitwise_or(mask1, mask2)
        bordered_image[mask > 0] = [255, 255, 255]

        # --- Detect marks --------------------------------------------------
        contours = find_contour(bordered_image)
        filtered_contours = [cnt for cnt in contours if 10 < cv2.contourArea(cnt) < 100]
        shifted_contours = []
        for cnt in filtered_contours:
            shifted_cnt = np.array(cnt)
            if is_in_circle(shifted_cnt, cdx, page, page_rect_dict, where_in_real_circle_dict):
                shifted_contours.append(shifted_cnt)

        # --- Annotate and save ---------------------------------------------
        # Sector rectangles (green); only the first four fields are coordinates.
        for rect in page_rect_dict:
            x, y, xw, yh = rect[:4]
            cv2.rectangle(bordered_image, (x, y), (xw, yh), (0, 255, 0), 2)

        # Template circles (blue in BGR), drawn with a fixed radius of 7; the
        # stored radius is not used for drawing.
        for sector, circles in circle_point_dict[project_name][page].items():
            for coordinate in circles:
                x, y, _r = coordinate[:3]
                cv2.circle(bordered_image, (x, y), 7, (255, 0, 0), 2)

        # Detected marks (red in BGR).
        cv2.drawContours(bordered_image, shifted_contours, -1, (0, 0, 255), 2)

        # save_dir is concatenated directly with '_<file name>'.
        out_path = f'{save_dir}_{jpg}'
        if not cv2.imwrite(out_path, bordered_image):
            print(f'Failed to write annotated image: {out_path}')
    

In [119]:
# Inspect the page-1 sector definitions of the selected project. The loop that
# draws the sectors unpacks each entry as
# (x, y, xw, yh, t, part, start, item, repeat).
rects_point_dict[project_name][1]

[[185, 250, 364, 288, [0], 'A', ['중학교'], 'schltype', [0, 0]],
 [191, 311, 360, 347, [0], 'A', ['남'], 'gender', [0, 0]],
 [178, 421, 372, 617, [4], 'A', [1, '.', 0, 0, '.', 0, 0], 'schl', [0, 0]],
 [380, 215, 861, 621, [3], 'A', [], 'name', [0, 0]],
 [340, 718, 441, 870, [0], 'A', [1], '', [0, 0]],
 [753, 719, 855, 899, [0], 'A', [1], '', [0, 0]],
 [437, 936, 686, 1164, [5, 2, 7, 7, 7], 'A', [1, 1, 1, 1], '', [0, 0]]]