In [2]:
import os
import cv2
import numpy as np
import pdfplumber
import shutil

path_to_frozen_inference_graph = 'Mask_RCNN/data/frozen_inference_graph_coco.pb'
path_coco_model = 'Mask_RCNN/data/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt'
class_label = "data/object_detection_classes_coco.txt"
net = cv2.dnn.readNetFromTensorflow(path_to_frozen_inference_graph, path_coco_model)
colors = np.random.randint(125, 255, (80, 3))

def pre_process(images_path, pre_process_path):
    res_dict = dict()
    file_list = os.listdir(images_path)
    
#     img_count = len(file_list)
    
    for idx, filename in enumerate(file_list):
        file_extension = filename.split(".")[-1]
        file_name = filename.split(".")[0]
        res_dict[filename] = {"file_type": file_extension.upper()}

        detection_type_list = ["pdf", "png", "jpg", "jpeg"]
        img_extension_list = ["png", "jpg", "jpeg"]
        if file_extension not in detection_type_list:
            continue
            
        if file_extension in img_extension_list:
            shutil.copy(os.path.join(images_path, filename), os.path.join(pre_process_path, f"{file_name}_page_0.png"))
        else:
            
            with pdfplumber.open(os.path.join(images_path, filename)) as pdf:
                for page_num, page in enumerate(pdf.pages):
                    img = page.to_image()
                    pdf_image = page.to_image().original
    
                    # 将原始图像数据转换为 OpenCV 图像对象
                    open_cv_image = cv2.cvtColor(np.array(pdf_image), cv2.COLOR_RGB2BGR)
                    cv2.imwrite(os.path.join(pre_process_path, f"{file_name}_page_{page_num}.png"), open_cv_image)
                
    return res_dict
                

def get_mask(task_id, node, pre_process_path, res_dict):
    LABELS = open(class_label).read().strip().split("\n")
    

    pre_process_files = os.listdir(pre_process_path)
    pre_process_files_count = len(pre_process_files)


    for idx, img_name in enumerate(pre_process_files):
        print(f"{idx}/{pre_process_files_count}")
    
        file_name = img_name.split("_page")[0]
   
        matching_keys = [key for key in res_dict.keys() if file_name in key]
        if not matching_keys:
            continue
            
        related_pdf_name = matching_keys[0]
        print(f"模糊匹配到的键：{matching_keys}")


        img = cv2.imread(os.path.join(pre_process_path, img_name))
        height, width, _ = img.shape
        

        blob = cv2.dnn.blobFromImage(img, swapRB=True)
        net.setInput(blob)
        boxes, masks = net.forward(["detection_out_final", "detection_masks"])
        detection_count = boxes.shape[2]

        # Todo: 加日志 idx/img_counts 
        result_path = os.path.join("output", "detect", task_id)
        mask_path = os.path.join(result_path, "mask")
        
        if not os.path.exists(mask_path): 
            os.makedirs(mask_path)
        
        black_image = np.zeros(img.shape, dtype="uint8")

        
#         print(height, width, roi_height, roi_width)
        
        single_res = list()
        for i in range(detection_count):
            box = boxes[0, 0, i]
            class_id = box[1]
            score = box[2]
            if score < 0.5:
                continue

            cur_label = LABELS[int(class_id)]
 

            x = int(box[3] * width)
            y = int(box[4] * height)
            x2 = int(box[5] * width)
            y2 = int(box[6] * height)
            print(x, y, x2, y2)
            roi = black_image[y: y2, x: x2]
            roi_height, roi_width, _ = roi.shape  

            
            mask = masks[i, int(class_id)]
            mask = cv2.resize(mask, (roi_width, roi_height))
            _, mask = cv2.threshold(mask, 0.5, 255, cv2.THRESH_BINARY)
            
            
            contours, _ = cv2.findContours(np.array(mask, np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            color = colors[int(class_id)]

            cv2.rectangle(img, (x, y), (x2, y2), (int(color[0]), int(color[1]), int(color[2])), 2)

            # Draw the label on the image
            cv2.putText(img, cur_label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (int(color[0]), int(color[1]), int(color[2])), 2)
            

            if not res_dict[related_pdf_name].get("result"):
                res_dict[related_pdf_name]["result"] = list()
            
        
            
            detection_res = {
                "class_id": int(class_id),
                "label": cur_label,
                "bbox": [x, y, x2, y2],
                "score": score,
            }
            
            if mask is not None:
                print("mask", type(mask), mask.shape)
                mask_file_path = os.path.join(mask_path ,f'{file_name}_mask_{i}.txt')
                np.savetxt(mask_file_path, mask)
                data = np.loadtxt(mask_file_path)

                print("data", type(data), data.shape)
                detection_res["mask_file"] = mask_file_path    # 保存掩码文件路径
            
            res_dict[related_pdf_name]["result"].append(detection_res)

            
            print(f'Task: {task_id} The file {img_name} predicted current object class is: {cur_label} axis: {x} {y} {x2} {y2}')
            cv2.imwrite(os.path.join(result_path, 'output.jpg'), img)
            
        print(f'Task: {task_id} The file {img_name} predicted done.')

    return res_dict

def handler(detect_floder, task_id, node):
    images_path = os.path.join("temp_storage", detect_floder)

    pre_process_path = os.path.join(images_path, "pre_process")
    if not os.path.exists(pre_process_path):
        os.mkdir(pre_process_path)
    
    res_dict = pre_process(images_path, pre_process_path)
    print(res_dict)
    res = get_mask(task_id, node, pre_process_path, res_dict)
    
    # Todo: 释放资源， 结果增加到 MongoDB
    
    print(res)

if __name__ == "__main__":    
    detect_floder = "detect_demo1"
    task_id = "123"
    node = "worker1"
    handler(detect_floder, task_id, node)

{'户口本.pdf': {'file_type': 'PDF'}, '02公示无异议证明.docx': {'file_type': 'DOCX'}, '03 宗地图.pdf': {'file_type': 'PDF'}, '02-身份证.pdf': {'file_type': 'PDF'}, '不动产登记申请表.pdf': {'file_type': 'PDF'}, 'table2.pdf': {'file_type': 'PDF'}, 'pre_process': {'file_type': 'PRE_PROCESS'}, 'test.jpeg': {'file_type': 'JPEG'}, 'table1.pdf': {'file_type': 'PDF'}}
0/13
模糊匹配到的键：['table1.pdf']
Task: 123 The file table1_page_2.png predicted done.
1/13
模糊匹配到的键：['test.jpeg']
326 27 518 281
mask <class 'numpy.ndarray'> (254, 192)
data <class 'numpy.ndarray'> (254, 192)
Task: 123 The file test_page_0.png predicted current object class is: person axis: 326 27 518 281
Task: 123 The file test_page_0.png predicted done.
2/13
模糊匹配到的键：['table1.pdf']
Task: 123 The file table1_page_1.png predicted done.
3/13
模糊匹配到的键：['table2.pdf']
Task: 123 The file table2_page_0.png predicted done.
4/13
模糊匹配到的键：['不动产登记申请表.pdf']
Task: 123 The file 不动产登记申请表_page_1.png predicted done.
5/13
模糊匹配到的键：['table2.pdf']
Task: 123 The file table2_page_1.pn