# 영수증 추출한 test이미지의 후처리

## Set-up

In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import json
from tqdm import tqdm

In [2]:
def show_anns(anns):
    """Paint every mask in ``anns`` onto the current matplotlib axes.

    Each annotation is read through its ``'area'`` and ``'segmentation'``
    keys. The segmentation is used both for its ``shape`` and as an index
    into the overlay (presumably a boolean H x W mask -- confirm against
    the mask generator output). Each mask gets a random colour with an
    alpha of 0.35.

    Args:
        anns: Sequence of annotation dicts. Nothing is drawn when it is empty.
    """
    if not len(anns):
        return

    # Largest first, so that smaller masks are painted last and stay visible.
    by_area = sorted(anns, key=lambda ann: ann['area'], reverse=True)

    axes = plt.gca()
    axes.set_autoscale_on(False)

    # RGBA canvas the size of the masks, starting out fully transparent.
    reference = by_area[0]['segmentation']
    canvas = np.ones((reference.shape[0], reference.shape[1], 4))
    canvas[:, :, 3] = 0

    for ann in by_area:
        rgba = np.concatenate([np.random.random(3), [0.35]])
        canvas[ann['segmentation']] = rgba

    axes.imshow(canvas)

In [3]:
def visualize_with_red_mask(result_dict, image, image_name):
    """Show ``image`` with its largest segment tinted red.

    Args:
        result_dict: Iterable of segment dicts; each is read through its
            ``'area'`` and ``'segmentation'`` keys.
        image: Image array in OpenCV BGR channel order. Both the red fill
            value and the final BGR -> RGB conversion assume this.
        image_name: Label appended to the figure title.

    Returns:
        None. When ``result_dict`` is empty nothing is drawn, matching the
        way ``show_anns`` treats an empty annotation list.
    """
    # max() returns the same element as a descending sort followed by [0],
    # without ordering the whole list; default=None covers the empty case.
    largest_segment = max(result_dict, key=lambda seg: seg['area'], default=None)
    if largest_segment is None:
        return

    segmentation_mask = largest_segment['segmentation']

    # Solid red (BGR) inside the segment, black everywhere else.
    red_mask = np.zeros_like(image, dtype=np.uint8)
    red_mask[segmentation_mask] = [0, 0, 255]

    # 50/50 blend. Pixels outside the segment are blended with black, so the
    # background is shown at half brightness. addWeighted returns a new
    # array, so the input image is not modified.
    alpha = 0.5
    overlay_image = cv2.addWeighted(red_mask, alpha, image, 1 - alpha, 0)

    plt.figure(figsize=(10, 10))
    # matplotlib expects RGB, OpenCV arrays are BGR.
    plt.imshow(cv2.cvtColor(overlay_image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f"Largest Segment {image_name}")
    plt.show()

In [4]:
def save_largest_segment_without_mask(result_dict, image, output_path, image_name):
    """Cut the largest segment out of ``image`` and write the crop to disk.

    Pixels outside the segment are zeroed, the result is cropped to the
    segment's bounding box, and the crop is written with ``cv2.imwrite`` to
    ``output_path + image_name`` (plain string concatenation, so
    ``output_path`` needs its own trailing separator).

    Args:
        result_dict: Segment dicts read through ``'area'``,
            ``'segmentation'`` and ``'bbox'`` (unpacked as x, y, w, h).
        image: Source image array; its first two dimensions size the mask.
        output_path: Directory prefix for the output file.
        image_name: File name appended to ``output_path``.

    Raises:
        IndexError: If ``result_dict`` is empty.
    """
    def area_of(segment):
        return segment['area']

    # Rank by area, biggest first, and keep the top entry.
    ranked = sorted(result_dict, key=area_of, reverse=True)
    biggest = ranked[0]

    # Single-channel uint8 mask: non-zero inside the segment, zero outside.
    keep = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
    keep[biggest['segmentation']] = 1

    # Zero out everything that is not part of the segment.
    masked = cv2.bitwise_and(image, image, mask=keep)

    # Bounding-box values may be floats; truncate them before slicing.
    left, top, width, height = (int(value) for value in biggest['bbox'])
    crop = masked[top:top + height, left:left + width]

    cv2.imwrite(output_path+image_name, crop)
    # print(f"Segmented image saved to {output_path+image_name}")

In [5]:
def adjust_data(original_data, adjusted_data, image_name, result_dict, idx):
    """Shift one image's word polygons by a segment's bounding-box origin.

    The segments in ``result_dict`` are ranked by ``'area'`` in descending
    order and the one at position ``idx`` is selected. The top-left corner
    of its ``'bbox'``, truncated to ints, is ADDED to every point of every
    word of ``image_name``. Presumably this maps coordinates predicted on
    the cropped image back into the original image frame -- confirm.

    Args:
        original_data: Dict shaped like
            ``{'images': {name: {'words': {id: {'points': [[x, y], ...]}}}}}``.
            It is not modified.
        adjusted_data: Unused; kept so existing call sites keep working.
        image_name: Key into ``original_data['images']``.
        result_dict: Segment dicts with ``'area'`` and a four-element ``'bbox'``.
        idx: Rank of the segment to use (0 = largest area).

    Returns:
        ``{'words': {word_id: {'points': [[x, y], ...]}}}``. Only ``points``
        is carried over; other per-word fields are dropped.
    """
    ranked = sorted(result_dict, key=lambda seg: seg['area'], reverse=True)
    left, top, _, _ = ranked[idx]['bbox']
    offset_x, offset_y = int(left), int(top)

    words = original_data['images'][image_name]['words']
    shifted_words = {
        word_id: {
            'points': [[pt[0] + offset_x, pt[1] + offset_y] for pt in info['points']],
        }
        for word_id, info in words.items()
    }
    return {'words': shifted_words}

## Automatic mask generation

To run automatic mask generation, provide a SAM model to the `SamAutomaticMaskGenerator` class. Set the path below to the SAM checkpoint. Running on CUDA and with the default model is recommended.

In [6]:
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# Checkpoint file and the matching registry key for the SAM model.
sam_checkpoint = "sam_vit_h_4b8939.pth"
model_type = "vit_h"

# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (slowly) on a machine without CUDA instead of failing in sam.to().
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the model from the checkpoint and move it to the chosen device.
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)

# Generator used by the loops below to produce the per-image segment dicts.
mask_generator = SamAutomaticMaskGenerator(sam)

## test json의 후처리 작업

In [10]:
# Locations of the test images and of the background-removed output images.
image_base_path = '/root/data/datasets/images/test/'
output_path = '/root/data/datasets/remove_background/test/'

# Prediction JSON to post-process (alternative run kept for reference).
# json_path = '/root/outputs/resnet50_type07_newimg/submissions/20241018_184749_new_test.json'
json_path = '/root/outputs/efficientnet_b0/submissions/20241024_014437.json'

# Read and parse the prediction file.
with open(json_path, mode='r', encoding='utf-8') as f:
    test_data = json.loads(f.read())

In [11]:
# Post-processed annotations, keyed by image file name.
adjusted_data = {'images': {}}

# First pass: shift every image's words by the bbox of its largest segment.
for image_name in tqdm(test_data['images']):
    image_path = image_base_path+image_name
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of an opaque cvtColor error.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    masks = mask_generator.generate(image)

    # Index 0 selects the largest-area segment.
    adjusted_data['images'][image_name] = adjust_data(test_data, adjusted_data, image_name, masks, 0)

    # Optional debugging aids (disabled):
    #   visualize_with_red_mask(masks, image, image_name)
    #   save_largest_segment_without_mask(masks, image, output_path, image_name)
    # NOTE: `image` is RGB at this point, while visualize_with_red_mask
    # converts from BGR and cv2.imwrite expects BGR; pass a BGR image to them.

# Write the adjusted annotations next to the original submission.
with open('/root/outputs/efficientnet_b0/submissions/20241024_014437_post_pro.json', 'w', encoding='utf-8') as f:
    json.dump(adjusted_data, f, indent=4)

100%|██████████| 413/413 [16:15<00:00,  2.36s/it]


In [11]:
# NOTE: `adjusted_data`, `image_base_path` and `output_path` are not set in
# this cell; they are expected to still be defined from the earlier cells.
# After a kernel restart, reload `adjusted_data` from
# '/root/outputs/efficientnet_b0/submissions/20241024_014437_post_pro.json'.

# Reload the raw (unadjusted) prediction JSON.
json_path = '/root/outputs/efficientnet_b0/submissions/20241024_014437.json'
with open(json_path, 'r', encoding='utf-8') as f:
    test_data = json.load(f)

# Load the JSON listing the images flagged as anomalous.
json_path = '/root/data/datasets/jsons/need_more_segment_background.json'
with open(json_path, 'r', encoding='utf-8') as f:
    list_data = json.load(f)

In [13]:
# Second pass: for the flagged images, redo the shift with the second-largest
# segment (index 1) and overwrite their existing entries in adjusted_data.
for image_name in tqdm(list_data['test']):
    # Fail on an unknown image name before the expensive mask generation.
    if image_name not in test_data['images']:
        raise KeyError(image_name)

    image_path = image_base_path+image_name
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    masks = mask_generator.generate(image)

    adjusted_data['images'][image_name] = adjust_data(test_data, adjusted_data, image_name, masks, 1)

# Overwrite the post-processed submission with the updated entries.
with open('/root/outputs/efficientnet_b0/submissions/20241024_014437_post_pro.json', 'w', encoding='utf-8') as f:
    json.dump(adjusted_data, f, indent=4)

100%|██████████| 167/167 [06:35<00:00,  2.37s/it]


In [19]:
# Locations of the test images and of the background-removed output images.
image_base_path = '/root/data/datasets/images/test/'
output_path = '/root/data/datasets/remove_background/test/'

# NOTE: `adjusted_data` is not loaded here; it is expected to still be in
# memory from the previous passes.

# Reload the raw (unadjusted) prediction JSON.
json_path = '/root/outputs/efficientnet_b0/submissions/20241024_014437.json'
with open(json_path, 'r', encoding='utf-8') as f:
    test_data = json.load(f)

# Load the second JSON list of images flagged as anomalous.
json_path = '/root/data/datasets/jsons/need_more_segment_background2.json'
with open(json_path, 'r', encoding='utf-8') as f:
    list_data = json.load(f)

In [15]:
# Third pass: for the flagged images, redo the shift with the third-largest
# segment (index 2) and overwrite their existing entries in adjusted_data.
for image_name in tqdm(list_data['test']):
    # Fail on an unknown image name before the expensive mask generation.
    if image_name not in test_data['images']:
        raise KeyError(image_name)

    image_path = image_base_path+image_name
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    masks = mask_generator.generate(image)

    adjusted_data['images'][image_name] = adjust_data(test_data, adjusted_data, image_name, masks, 2)

# Overwrite the post-processed submission with the updated entries.
with open('/root/outputs/efficientnet_b0/submissions/20241024_014437_post_pro.json', 'w', encoding='utf-8') as f:
    json.dump(adjusted_data, f, indent=4)

100%|██████████| 2/2 [00:04<00:00,  2.45s/it]
