In [19]:
import cv2
import numpy as np

def crop_manga_footer_robust(image_path, output_path, padding=15):
    # 1. 이미지 불러오기
    img = cv2.imread(image_path)
    if img is None:
        print(f"이미지를 불러올 수 없습니다: {image_path}")
        return

    height, width = img.shape[:2]
    
    # 2. 그레이스케일 변환 및 이진화
    # Otsu's method를 사용하여 최적의 임계값을 자동으로 찾습니다.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # 3. 하단 관심 영역(ROI) 설정 (이미지 높이의 하위 30%만 분석)
    roi_start_y = int(height * 0.7)
    roi = thresh[roi_start_y:height, :]
    
    # 4. ROI 내에서 행별 검은색 픽셀(콘텐츠) 합계 계산
    horizontal_sum = np.sum(roi, axis=1)
    
    # 5. 하단 텍스트 영역의 상단 경계 찾기
    # 텍스트 행으로 간주할 최소 픽셀 수 (이미지 너비에 비례하여 설정)
    text_row_threshold = width * 0.05
    
    crop_y = height # 기본값은 자르지 않음
    found_text_block = False
    
    # ROI의 아래에서 위로 스캔합니다.
    for i in range(len(horizontal_sum) - 1, -1, -1):
        current_y = roi_start_y + i
        
        # 해당 행에 텍스트(콘텐츠)가 일정량 이상 존재하는지 확인
        if horizontal_sum[i] > text_row_threshold:
            found_text_block = True
        
        # 텍스트 블록을 발견한 상태에서, 콘텐츠가 없는 행(공백)이 나타나면
        # 그 지점이 텍스트 블록의 상단 경계일 가능성이 높습니다.
        if found_text_block and horizontal_sum[i] <= text_row_threshold:
            crop_y = current_y
            break # 하단 텍스트 블록의 상단을 찾았으므로 반복 종료

    # 6. 패딩 적용 및 이미지 자르기
    # 찾은 경계선에서 위로 약간의 여유(padding)를 두어 본문이 잘리는 것을 방지합니다.
    final_crop_y = max(0, crop_y - padding)
    cropped_img = img[0:final_crop_y, :]
    
    # 7. 결과 저장
    cv2.imwrite(output_path, cropped_img)
    print(f"처리 완료: {output_path} (절단 높이: {final_crop_y}px)")

# --- 사용 예시 ---
# 첫 번째 이미지 테스트
crop_manga_footer_robust('input_0.jpg', 'output_image_0_cropped.jpg')
# 두 번째 이미지 테스트 (문제가 되었던 이미지)
crop_manga_footer_robust('input_1.jpg', 'output_image_1_cropped.jpg')

처리 완료: output_image_0_cropped.jpg (절단 높이: 1147px)
처리 완료: output_image_1_cropped.jpg (절단 높이: 1147px)


In [1]:
import cv2
import numpy as np

# Load the image (replace 'input.jpg' with your image file path)
image_path = 'img_002.jpg'
image = cv2.imread(image_path)
if image is None:
    raise ValueError("Image not found or unable to load.")

height, width = image.shape[:2]

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Binarize the image (invert so black pixels are 255)
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Calculate the number of black pixels per row
row_sums = np.sum(binary // 255, axis=1)

# Threshold for considering a row as having content (adjust if needed)
content_threshold = 5  # Small number to ignore noise

# Find rows with content
content_rows = np.where(row_sums > content_threshold)[0]

if len(content_rows) == 0:
    # No content, nothing to crop
    cropped = image
else:
    # Find content blocks separated by significant gaps
    min_gap_size = 10  # Adjusted to better separate the bottom text (was 20)
    
    blocks = []
    start = content_rows[0]
    prev = content_rows[0]
    for row in content_rows[1:]:
        gap = row - prev - 1
        if gap >= min_gap_size:
            blocks.append((start, prev))
            start = row
        prev = row
    blocks.append((start, prev))  # Add the last block
    
    # Assume the main content is all blocks except the last small one (bottom text)
    if len(blocks) > 1:
        last_block_height = blocks[-1][1] - blocks[-1][0] + 1
        if last_block_height < 100:  # Assume bottom text block is smaller than 100 pixels (adjust as needed)
            crop_y = blocks[-1][0]  # Crop just before the last block starts
        else:
            crop_y = height
    else:
        crop_y = height
    
    # Crop the image
    cropped = image[0:crop_y, 0:width]

# Save the cropped image
output_path = 'output.jpg'
cv2.imwrite(output_path, cropped)

print(f"Cropped image saved to {output_path}")

Cropped image saved to output.jpg


In [6]:
import cv2
import numpy as np

# Load the image (replace 'input.jpg' with your image file path)
image_path = 'img_013.jpg'
image = cv2.imread(image_path)
if image is None:
    raise ValueError("Image not found or unable to load.")

height, width = image.shape[:2]

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Binarize the image (invert so black pixels are 255)
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Calculate the number of black pixels per row
row_sums = np.sum(binary // 255, axis=1)

# Threshold for considering a row as having content (adjust if needed)
content_threshold = 5  # Small number to ignore noise

# Find rows with content
content_rows = np.where(row_sums > content_threshold)[0]

if len(content_rows) == 0:
    # No content, nothing to crop
    cropped = image
else:
    # Find content blocks separated by significant gaps
    min_gap_size = 10  # Adjusted to better separate the bottom text
    
    blocks = []
    start = content_rows[0]
    prev = content_rows[0]
    for row in content_rows[1:]:
        gap = row - prev - 1
        if gap >= min_gap_size:
            blocks.append((start, prev))
            start = row
        prev = row
    blocks.append((start, prev))  # Add the last block
    
    # Assume the main content is all blocks except the last small one (bottom text)
    if len(blocks) > 1:
        last_block_height = blocks[-1][1] - blocks[-1][0] + 1
        if last_block_height < 100:  # Assume bottom text block is smaller than 100 pixels (adjust as needed)
            # Crop to the end of the previous block to remove extra white space
            crop_y = blocks[-2][1] + 1
        else:
            crop_y = height
    else:
        crop_y = height
    
    # Initial crop
    cropped = image[0:crop_y, 0:width]

# Further trim any remaining white space at top and bottom
if cropped.size > 0:
    gray_c = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
    _, bin_c = cv2.threshold(gray_c, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    row_sums_c = np.sum(bin_c // 255, axis=1)
    content_rows_c = np.where(row_sums_c > content_threshold)[0]
    if len(content_rows_c) > 0:
        top = content_rows_c[0]
        bottom = content_rows_c[-1] + 1
        cropped = cropped[top:bottom, 0:width]

# Save the cropped image
output_path = 'output.jpg'
cv2.imwrite(output_path, cropped)

print(f"Cropped image saved to {output_path}")

Cropped image saved to output.jpg


In [8]:
import cv2
import numpy as np

# Load the image (replace 'input.jpg' with your image file path)
image_path = 'img_013.jpg'
image = cv2.imread(image_path)
if image is None:
    raise ValueError("Image not found or unable to load.")

height, width = image.shape[:2]

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Binarize the image (invert so black pixels are 255)
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Calculate the number of black pixels per row
row_sums = np.sum(binary // 255, axis=1)

# Threshold for considering a row as having content (adjust if needed)
content_threshold = 5  # Small number to ignore noise

# Find rows with content
content_rows = np.where(row_sums > content_threshold)[0]

if len(content_rows) == 0:
    # No content, nothing to crop
    cropped = image
else:
    # Find content blocks separated by significant gaps
    min_gap_size = 10  # Adjusted to better separate the bottom text
    
    blocks = []
    start = content_rows[0]
    prev = content_rows[0]
    for row in content_rows[1:]:
        gap = row - prev - 1
        if gap >= min_gap_size:
            blocks.append((start, prev))
            start = row
        prev = row
    blocks.append((start, prev))  # Add the last block
    
    # Assume the main content is all blocks except the last small one (bottom text)
    if len(blocks) > 1:
        last_block_height = blocks[-1][1] - blocks[-1][0] + 1
        if last_block_height < 100:  # Assume bottom text block is smaller than 100 pixels (adjust as needed)
            # Crop to the end of the previous block to remove extra white space
            crop_y = blocks[-2][1] + 1
        else:
            crop_y = height
    else:
        crop_y = height
    
    # Initial crop
    cropped = image[0:crop_y, 0:width]

# Further trim any remaining white space at top and bottom
if cropped.size > 0:
    gray_c = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
    _, bin_c = cv2.threshold(gray_c, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    row_sums_c = np.sum(bin_c // 255, axis=1)
    content_rows_c = np.where(row_sums_c > content_threshold)[0]
    if len(content_rows_c) > 0:
        top = content_rows_c[0]
        bottom = content_rows_c[-1] + 1
        cropped = cropped[top:bottom, 0:width]

# Add about 2 white lines (pixels) at the bottom as padding
if cropped.size > 0:
    padding = 10  # Number of white rows to add; adjust if "lines" means more pixels
    padded_height = cropped.shape[0] + padding
    padded = np.full((padded_height, width, 3), 255, dtype=np.uint8)  # White background
    padded[0:cropped.shape[0], :] = cropped
    cropped = padded

# Save the cropped image
output_path = 'output.jpg'
cv2.imwrite(output_path, cropped)

print(f"Cropped image saved to {output_path}")

Cropped image saved to output.jpg


In [9]:
import cv2
import numpy as np
import os

def crop_single_image(image):
    """
    Crops the image to remove the bottom text block, trims extra white space,
    and adds 2 white lines at the bottom.
    """
    height, width = image.shape[:2]

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Binarize the image (invert so black pixels are 255)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Calculate the number of black pixels per row
    row_sums = np.sum(binary // 255, axis=1)

    # Threshold for considering a row as having content (adjust if needed)
    content_threshold = 5  # Small number to ignore noise

    # Find rows with content
    content_rows = np.where(row_sums > content_threshold)[0]

    if len(content_rows) == 0:
        # No content, return original
        return image
    else:
        # Find content blocks separated by significant gaps
        min_gap_size = 10  # Adjusted to better separate the bottom text
        
        blocks = []
        start = content_rows[0]
        prev = content_rows[0]
        for row in content_rows[1:]:
            gap = row - prev - 1
            if gap >= min_gap_size:
                blocks.append((start, prev))
                start = row
            prev = row
        blocks.append((start, prev))  # Add the last block
        
        # Assume the main content is all blocks except the last small one (bottom text)
        if len(blocks) > 1:
            last_block_height = blocks[-1][1] - blocks[-1][0] + 1
            if last_block_height < 100:  # Assume bottom text block is smaller than 100 pixels (adjust as needed)
                # Crop to the end of the previous block to remove extra white space
                crop_y = blocks[-2][1] + 1
            else:
                crop_y = height
        else:
            crop_y = height
        
        # Initial crop
        cropped = image[0:crop_y, 0:width]

    # Further trim any remaining white space at top and bottom
    if cropped.size > 0:
        gray_c = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
        _, bin_c = cv2.threshold(gray_c, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        row_sums_c = np.sum(bin_c // 255, axis=1)
        content_rows_c = np.where(row_sums_c > content_threshold)[0]
        if len(content_rows_c) > 0:
            top = content_rows_c[0]
            bottom = content_rows_c[-1] + 1
            cropped = cropped[top:bottom, 0:width]

    # Add about 2 white lines (pixels) at the bottom as padding
    if cropped.size > 0:
        padding = 2  # Number of white rows to add; adjust if "lines" means more pixels
        padded_height = cropped.shape[0] + padding
        padded = np.full((padded_height, width, 3), 255, dtype=np.uint8)  # White background
        padded[0:cropped.shape[0], :] = cropped
        cropped = padded

    return cropped

# Set the root folder here (replace with your actual path)
root_folder = 'DragonBallCopy'  # e.g., '/Users/yourname/images'

# Supported image extensions
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')  # Add more if needed

# Traverse the root folder and all subfolders
for subdir, _, files in os.walk(root_folder):
    print("subdir :", subdir)
    for file in files:
        if file.lower().endswith(image_extensions):
            input_path = os.path.join(subdir, file)
            print(f"Processing: {input_path}")
            
            # Load the image
            image = cv2.imread(input_path)
            if image is None:
                print(f"Failed to load: {input_path}")
                continue
            
            # Crop the image
            cropped = crop_single_image(image)
            
            # Save with the same name in the same location (overwrites original)
            cv2.imwrite(input_path, cropped)
            print(f"Saved cropped image: {input_path}")

print("Processing complete.")

subdir : DragonBallCopy
subdir : DragonBallCopy\001
Processing: DragonBallCopy\001\img_001.jpg
Saved cropped image: DragonBallCopy\001\img_001.jpg
Processing: DragonBallCopy\001\img_002.jpg
Saved cropped image: DragonBallCopy\001\img_002.jpg
Processing: DragonBallCopy\001\img_003.jpg
Saved cropped image: DragonBallCopy\001\img_003.jpg
Processing: DragonBallCopy\001\img_004.jpg
Saved cropped image: DragonBallCopy\001\img_004.jpg
Processing: DragonBallCopy\001\img_005.jpg
Saved cropped image: DragonBallCopy\001\img_005.jpg
Processing: DragonBallCopy\001\img_006.jpg
Saved cropped image: DragonBallCopy\001\img_006.jpg
Processing: DragonBallCopy\001\img_007.jpg
Saved cropped image: DragonBallCopy\001\img_007.jpg
Processing: DragonBallCopy\001\img_008.jpg
Saved cropped image: DragonBallCopy\001\img_008.jpg
Processing: DragonBallCopy\001\img_009.jpg
Saved cropped image: DragonBallCopy\001\img_009.jpg
Processing: DragonBallCopy\001\img_010.jpg
Saved cropped image: DragonBallCopy\001\img_010.jp