In [49]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

def load_and_preprocess_image(image_path, closing_kernel = np.array([[0,0,0,0,0],[0,1,1,1,0],[1,1,1,1,1],[0,1,1,1,0],[0,0,1,0,0]],np.uint8)):

    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    binary_image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
   
    closing_kernel= closing_kernel
    processed_image = cv2.morphologyEx(binary_image, cv2.MORPH_CLOSE, closing_kernel, iterations=3)

    dilation_kernel = np.ones((2, 2), np.uint8)
    processed_image = cv2.dilate(processed_image, dilation_kernel, iterations=1)

    return image, processed_image



def draw_bounding_boxes(image, bounding_boxes, color=(0, 255, 0), thickness=2):

    output_image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    
    for (x, y, w, h) in bounding_boxes:
        cv2.rectangle(output_image, (x, y), (x + w, y + h), color, thickness)
    
    return output_image

def display_image(image, title="Optimized Character Segmentation"):
 
    plt.figure(figsize=(10, 10))
    plt.imshow(image, cmap='gray')
    plt.axis('off')
    plt.title(title)
    plt.show()



In [50]:
def split_large_boxes(processed_image, stats, original_image, width_threshold=1.3, height_threshold=1.3):
  
    new_boxes = [] 
    stats_len = len(stats)
    all_widths = [stats[i][2] for i in range(0,stats_len)]
   # print(all_widths)
    all_heights = [stats[i][3] for i in range(0,stats_len)]
    all_area = [stats[i][4] for i in range(0,stats_len)]
    avg_width = np.mean(all_widths) if all_widths else 20
    #print(avg_width)
    avg_height = np.mean(all_heights) if all_heights else 30
    #print(avg_height)
    avg_area = np.mean(all_area) if all_area else 100
    #print(avg_area)
    for x, y, w, h, area in stats:
        aspect_ratio_width = w / h
        aspect_ratio_height = h / w
       
        if aspect_ratio_width > width_threshold and w > 1.4*avg_width and area > 1.55*avg_area: 
            #print(area) 
            refined_boxes = refine_extreme_aspect_boxes(original_image, x,y,w,h, avg_height,avg_width, vertical = True)
            #print(refined_boxes)
            for x,y,w,h in refined_boxes:
                new_boxes.append((x,y,w,h))
        elif aspect_ratio_height > height_threshold and h > 1.26* avg_height and area > 1.6* avg_area:  
            refined_boxes = refine_extreme_aspect_boxes(original_image, x,y,w,h, avg_height,avg_width)
            #print(refined_boxes)
            for x,y,w,h in refined_boxes:
                new_boxes.append((x, y, w, h))
        else:
            new_boxes.append((x, y, w, h))
      
   # print(new_boxes)

    return new_boxes

def refine_extreme_aspect_boxes(image, x,y,w,h, avg_height, avg_width, closing_kernel_size=(5,3), tolerance_ratio=0.5, vertical = False, iteration = 3):
    if vertical:
        closing_kernel_size = (1,3)
        iteration = 1
    refined_boxes = []
    image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
            # Extract the region of interest (ROI) from the original image
    boxed_region = image[y:y+h, x:x+w]
    #boxed_img = draw_bounding_boxes(image,[(x,y,w,h)])
    #display_image(boxed_img)

            # Apply threshold and smaller morphological closing to this ROI
          
    closing_kernel = np.ones(closing_kernel_size, np.uint8)
    boxed_region_closed = cv2.morphologyEx(boxed_region, cv2.MORPH_CLOSE, closing_kernel, iterations=iteration)
    
   

            # Detect connected components in the ROI
    num, _, stats, _ = cv2.connectedComponentsWithStats(boxed_region_closed, connectivity=4)

    best_local_box = None
    for i in range(1, num):
        lx, ly, lw, lh, larea = stats[i]
        if lw < 0.5*avg_width or lh < 0.5*avg_height:
            continue
                # Check if the box is close to the average dimensions
        if (abs(lw - avg_width) / avg_width < tolerance_ratio and
            abs(lh - avg_height) / avg_height < tolerance_ratio):
            best_local_box = (x + lx, y + ly, lw, lh)
        if best_local_box is None:
            #print(f"best_local_box:{best_local_box}")
            refined_boxes.append((x, y, w, h))
 


    #boxed_img = draw_bounding_boxes(image,refined_boxes)
    #display_image(boxed_img, title=f"{refined_boxes}")
     
    return refined_boxes

def detect_large_characters(processed_image, original_image, min_area=50, max_area=5000, dynamic_factor=0.45, padding_ratio=0.1):
   
    num_labels, labels, stats, centboxed_regionds = cv2.connectedComponentsWithStats(processed_image, connectivity=8)
    w_thresh = 10
    h_thresh = 10
    img_h, img_w = processed_image.shape
    large_char_stats= []
    for x, y, w, h, area in stats:
        if (w > w_thresh and h >= h_thresh):
            large_char_stats.append((x, y, w, h, area))
    large_char_num = len(large_char_stats)
    all_widths = [large_char_stats[i][2] for i in range(1, large_char_num)]
    all_heights = [large_char_stats[i][3] for i in range(1, large_char_num)]

    avg_width = np.mean(all_widths) if all_widths else 20
    avg_height = np.mean(all_heights) if all_heights else 30

    min_width = max(15, int(avg_width * dynamic_factor))
    max_width = int(avg_width * 2.4)
    min_height = max(15, int(avg_height * dynamic_factor))
    max_height = int(avg_height * 2.4)

    bounding_boxes = []
    for i in range(1, large_char_num):  
        x, y, w, h, area = large_char_stats[i]

        if min_area < area < max_area and min_width < w < max_width and min_height < h < max_height:
            bounding_boxes.append((x, y, w, h, area))

    refined_boxes = split_large_boxes(processed_image, bounding_boxes, original_image)

    final_boxes = []
    for x, y, w, h in refined_boxes:
        padding_w = int(w * padding_ratio)
        padding_h = int(h * padding_ratio)

        x_new = max(0, x - padding_w)
        y_new = max(0, y - padding_h)
        w_new = min(img_w - x_new, w + 2 * padding_w)
        h_new = min(img_h - y_new, h + 2 * padding_h)

        final_boxes.append((x_new, y_new, w_new, h_new))
    
    return final_boxes


In [51]:
import os
def save_image(image, input_path, output_folder):
    """saves image"""
    filename = os.path.basename(input_path) 
    output_path = os.path.join(output_folder, filename) 
    os.makedirs(output_folder, exist_ok=True) 
    #save without compression
    cv2.imwrite(output_path, image, [cv2.IMWRITE_PNG_COMPRESSION, 0])
    print(f"saved:{output_path}")

import os
import cv2
import numpy as np
from tqdm import tqdm

def save_characters_from_boxes(image, boxes, image_path, output_root='segmented_chars_img', min_size=(5, 5)):
    """
    Save each character from bounding boxes into a structured output folder.
    Skips very small boxes (less than min_size).
    """
    # Prepare paths
    image_dir = os.path.dirname(image_path)           # e.g., output/RC04844
    subfolder = os.path.basename(image_dir)            # e.g., RC04844
    image_filename = os.path.basename(image_path)      # e.g., page_1.png
    image_name = os.path.splitext(image_filename)[0]   # e.g., page_1

    output_folder = os.path.join(output_root, subfolder, image_name)
    os.makedirs(output_folder, exist_ok=True)

    # Sort boxes top to bottom, then left to right
    boxes_sorted = sorted(boxes, key=lambda b: (b[1], b[0]))

    count = 0
    for (x, y, w, h) in boxes_sorted:
        # Skip tiny noisy boxes
        if w < min_size[0] or h < min_size[1]:
            continue

        crop = image[y:y+h, x:x+w]
        count += 1
        save_path = os.path.join(output_folder, f"c{count}.png")
        cv2.imwrite(save_path, crop, [cv2.IMWRITE_PNG_COMPRESSION, 0])
        print(f"saved:{save_path}")

def process_all_images(input_root_folder='output_sample', output_root_folder='seg_visual_img', char_output_root='segmented_chars_img'):
    """
    Processes all images under input_root_folder (your "output" folder).
    Saves segmented characters under char_output_root,
    and full images with drawn boxes under output_root_folder.
    """
    for root, _, files in os.walk(input_root_folder):

        relative_path = os.path.relpath(root, input_root_folder)
        output_folder = os.path.join(output_root_folder, relative_path)
        os.makedirs(output_folder, exist_ok=True)

        # Sort filenames naturally (page_1.png before page_10.png)
        png_files = sorted(
            [file for file in files if file.lower().endswith(".png")],
            key=lambda x: (int(x.split('_')[-1].split('.')[0]) if '_' in x else x)
        )

        if not png_files:
            print(f"No PNG images found in {root}")
            continue

        for file in tqdm(png_files, desc=f"Processing {relative_path}"):
            input_image_path = os.path.join(root, file)
            

                # Load and preprocess
            original_image, processed_image = load_and_preprocess_image(input_image_path)
            print(input_image_path)
                # Detect characters
            character_boxes = detect_large_characters(processed_image, original_image)

                # Draw bounding boxes
            output_image_segmented = draw_bounding_boxes(original_image, character_boxes)

                # Save the segmented full image (for visualization)
            save_image(output_image_segmented, input_image_path, output_root_folder)

                # Save each cropped character
            save_characters_from_boxes(original_image, character_boxes, input_image_path, output_root=char_output_root)



In [52]:

input_folder = "output_sample"

process_all_images(input_root_folder=input_folder, output_root_folder="seg_visual_img", char_output_root='segmented_chars_img')


No PNG images found in output_sample


Processing RC04844:   0%|          | 0/4 [00:00<?, ?it/s]

output_sample\RC04844\page_1.png


Processing RC04844:  25%|██▌       | 1/4 [00:00<00:01,  2.08it/s]

saved:seg_visual_img\page_1.png
saved:segmented_chars_img\RC04844\page_1\c1.png
saved:segmented_chars_img\RC04844\page_1\c2.png
saved:segmented_chars_img\RC04844\page_1\c3.png
saved:segmented_chars_img\RC04844\page_1\c4.png
saved:segmented_chars_img\RC04844\page_1\c5.png
saved:segmented_chars_img\RC04844\page_1\c6.png
saved:segmented_chars_img\RC04844\page_1\c7.png
saved:segmented_chars_img\RC04844\page_1\c8.png
saved:segmented_chars_img\RC04844\page_1\c9.png
saved:segmented_chars_img\RC04844\page_1\c10.png
saved:segmented_chars_img\RC04844\page_1\c11.png
saved:segmented_chars_img\RC04844\page_1\c12.png
saved:segmented_chars_img\RC04844\page_1\c13.png
saved:segmented_chars_img\RC04844\page_1\c14.png
saved:segmented_chars_img\RC04844\page_1\c15.png
saved:segmented_chars_img\RC04844\page_1\c16.png
saved:segmented_chars_img\RC04844\page_1\c17.png
saved:segmented_chars_img\RC04844\page_1\c18.png
saved:segmented_chars_img\RC04844\page_1\c19.png
saved:segmented_chars_img\RC04844\page_1\c20.p

Processing RC04844:  50%|█████     | 2/4 [00:00<00:00,  2.07it/s]

saved:seg_visual_img\page_2.png
saved:segmented_chars_img\RC04844\page_2\c1.png
saved:segmented_chars_img\RC04844\page_2\c2.png
saved:segmented_chars_img\RC04844\page_2\c3.png
saved:segmented_chars_img\RC04844\page_2\c4.png
saved:segmented_chars_img\RC04844\page_2\c5.png
saved:segmented_chars_img\RC04844\page_2\c6.png
saved:segmented_chars_img\RC04844\page_2\c7.png
saved:segmented_chars_img\RC04844\page_2\c8.png
saved:segmented_chars_img\RC04844\page_2\c9.png
saved:segmented_chars_img\RC04844\page_2\c10.png
saved:segmented_chars_img\RC04844\page_2\c11.png
saved:segmented_chars_img\RC04844\page_2\c12.png
saved:segmented_chars_img\RC04844\page_2\c13.png
saved:segmented_chars_img\RC04844\page_2\c14.png
saved:segmented_chars_img\RC04844\page_2\c15.png
saved:segmented_chars_img\RC04844\page_2\c16.png
saved:segmented_chars_img\RC04844\page_2\c17.png
saved:segmented_chars_img\RC04844\page_2\c18.png
saved:segmented_chars_img\RC04844\page_2\c19.png
saved:segmented_chars_img\RC04844\page_2\c20.p

Processing RC04844:  75%|███████▌  | 3/4 [00:01<00:00,  1.94it/s]

saved:segmented_chars_img\RC04844\page_3\c447.png
saved:segmented_chars_img\RC04844\page_3\c448.png
saved:segmented_chars_img\RC04844\page_3\c449.png
saved:segmented_chars_img\RC04844\page_3\c450.png
saved:segmented_chars_img\RC04844\page_3\c451.png
saved:segmented_chars_img\RC04844\page_3\c452.png
saved:segmented_chars_img\RC04844\page_3\c453.png
saved:segmented_chars_img\RC04844\page_3\c454.png
saved:segmented_chars_img\RC04844\page_3\c455.png
saved:segmented_chars_img\RC04844\page_3\c456.png
saved:segmented_chars_img\RC04844\page_3\c457.png
output_sample\RC04844\page_4.png


Processing RC04844: 100%|██████████| 4/4 [00:02<00:00,  1.85it/s]


saved:seg_visual_img\page_4.png
saved:segmented_chars_img\RC04844\page_4\c1.png
saved:segmented_chars_img\RC04844\page_4\c2.png
saved:segmented_chars_img\RC04844\page_4\c3.png
saved:segmented_chars_img\RC04844\page_4\c4.png
saved:segmented_chars_img\RC04844\page_4\c5.png
saved:segmented_chars_img\RC04844\page_4\c6.png
saved:segmented_chars_img\RC04844\page_4\c7.png
saved:segmented_chars_img\RC04844\page_4\c8.png
saved:segmented_chars_img\RC04844\page_4\c9.png
saved:segmented_chars_img\RC04844\page_4\c10.png
saved:segmented_chars_img\RC04844\page_4\c11.png
saved:segmented_chars_img\RC04844\page_4\c12.png
saved:segmented_chars_img\RC04844\page_4\c13.png
saved:segmented_chars_img\RC04844\page_4\c14.png
saved:segmented_chars_img\RC04844\page_4\c15.png
saved:segmented_chars_img\RC04844\page_4\c16.png
saved:segmented_chars_img\RC04844\page_4\c17.png
saved:segmented_chars_img\RC04844\page_4\c18.png
saved:segmented_chars_img\RC04844\page_4\c19.png
saved:segmented_chars_img\RC04844\page_4\c20.p

Processing RC04845:   0%|          | 0/1 [00:00<?, ?it/s]

output_sample\RC04845\page_1.png


Processing RC04845: 100%|██████████| 1/1 [00:00<00:00,  2.64it/s]


saved:seg_visual_img\page_1.png
saved:segmented_chars_img\RC04845\page_1\c1.png
saved:segmented_chars_img\RC04845\page_1\c2.png
saved:segmented_chars_img\RC04845\page_1\c3.png
saved:segmented_chars_img\RC04845\page_1\c4.png
saved:segmented_chars_img\RC04845\page_1\c5.png
saved:segmented_chars_img\RC04845\page_1\c6.png
saved:segmented_chars_img\RC04845\page_1\c7.png
saved:segmented_chars_img\RC04845\page_1\c8.png
saved:segmented_chars_img\RC04845\page_1\c9.png
saved:segmented_chars_img\RC04845\page_1\c10.png
saved:segmented_chars_img\RC04845\page_1\c11.png
saved:segmented_chars_img\RC04845\page_1\c12.png
saved:segmented_chars_img\RC04845\page_1\c13.png
saved:segmented_chars_img\RC04845\page_1\c14.png
saved:segmented_chars_img\RC04845\page_1\c15.png
saved:segmented_chars_img\RC04845\page_1\c16.png
saved:segmented_chars_img\RC04845\page_1\c17.png
saved:segmented_chars_img\RC04845\page_1\c18.png
saved:segmented_chars_img\RC04845\page_1\c19.png
saved:segmented_chars_img\RC04845\page_1\c20.p

Processing RC04846:   0%|          | 0/3 [00:00<?, ?it/s]

output_sample\RC04846\page_1.png
saved:seg_visual_img\page_1.png
saved:segmented_chars_img\RC04846\page_1\c1.png
saved:segmented_chars_img\RC04846\page_1\c2.png
saved:segmented_chars_img\RC04846\page_1\c3.png
saved:segmented_chars_img\RC04846\page_1\c4.png
saved:segmented_chars_img\RC04846\page_1\c5.png
saved:segmented_chars_img\RC04846\page_1\c6.png
saved:segmented_chars_img\RC04846\page_1\c7.png
saved:segmented_chars_img\RC04846\page_1\c8.png
saved:segmented_chars_img\RC04846\page_1\c9.png
saved:segmented_chars_img\RC04846\page_1\c10.png
saved:segmented_chars_img\RC04846\page_1\c11.png
saved:segmented_chars_img\RC04846\page_1\c12.png
saved:segmented_chars_img\RC04846\page_1\c13.png
saved:segmented_chars_img\RC04846\page_1\c14.png
saved:segmented_chars_img\RC04846\page_1\c15.png
saved:segmented_chars_img\RC04846\page_1\c16.png
saved:segmented_chars_img\RC04846\page_1\c17.png
saved:segmented_chars_img\RC04846\page_1\c18.png
saved:segmented_chars_img\RC04846\page_1\c19.png
saved:segment

Processing RC04846:  33%|███▎      | 1/3 [00:00<00:00,  2.61it/s]

saved:segmented_chars_img\RC04846\page_1\c80.png
saved:segmented_chars_img\RC04846\page_1\c81.png
saved:segmented_chars_img\RC04846\page_1\c82.png
saved:segmented_chars_img\RC04846\page_1\c83.png
saved:segmented_chars_img\RC04846\page_1\c84.png
saved:segmented_chars_img\RC04846\page_1\c85.png
saved:segmented_chars_img\RC04846\page_1\c86.png
saved:segmented_chars_img\RC04846\page_1\c87.png
saved:segmented_chars_img\RC04846\page_1\c88.png
saved:segmented_chars_img\RC04846\page_1\c89.png
saved:segmented_chars_img\RC04846\page_1\c90.png
saved:segmented_chars_img\RC04846\page_1\c91.png
saved:segmented_chars_img\RC04846\page_1\c92.png
saved:segmented_chars_img\RC04846\page_1\c93.png
saved:segmented_chars_img\RC04846\page_1\c94.png
saved:segmented_chars_img\RC04846\page_1\c95.png
saved:segmented_chars_img\RC04846\page_1\c96.png
saved:segmented_chars_img\RC04846\page_1\c97.png
saved:segmented_chars_img\RC04846\page_1\c98.png
saved:segmented_chars_img\RC04846\page_1\c99.png
saved:segmented_char

Processing RC04846:  67%|██████▋   | 2/3 [00:00<00:00,  2.58it/s]

saved:seg_visual_img\page_2.png
saved:segmented_chars_img\RC04846\page_2\c1.png
saved:segmented_chars_img\RC04846\page_2\c2.png
saved:segmented_chars_img\RC04846\page_2\c3.png
saved:segmented_chars_img\RC04846\page_2\c4.png
saved:segmented_chars_img\RC04846\page_2\c5.png
saved:segmented_chars_img\RC04846\page_2\c6.png
saved:segmented_chars_img\RC04846\page_2\c7.png
saved:segmented_chars_img\RC04846\page_2\c8.png
saved:segmented_chars_img\RC04846\page_2\c9.png
saved:segmented_chars_img\RC04846\page_2\c10.png
saved:segmented_chars_img\RC04846\page_2\c11.png
saved:segmented_chars_img\RC04846\page_2\c12.png
saved:segmented_chars_img\RC04846\page_2\c13.png
saved:segmented_chars_img\RC04846\page_2\c14.png
saved:segmented_chars_img\RC04846\page_2\c15.png
saved:segmented_chars_img\RC04846\page_2\c16.png
saved:segmented_chars_img\RC04846\page_2\c17.png
saved:segmented_chars_img\RC04846\page_2\c18.png
saved:segmented_chars_img\RC04846\page_2\c19.png
saved:segmented_chars_img\RC04846\page_2\c20.p

Processing RC04846: 100%|██████████| 3/3 [00:01<00:00,  2.94it/s]


saved:seg_visual_img\page_3.png
saved:segmented_chars_img\RC04846\page_3\c1.png
saved:segmented_chars_img\RC04846\page_3\c2.png
saved:segmented_chars_img\RC04846\page_3\c3.png
saved:segmented_chars_img\RC04846\page_3\c4.png
saved:segmented_chars_img\RC04846\page_3\c5.png
saved:segmented_chars_img\RC04846\page_3\c6.png
saved:segmented_chars_img\RC04846\page_3\c7.png
saved:segmented_chars_img\RC04846\page_3\c8.png
saved:segmented_chars_img\RC04846\page_3\c9.png
saved:segmented_chars_img\RC04846\page_3\c10.png
saved:segmented_chars_img\RC04846\page_3\c11.png
saved:segmented_chars_img\RC04846\page_3\c12.png
saved:segmented_chars_img\RC04846\page_3\c13.png
saved:segmented_chars_img\RC04846\page_3\c14.png
saved:segmented_chars_img\RC04846\page_3\c15.png
saved:segmented_chars_img\RC04846\page_3\c16.png
saved:segmented_chars_img\RC04846\page_3\c17.png
saved:segmented_chars_img\RC04846\page_3\c18.png
saved:segmented_chars_img\RC04846\page_3\c19.png
saved:segmented_chars_img\RC04846\page_3\c20.p

Processing RC04847:   0%|          | 0/1 [00:00<?, ?it/s]

output_sample\RC04847\page_2.png
saved:seg_visual_img\page_2.png


Processing RC04847: 100%|██████████| 1/1 [00:00<00:00,  2.99it/s]


saved:segmented_chars_img\RC04847\page_2\c1.png
saved:segmented_chars_img\RC04847\page_2\c2.png
saved:segmented_chars_img\RC04847\page_2\c3.png
saved:segmented_chars_img\RC04847\page_2\c4.png
saved:segmented_chars_img\RC04847\page_2\c5.png
saved:segmented_chars_img\RC04847\page_2\c6.png
saved:segmented_chars_img\RC04847\page_2\c7.png
saved:segmented_chars_img\RC04847\page_2\c8.png
saved:segmented_chars_img\RC04847\page_2\c9.png
saved:segmented_chars_img\RC04847\page_2\c10.png
saved:segmented_chars_img\RC04847\page_2\c11.png
saved:segmented_chars_img\RC04847\page_2\c12.png
saved:segmented_chars_img\RC04847\page_2\c13.png
saved:segmented_chars_img\RC04847\page_2\c14.png
saved:segmented_chars_img\RC04847\page_2\c15.png
saved:segmented_chars_img\RC04847\page_2\c16.png
saved:segmented_chars_img\RC04847\page_2\c17.png
saved:segmented_chars_img\RC04847\page_2\c18.png
saved:segmented_chars_img\RC04847\page_2\c19.png
saved:segmented_chars_img\RC04847\page_2\c20.png
saved:segmented_chars_img\RC0

Processing RC04848:   0%|          | 0/1 [00:00<?, ?it/s]

output_sample\RC04848\page_2.png


Processing RC04848: 100%|██████████| 1/1 [00:00<00:00,  1.27it/s]


saved:seg_visual_img\page_2.png
saved:segmented_chars_img\RC04848\page_2\c1.png
saved:segmented_chars_img\RC04848\page_2\c2.png
saved:segmented_chars_img\RC04848\page_2\c3.png
saved:segmented_chars_img\RC04848\page_2\c4.png
saved:segmented_chars_img\RC04848\page_2\c5.png
saved:segmented_chars_img\RC04848\page_2\c6.png
saved:segmented_chars_img\RC04848\page_2\c7.png
saved:segmented_chars_img\RC04848\page_2\c8.png
saved:segmented_chars_img\RC04848\page_2\c9.png
saved:segmented_chars_img\RC04848\page_2\c10.png
saved:segmented_chars_img\RC04848\page_2\c11.png
saved:segmented_chars_img\RC04848\page_2\c12.png
saved:segmented_chars_img\RC04848\page_2\c13.png
saved:segmented_chars_img\RC04848\page_2\c14.png
saved:segmented_chars_img\RC04848\page_2\c15.png
saved:segmented_chars_img\RC04848\page_2\c16.png
saved:segmented_chars_img\RC04848\page_2\c17.png
saved:segmented_chars_img\RC04848\page_2\c18.png
saved:segmented_chars_img\RC04848\page_2\c19.png
saved:segmented_chars_img\RC04848\page_2\c20.p

Processing RC04849:   0%|          | 0/1 [00:00<?, ?it/s]

output_sample\RC04849\page_1.png
saved:seg_visual_img\page_1.png
saved:segmented_chars_img\RC04849\page_1\c1.png
saved:segmented_chars_img\RC04849\page_1\c2.png
saved:segmented_chars_img\RC04849\page_1\c3.png
saved:segmented_chars_img\RC04849\page_1\c4.png
saved:segmented_chars_img\RC04849\page_1\c5.png
saved:segmented_chars_img\RC04849\page_1\c6.png
saved:segmented_chars_img\RC04849\page_1\c7.png
saved:segmented_chars_img\RC04849\page_1\c8.png
saved:segmented_chars_img\RC04849\page_1\c9.png
saved:segmented_chars_img\RC04849\page_1\c10.png
saved:segmented_chars_img\RC04849\page_1\c11.png
saved:segmented_chars_img\RC04849\page_1\c12.png
saved:segmented_chars_img\RC04849\page_1\c13.png
saved:segmented_chars_img\RC04849\page_1\c14.png
saved:segmented_chars_img\RC04849\page_1\c15.png
saved:segmented_chars_img\RC04849\page_1\c16.png
saved:segmented_chars_img\RC04849\page_1\c17.png
saved:segmented_chars_img\RC04849\page_1\c18.png
saved:segmented_chars_img\RC04849\page_1\c19.png
saved:segment

Processing RC04849: 100%|██████████| 1/1 [00:00<00:00,  3.67it/s]


saved:segmented_chars_img\RC04849\page_1\c99.png
saved:segmented_chars_img\RC04849\page_1\c100.png
saved:segmented_chars_img\RC04849\page_1\c101.png
saved:segmented_chars_img\RC04849\page_1\c102.png
saved:segmented_chars_img\RC04849\page_1\c103.png
saved:segmented_chars_img\RC04849\page_1\c104.png
saved:segmented_chars_img\RC04849\page_1\c105.png
saved:segmented_chars_img\RC04849\page_1\c106.png
saved:segmented_chars_img\RC04849\page_1\c107.png
saved:segmented_chars_img\RC04849\page_1\c108.png
saved:segmented_chars_img\RC04849\page_1\c109.png
saved:segmented_chars_img\RC04849\page_1\c110.png
saved:segmented_chars_img\RC04849\page_1\c111.png
saved:segmented_chars_img\RC04849\page_1\c112.png
saved:segmented_chars_img\RC04849\page_1\c113.png
saved:segmented_chars_img\RC04849\page_1\c114.png
saved:segmented_chars_img\RC04849\page_1\c115.png
saved:segmented_chars_img\RC04849\page_1\c116.png
saved:segmented_chars_img\RC04849\page_1\c117.png
saved:segmented_chars_img\RC04849\page_1\c118.png
s

Processing RC04850:   0%|          | 0/1 [00:00<?, ?it/s]

output_sample\RC04850\page_1.png


Processing RC04850: 100%|██████████| 1/1 [00:00<00:00,  2.44it/s]


saved:seg_visual_img\page_1.png
saved:segmented_chars_img\RC04850\page_1\c1.png
saved:segmented_chars_img\RC04850\page_1\c2.png
saved:segmented_chars_img\RC04850\page_1\c3.png
saved:segmented_chars_img\RC04850\page_1\c4.png
saved:segmented_chars_img\RC04850\page_1\c5.png
saved:segmented_chars_img\RC04850\page_1\c6.png
saved:segmented_chars_img\RC04850\page_1\c7.png
saved:segmented_chars_img\RC04850\page_1\c8.png
saved:segmented_chars_img\RC04850\page_1\c9.png
saved:segmented_chars_img\RC04850\page_1\c10.png
saved:segmented_chars_img\RC04850\page_1\c11.png
saved:segmented_chars_img\RC04850\page_1\c12.png
saved:segmented_chars_img\RC04850\page_1\c13.png
saved:segmented_chars_img\RC04850\page_1\c14.png
saved:segmented_chars_img\RC04850\page_1\c15.png
saved:segmented_chars_img\RC04850\page_1\c16.png
saved:segmented_chars_img\RC04850\page_1\c17.png
saved:segmented_chars_img\RC04850\page_1\c18.png
saved:segmented_chars_img\RC04850\page_1\c19.png
saved:segmented_chars_img\RC04850\page_1\c20.p

Processing RC04851:   0%|          | 0/1 [00:00<?, ?it/s]

output_sample\RC04851\page_1.png
saved:seg_visual_img\page_1.png


Processing RC04851: 100%|██████████| 1/1 [00:00<00:00,  3.24it/s]


saved:segmented_chars_img\RC04851\page_1\c1.png
saved:segmented_chars_img\RC04851\page_1\c2.png
saved:segmented_chars_img\RC04851\page_1\c3.png
saved:segmented_chars_img\RC04851\page_1\c4.png
saved:segmented_chars_img\RC04851\page_1\c5.png
saved:segmented_chars_img\RC04851\page_1\c6.png
saved:segmented_chars_img\RC04851\page_1\c7.png
saved:segmented_chars_img\RC04851\page_1\c8.png
saved:segmented_chars_img\RC04851\page_1\c9.png
saved:segmented_chars_img\RC04851\page_1\c10.png
saved:segmented_chars_img\RC04851\page_1\c11.png
saved:segmented_chars_img\RC04851\page_1\c12.png
saved:segmented_chars_img\RC04851\page_1\c13.png
saved:segmented_chars_img\RC04851\page_1\c14.png
saved:segmented_chars_img\RC04851\page_1\c15.png
saved:segmented_chars_img\RC04851\page_1\c16.png
saved:segmented_chars_img\RC04851\page_1\c17.png
saved:segmented_chars_img\RC04851\page_1\c18.png
saved:segmented_chars_img\RC04851\page_1\c19.png
saved:segmented_chars_img\RC04851\page_1\c20.png
saved:segmented_chars_img\RC0

Processing RC04852:   0%|          | 0/1 [00:00<?, ?it/s]

output_sample\RC04852\page_1.png


Processing RC04852: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s]

saved:seg_visual_img\page_1.png
saved:segmented_chars_img\RC04852\page_1\c1.png
saved:segmented_chars_img\RC04852\page_1\c2.png
saved:segmented_chars_img\RC04852\page_1\c3.png
saved:segmented_chars_img\RC04852\page_1\c4.png
saved:segmented_chars_img\RC04852\page_1\c5.png
saved:segmented_chars_img\RC04852\page_1\c6.png
saved:segmented_chars_img\RC04852\page_1\c7.png
saved:segmented_chars_img\RC04852\page_1\c8.png
saved:segmented_chars_img\RC04852\page_1\c9.png
saved:segmented_chars_img\RC04852\page_1\c10.png
saved:segmented_chars_img\RC04852\page_1\c11.png
saved:segmented_chars_img\RC04852\page_1\c12.png
saved:segmented_chars_img\RC04852\page_1\c13.png
saved:segmented_chars_img\RC04852\page_1\c14.png
saved:segmented_chars_img\RC04852\page_1\c15.png
saved:segmented_chars_img\RC04852\page_1\c16.png
saved:segmented_chars_img\RC04852\page_1\c17.png
saved:segmented_chars_img\RC04852\page_1\c18.png
saved:segmented_chars_img\RC04852\page_1\c19.png
saved:segmented_chars_img\RC04852\page_1\c20.p

Processing RC04852: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s]
Processing RC04853:   0%|          | 0/1 [00:00<?, ?it/s]

output_sample\RC04853\page_2.png


Processing RC04853: 100%|██████████| 1/1 [00:00<00:00,  1.92it/s]

saved:seg_visual_img\page_2.png
saved:segmented_chars_img\RC04853\page_2\c1.png
saved:segmented_chars_img\RC04853\page_2\c2.png
saved:segmented_chars_img\RC04853\page_2\c3.png
saved:segmented_chars_img\RC04853\page_2\c4.png
saved:segmented_chars_img\RC04853\page_2\c5.png
saved:segmented_chars_img\RC04853\page_2\c6.png
saved:segmented_chars_img\RC04853\page_2\c7.png
saved:segmented_chars_img\RC04853\page_2\c8.png
saved:segmented_chars_img\RC04853\page_2\c9.png
saved:segmented_chars_img\RC04853\page_2\c10.png
saved:segmented_chars_img\RC04853\page_2\c11.png
saved:segmented_chars_img\RC04853\page_2\c12.png
saved:segmented_chars_img\RC04853\page_2\c13.png
saved:segmented_chars_img\RC04853\page_2\c14.png
saved:segmented_chars_img\RC04853\page_2\c15.png
saved:segmented_chars_img\RC04853\page_2\c16.png
saved:segmented_chars_img\RC04853\page_2\c17.png
saved:segmented_chars_img\RC04853\page_2\c18.png
saved:segmented_chars_img\RC04853\page_2\c19.png
saved:segmented_chars_img\RC04853\page_2\c20.p


