In [None]:
from concurrent.futures import ProcessPoolExecutor, as_completed
from glob import glob
from pathlib import Path

import cv2
from tqdm.notebook import tqdm
from colorbar import ColorbarTemplate

In [3]:
def process_image_wrapper(args):
    """
    Erase colorbars from one image, taking all inputs as a single tuple.

    Bundling the inputs in one tuple allows passing multiple arguments through
    a single parameter.

    Args:
        args: A ``(image_path, colorbars)`` pair. ``image_path`` is the image to
            read; ``colorbars`` is an iterable of objects exposing
            ``erase_from_image(img, threshold=..., crop_and_resize=...)``.

    Returns:
        ``"Processed: <image_path>"`` on success,
        ``"Error processing <image_path>: <reason>"`` if any step raises or the
        result cannot be written, or ``None`` when the image cannot be read
        (a "Skipping" notice is printed in that case).

    Note:
        The destination directory is read from the module-level ``output_dir``,
        which must be defined before this function is called.
    """
    image_path, colorbars = args
    try:
        output_path = output_dir / Path(image_path).name

        img = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Skipping {image_path}, unable to read.")
            return None

        for colorbar in colorbars:
            img = colorbar.erase_from_image(img, threshold=0.8, crop_and_resize=False)

        # imwrite reports failure through its boolean return value; without this
        # check a failed write would still be reported as "Processed".
        if not cv2.imwrite(str(output_path), img):
            raise OSError(f"could not write {output_path}")
        return f"Processed: {image_path}"
    except Exception as e:
        return f"Error processing {image_path}: {e}"

In [2]:
 colorbars = [#ColorbarTemplate(),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_short.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_vertical_short.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_short_flat.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_2.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_3.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_light.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_long.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_ruler.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_1cm.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_2cm.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_none2.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_none.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_grey.png')
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_short_black-grey.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_vertical_short2.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_mini.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_empty.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_empty_big.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_3_5.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_long_mini.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_just_colors.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_partial_black.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_partial_black_small.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_partial_1_5.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_short_1_2_big.png'),
# ColorbarTemplate(fn='data/colorbar_templates/colorbar_vertical_long.png'), 
ColorbarTemplate(fn='data/colorbar_templates/colorbar_just_colors_new.png'),
ColorbarTemplate(fn='data/colorbar_templates/colorbar_1_2_3colors.png'),
ColorbarTemplate(fn='data/colorbar_templates/colorbar_0_5_grey.png'),
ColorbarTemplate(fn='data/colorbar_templates/colorbar_0_2_vertical.png')]

In [4]:
def process_image(image_path):
    """
    Erase every configured colorbar from one image and save the result.

    The image is read in greyscale, each template in the module-level
    ``colorbars`` list is applied in order, and the result is written to the
    module-level ``output_dir`` under the input's file name.

    Args:
        image_path: Location of the image to process (``str`` or path-like).

    Returns:
        The output path as a string, or ``None`` if the image could not be read
        or the result could not be written (a message is printed in both cases).
    """
    # The processed copy keeps the input's file name.
    output_path = output_dir / Path(image_path).name

    # Load directly in greyscale; str() keeps path-like inputs working, as in
    # process_image_wrapper.
    img = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Skipping {image_path}, unable to read.")
        return None

    for colorbar in colorbars:
        img = colorbar.erase_from_image(img, threshold=0.8, crop_and_resize=False)

    # imwrite reports failure through its boolean return value; do not claim an
    # output file exists when nothing was written.
    if not cv2.imwrite(str(output_path), img):
        print(f"Failed to write {output_path}.")
        return None
    return str(output_path)

def process_images_in_parallel(image_paths, max_workers=None):
    """
    Run ``process_image`` over many images in a pool of worker processes.

    Args:
        image_paths: Iterable of image paths; each one is passed to
            ``process_image``.
        max_workers: Pool size forwarded to ``ProcessPoolExecutor``
            (``None`` leaves the choice to the executor).

    Returns:
        None. Progress is shown with a tqdm bar. Images whose worker raised are
        reported by path as they complete, and a failure count is printed once
        the whole batch has finished.
    """
    failed = []
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its path so failures can be attributed.
        futures = {executor.submit(process_image, path): path for path in image_paths}

        # len(futures) rather than len(image_paths) so any iterable is accepted.
        progress = tqdm(as_completed(futures), total=len(futures), desc='Processing Images')

        for future in progress:
            try:
                future.result()
            except Exception as exc:
                # Keep going: one bad image should not hide the status of the rest.
                failed.append(futures[future])
                print(f"Error processing {futures[future]}: {exc}")

    if failed:
        print(f"{len(failed)} of {len(futures)} images failed.")

In [5]:
# Source images and the destination for their colorbar-free copies.
input_dir = Path('output/images')
output_dir = Path('output/images_preprocessed')
output_dir.mkdir(parents=True, exist_ok=True)


def _png_names(directory):
    """Collect the file names of the PNG files directly inside `directory`."""
    names = set()
    for png_file in tqdm(directory.glob('*.png')):
        names.add(png_file.name)
    return names


colorbar_images = _png_names(input_dir)
colorbar_images_preprocessed = _png_names(output_dir)

# Names present in the input directory that have no counterpart in the output yet.
unique_to_colorbar_images = list(colorbar_images.difference(colorbar_images_preprocessed))

len(unique_to_colorbar_images)

453it [00:00, 6765.99it/s]
0it [00:00, ?it/s]


453

In [6]:
# Build each path from input_dir, the directory the pending names were listed
# from, rather than a separately hard-coded prefix that can drift out of sync
# with it. Kept as plain strings for cv2.imread.
images_paths = [str(input_dir / name) for name in unique_to_colorbar_images]

In [7]:
# Run the batch over every image that has no preprocessed copy yet.
process_images_in_parallel(image_paths=images_paths)

Processing Images: 100%|██████████| 453/453 [10:02<00:00,  1.33s/it]
