In [1]:
# imports
%matplotlib widget
%config InlineBackend.figure_format = 'retina'
from pathlib import Path
import tempfile

from PIL import Image, ImageCms, TiffTags
import matplotlib.pyplot as plt

from ipywidgets import IntProgress, Label, VBox, Box
from IPython.display import display

In [2]:
# Directory layout: <root>/<collection>/1.toScanTailor/<item>/out/*.tif
root_dir_path = Path('/Users/dlisla/shared_with_docker/')
collection_dir_path = root_dir_path / 'agrutesc'
to_process_dir_path = collection_dir_path / '1.toScanTailor'

# Collect every TIFF to inspect, in sorted order
# (use the '*/out/*.tif' pattern for ScanTailor)
tif_paths_list = list(to_process_dir_path.glob('*/out/*.tif'))
tif_paths_list.sort()

number_of_tifs = len(tif_paths_list)
print(f'number_of_tifs = {number_of_tifs}')

number_of_tifs = 101


In [3]:
# Which TIFF fields do we need to check for bit depth?
# Print every tag id present in the first image, followed by Pillow's
# description (TagInfo) of that tag.
image = Image.open(tif_paths_list[0])
for tag_id in image.tag_v2:
    print(tag_id, TiffTags.lookup(tag_id), sep='\n')

256
TagInfo(value=256, name='ImageWidth', type=4, length=1, enum={})
257
TagInfo(value=257, name='ImageLength', type=4, length=1, enum={})
258
TagInfo(value=258, name='BitsPerSample', type=3, length=0, enum={})
259
TagInfo(value=259, name='Compression', type=3, length=1, enum={'Uncompressed': 1, 'CCITT 1d': 2, 'Group 3 Fax': 3, 'Group 4 Fax': 4, 'LZW': 5, 'JPEG': 6, 'PackBits': 32773})
262
TagInfo(value=262, name='PhotometricInterpretation', type=3, length=1, enum={'WhiteIsZero': 0, 'BlackIsZero': 1, 'RGB': 2, 'RGB Palette': 3, 'Transparency Mask': 4, 'CMYK': 5, 'YCbCr': 6, 'CieLAB': 8, 'CFA': 32803, 'LinearRaw': 32892})
273
TagInfo(value=273, name='StripOffsets', type=4, length=0, enum={})
339
TagInfo(value=339, name='SampleFormat', type=3, length=0, enum={})
277
TagInfo(value=277, name='SamplesPerPixel', type=3, length=1, enum={})
279
TagInfo(value=279, name='StripByteCounts', type=4, length=0, enum={})
282
TagInfo(value=282, name='XResolution', type=5, length=1, enum={})
283
TagInfo

In [4]:
# Show the values of the tags we care about for the first image.
# Maps a readable tag name to its numeric TIFF tag id.
tag_dict = dict(BitsPerSample=258, Compression=259, SamplesPerPixel=277)
for tag_name, tag_id in tag_dict.items():
    tag_value = image.tag_v2[tag_id]
    print(f'{tag_name} : {tag_value}')

BitsPerSample : (1,)
Compression : 4
SamplesPerPixel : 1


In [5]:
# Image.getcolors() tells us whether the image holds color, grayscale,
# or only bitonal data:
#   * 1-2 colors   -> bitonal (all black or all white counts as bitonal, too!)
#   * 3-256 colors -> grayscale
#   * 257+ colors  -> getcolors() returns None; the image has color data
color_list = image.getcolors()
number_of_colors = 'Over 256 colors' if color_list is None else len(color_list)
print(f'Number of colors in the image: {number_of_colors}')

Number of colors in the image: 2


In [6]:
# working functions
# Total bits per pixel -> human-readable image type
bit_depth_lookup_dict = {
    24: 'rgb',
    8: 'gray',
    1: 'bitonal',
}

# BitsPerSample tag value (one entry per channel) -> total bits per pixel
bits_per_sample_lookup_dict = {
    (8, 8, 8): 24,
    (8,): 8,
    (1,): 1,
}

# Compression tag value -> short name of the compression scheme
compression_lookup_dict = {
    1: 'uncompressed',
    4: 'group4',
    5: 'LZW',
    6: 'JPEG',
}

def get_bit_depth_from_colors(image):
    """Estimate an image's bit depth from how many distinct colors it uses.

    Calls ``image.getcolors()`` with its default arguments and maps the
    result as follows:

    * ``None`` (too many colors to list) -> 24
    * more than 2 colors                 -> 8
    * 1 or 2 colors                      -> 1
    * an empty list                      -> ``None``

    Returns:
        24, 8, 1, or ``None``.
    """
    colors = image.getcolors()
    if colors is None:  # image is over 256 colors
        return 24

    color_count = len(colors)
    if color_count > 2:
        return 8
    if color_count > 0:
        return 1
    return None

def get_bit_depth_from_metadata(image):
    """Return the bit depth declared by the image's BitsPerSample tag.

    Reads TIFF tag 258 (BitsPerSample) from ``image.tag_v2`` and translates
    it through ``bits_per_sample_lookup_dict``.

    Raises:
        KeyError: if tag 258 is absent or its value is not a key of
            ``bits_per_sample_lookup_dict``.
        TypeError: if the looked-up value is not an int.
    """
    bits_per_sample = image.tag_v2[258]  # tag 258 is the BitsPerSample tag
    bit_depth = bits_per_sample_lookup_dict[bits_per_sample]
    if not isinstance(bit_depth, int):
        raise TypeError(f'"{bit_depth}" is not an int')
    return bit_depth

def get_compression(image):
    """Return the name of the compression scheme declared by the image.

    Reads TIFF tag 259 (Compression) from ``image.tag_v2`` and translates the
    numeric code through ``compression_lookup_dict``.

    Raises:
        KeyError: if tag 259 is absent or its code is not a key of
            ``compression_lookup_dict``.
    """
    compression_code = image.tag_v2[259]  # tag 259 is the Compression tag
    return compression_lookup_dict[compression_code]

In [7]:
# Show the first TIFF as "<parent directory>/<file name>"
image_path = tif_paths_list[0]
parent_dir_name, file_name = image_path.parts[-2], image_path.name
print(f'{parent_dir_name}/{file_name}')

out/000001.tif


In [8]:
# Classify every TIFF by bit depth, reporting progress in a widget.
progress_label = Label('Process *.tif')
progress_bar = IntProgress(min=0, max=number_of_tifs)
progress_widget = VBox([progress_label, progress_bar])
display(progress_widget)

# NOTE: writing this as a for-loop, but need to come back and
# refactor into OOP after getting logic and processes down
rgb_paths_list = []
gray_paths_list = []
bitonal_paths_list = []
# images whose pixel-derived bit depth disagrees with their BitsPerSample tag
bitdepth_doesnt_match_paths_list = []

# Count from 1 so the label reads "Processing 1 of N" rather than "0 of N"
for index, image_path in enumerate(tif_paths_list, start=1):
    label = f'Processing {index} of {number_of_tifs}: {"/".join(image_path.parts[-3:])}'
    progress_label.value = label

    # The context manager closes the file as soon as both checks are done;
    # without it every TIFF in the collection stays open until garbage collection.
    with Image.open(image_path) as image:
        bit_depth_from_colors = get_bit_depth_from_colors(image)
        bit_depth_from_metadata = get_bit_depth_from_metadata(image)

        # TODO: optionally re-save LZW-compressed continuous-tone images
        # uncompressed (must happen while the image is still open):
        # if bit_depth_from_metadata > 1:
        #     compression = get_compression(image)
        #     if compression == 'LZW':  # re-save image uncompressed
        #         image.save(image_path, tiffinfo=image.tag, compression=None, dpi=image.info['dpi'])

    if bit_depth_from_colors != bit_depth_from_metadata:
        # Remember the mismatch for later review, but file the image under
        # the bit depth its metadata declares.
        bitdepth_doesnt_match_paths_list.append(image_path)
        # print out what we SHOULD do with it!
        # print(f'{image_path} --> {bit_depth_lookup_dict[bit_depth_from_colors]}')
        bit_depth = bit_depth_from_metadata
    else:
        bit_depth = bit_depth_from_colors

    if bit_depth == 24:
        rgb_paths_list.append(image_path)
    elif bit_depth == 8:
        gray_paths_list.append(image_path)
    elif bit_depth == 1:
        bitonal_paths_list.append(image_path)
    else:
        raise ValueError(f'{bit_depth} is not 1, 8, or 24')

    # increment progress_bar
    progress_bar.value = index

# The loop never breaks, so this always runs once every image is processed
label = 'Processing complete'
progress_label.value = label

VBox(children=(Label(value='Process *.tif'), IntProgress(value=0, max=101)))

In [9]:
# Number of images whose color-derived bit depth differed from their BitsPerSample metadata
len(bitdepth_doesnt_match_paths_list)

0

In [10]:
# Number of images classified as 24-bit RGB
len(rgb_paths_list)

0

In [11]:
# Preview the first few images whose bit depth did not match their metadata.
plt.close('all')  # close all figures as ipympl will keep them ALL open for accessing
# This is actually how I can easily pop images into the SAME figure/window, though
# For example, I could add buttons to convert images to different values using Pillow
# My bitdepth_doesnt_match_paths_list should REALLY be a dict with the value it should be
# Hmmmm, or I could just re-open and test the images at THIS time.

for image_path in bitdepth_doesnt_match_paths_list[:5]:
    # One figure per image, titled with the last three path components.
    # Keep the Figure itself: tight_layout() returns None, so chaining it onto
    # plt.figure() would leave `fig` unusable.
    fig = plt.figure(num='/'.join(image_path.parts[-3:]), frameon=False)
    ax = fig.add_subplot(111)

    # show image; imshow copies the pixel data, so the file can be closed right away
    with Image.open(image_path) as image:
        ax.imshow(image, cmap='gray')

    # Lay out only after the axes exist, otherwise there is nothing to adjust
    fig.tight_layout()

In [12]:
# What do we need to check and in what order?
# If the metadata says continuous tone (color or grayscale) we should verify
# that the pixel data really needs that bit depth.
# NOTE(review): the original note here was left unfinished -- confirm the intended check.
check_if_color_path_list = []
grayscale_path_list = []

# BitsPerSample (tag 258) is a tuple with one entry per channel, e.g.
# (8, 8, 8) or (8,), so compare against tuples rather than a string or an int.
bits_per_sample = image.tag_v2[258]
# `image_path` is the path `image` was opened from in the cells above.
if bits_per_sample == (8, 8, 8):  # image is color
    check_if_color_path_list.append(image_path)
elif bits_per_sample == (8,):  # image is grayscale
    grayscale_path_list.append(image_path)

In [None]:
# open in Preview
# image.show()