## Project Ceres - Move Digibook UTESC Images into Directories

The Digibook scans of the UT extension special circulars were all saved into a single folder.

The images need to be moved into one directory per circular, with the directories and image files named using the AdminDB numbers.

In [1]:
# importing
import shutil
from pathlib import Path

from ipywidgets import IntProgress, Label, VBox
from IPython.display import display

In [2]:
# source directory: the single folder holding every scanned *.tif image
# that still has to be sorted into AdminDB-numbered directories
convert_to_admin_db_numbers_directory_path = Path(
    'data/00_UTSpecialCirculars_toAdminDBNumbers'
)

In [3]:
# gather, in sorted order, every entry directly inside the source directory
# whose path ends with '.tif' (the match is case-sensitive and not recursive)
image_paths_list = sorted(
    candidate_path
    for candidate_path in convert_to_admin_db_numbers_directory_path.iterdir()
    if str(candidate_path).endswith('.tif')
)

In [4]:
# report how many images were found in the source directory
tif_file_count = len(image_paths_list)
print(f'{tif_file_count} *.tif files to process')

1951 *.tif files to process


In [5]:
# distinct circular numbers, in ascending order
# the number is the third underscore-separated field of each image file name
# NOTE: files whose circular number contained letters were renamed manually
#       beforehand, so int() is expected to succeed for every file
circular_numbers_seen = {
    int(image_path.name.split('_', 3)[2])
    for image_path in image_paths_list
}
circulars_item_list = sorted(circular_numbers_seen)
print(circulars_item_list)

[24, 25, 26, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 158, 162, 168, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 188, 191, 192, 194, 198, 234, 235, 241, 242, 243, 245, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 259, 260, 262, 263, 264, 265, 266, 267, 268, 270, 271, 273, 274, 276, 278, 283, 285, 287, 288, 289, 290, 292, 293, 294, 296, 307, 308, 309, 311, 312, 313, 314, 315, 316, 317, 319, 320, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 335, 336, 337, 340, 342, 346, 361, 369, 375, 380, 383, 387, 388, 389, 392, 394, 395,

In [6]:
# number of distinct circular numbers found in the image file names
# (the copy cell below creates one adminDB directory per entry)
len(circulars_item_list)

218

In [7]:
# copy images into AdminDB folders
#
# For each circular number (in ascending order) this cell:
#   1. creates the directory utesc_adminDB/0012_004268_<NNNN> next to the source
#      directory, where <NNNN> is the circular's 1-based position in
#      circulars_item_list (not the circular number itself)
#   2. copies the circular's *.tif files into that directory and deletes each
#      original only after the copy exists and has the same size
#   3. renames every *.tif in the directory to <directory name>_<PPPP>.tif,
#      numbering from 0001 in sorted file-name order
#
# NOTE: this cell changes the file system (originals are removed from the source
# directory), so re-running the notebook afterwards finds no files to process.

# progress bar
progress_label = Label('extension circular being processed')
progress_bar = IntProgress(min=0, max=len(circulars_item_list))
progress_widget = VBox([progress_label, progress_bar])
display(progress_widget)

# parent of all adminDB directories; sits beside the source directory
adminDB_root_directory_path = convert_to_admin_db_numbers_directory_path.parent.joinpath('utesc_adminDB')

count = 0

# originals that were left in place because their copy could not be verified
unverified_image_paths_list = []

# loop through circulars
for outer_loop_index, circular_item_number in enumerate(circulars_item_list, start=1):

    label = f'Processing extension circular #{circular_item_number} . . .'
    progress_label.value = label

    # create adminDB directory
    adminDB_directory_name = f'0012_004268_{outer_loop_index:04d}'
    adminDB_directory_path = adminDB_root_directory_path.joinpath(adminDB_directory_name)
    adminDB_directory_path.mkdir(parents=True, exist_ok=True)

    # get the circular's images from the source directory
    circular_item_image_name_stub = f'0012_004268_{circular_item_number:04d}'
    circular_item_image_paths_list = sorted(convert_to_admin_db_numbers_directory_path.glob(f'{circular_item_image_name_stub}_*.tif'))

    # copy each image into the adminDB directory
    for image_path in circular_item_image_paths_list:
        copy_item_path = adminDB_directory_path.joinpath(image_path.name)
        shutil.copyfile(image_path, copy_item_path)

        # delete the original only once the copy exists with a matching size;
        # otherwise keep it and remember it so the problem is reported below
        if copy_item_path.is_file() and copy_item_path.stat().st_size == image_path.stat().st_size:
            image_path.unlink()
        else:
            unverified_image_paths_list.append(image_path)

    # rename items in adminDB directory to match directory name
    image_paths_to_rename_list = sorted(adminDB_directory_path.glob('*.tif'))

    for index, image_path in enumerate(image_paths_to_rename_list, start=1):
        adminDB_image_file_name = f'{adminDB_directory_name}_{index:04d}.tif'
        adminDB_image_file_path = image_path.with_name(adminDB_image_file_name)

        image_path.rename(adminDB_image_file_path)

    # count the renamed files without re-globbing and without overwriting the
    # notebook-level image_paths_list that earlier cells rely on
    count += len(image_paths_to_rename_list)

    progress_bar.value = outer_loop_index

print(f'{count} files moved and renamed for adminDB numbers')

# surface any originals that were kept because verification of the copy failed
if unverified_image_paths_list:
    print(f'WARNING: {len(unverified_image_paths_list)} original files were kept because their copies could not be verified:')
    for unverified_image_path in unverified_image_paths_list:
        print(f'  {unverified_image_path}')

VBox(children=(Label(value='extension circular being processed'), IntProgress(value=0, max=218)))

1951 files moved and renamed for adminDB numbers
