In [1]:
# imports
import shutil
import subprocess
from pathlib import Path

from IPython.display import display
from ipywidgets import IntProgress, Label, VBox

In [3]:
# Root of the OCR job on the network share, with its input and output subfolders.
data_directory = Path('/Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR')
input_directory = data_directory / '1.toOCR'
output_directory = data_directory / '2.OCRed'

# Each immediate subdirectory of the input folder is one batch of scans to process.
directories_to_convert_to_pdf_paths_list = sorted(
    entry for entry in input_directory.iterdir() if entry.is_dir()
)

print(f'{len(directories_to_convert_to_pdf_paths_list)} directories to process')

0 directories to process


In [4]:
!open {str(data_directory)}

In [6]:
# functions

def get_tif_paths_list(directory_path, scantailor=False):
    """Return the sorted TIFF paths found directly inside a directory.

    Args:
        directory_path: str or Path of the directory that holds the scans.
        scantailor: when True, search the 'out' subdirectory of
            directory_path instead (where ScanTailor writes its results).

    Returns:
        A sorted list of Path objects: the visible '*.tif' files, or, when
        there are none, the visible '*.tiff' files. Files whose name starts
        with '.' (macOS index files) are never returned. The list is empty
        when nothing matches.
    """
    # Path() accepts both str and Path, so no isinstance check is needed
    directory_path = Path(directory_path)

    # add 'out' path to directory if processed with ScanTailor
    if scantailor:
        directory_path = directory_path.joinpath('out')

    def visible_matches(pattern):
        # pathlib's glob also matches dot-files, so drop them explicitly
        return sorted(
            x for x in directory_path.glob(pattern) if not x.name.startswith('.')
        )

    # Hidden files are removed BEFORE deciding on the fallback: otherwise a
    # lone '._page.tif' index file would suppress the '*.tiff' search and the
    # function would return nothing even though real '*.tiff' scans exist.
    tif_paths_list = visible_matches('*.tif')
    if not tif_paths_list:
        tif_paths_list = visible_matches('*.tiff')

    return tif_paths_list


def create_temporary_pdf_directory(directory_path):
    """Create an empty '_temporary_pdfs_<name>' directory on the user's Desktop.

    Args:
        directory_path: str or Path of the directory being processed; only
            its final component (its name) is used.

    Returns:
        Path of the freshly created, empty temporary directory
        (<home>/Desktop/_temporary_pdfs_<name>).

    Side effects:
        If the temporary directory already exists it is deleted together
        with everything in it, then recreated.
    """
    # Path() accepts both str and Path, so no isinstance check is needed
    directory_path = Path(directory_path)

    temporary_pdf_directory_path = Path.home().joinpath(f'Desktop/_temporary_pdfs_{directory_path.name}')

    try:  # creating the temporary directory
        # parents=True: do not fail when the home directory has no Desktop yet
        temporary_pdf_directory_path.mkdir(parents=True)
    except FileExistsError:  # delete the directory & all contents then create it
        shutil.rmtree(temporary_pdf_directory_path)
        temporary_pdf_directory_path.mkdir()

    return temporary_pdf_directory_path


def batch_ocr_tifs_into_pdfs(tif_paths_list, temporary_pdf_directory_path):
    """OCR every TIFF into its own PDF with tesseract, showing a progress bar.

    Args:
        tif_paths_list: list of Path objects of the images to OCR.
        temporary_pdf_directory_path: Path of the directory that receives one
            '<tif stem>.pdf' per input image.

    Returns:
        None. A summary is printed: either the number of PDFs created, or an
        error block with both counts and the names of the images for which
        no PDF was found.
    """
    number_of_tifs = len(tif_paths_list)
    number_of_pdfs = 0
    failed_tif_names = []

    # progress bar
    progress_label = Label('OCR *.tif')
    progress_bar = IntProgress(min=0, max=number_of_tifs)
    progress_widget = VBox([progress_label, progress_bar])
    display(progress_widget)

    for tif_path in tif_paths_list:

        progress_label.value = f'OCRing {tif_path.name}'

        # NOTE: the output base SHOULD NOT have an extension on the end
        temporary_pdf_output_base = temporary_pdf_directory_path.joinpath(tif_path.stem)

        # ocr *.tif with tesseract
        # An argument list (no shell) passes each path verbatim, so names with
        # quotes, '$' or backticks cannot break or alter the command.
        try:
            subprocess.run(
                ['tesseract', str(tif_path), str(temporary_pdf_output_base), 'pdf'],
                stderr=subprocess.DEVNULL,  # same as the former 2>/dev/null
            )
        except OSError as error:
            # tesseract could not be started at all; retrying per file is pointless
            print(f'ERROR - could not run tesseract: {error}')
            break

        # Add the suffix .pdf that tesseract didn't want
        temporary_pdf_output_path = temporary_pdf_directory_path.joinpath(f'{tif_path.stem}.pdf')

        if temporary_pdf_output_path.is_file():
            number_of_pdfs += 1
            progress_bar.value = number_of_pdfs
        else:
            failed_tif_names.append(tif_path.name)

    if number_of_pdfs != number_of_tifs:
        print(f'ERROR - ERROR - ERROR')
        print(f'# of *.tif: {number_of_tifs}')
        print(f'# of PDFs: {number_of_pdfs}')
        # name the pages that failed so they can be inspected individually
        for failed_tif_name in failed_tif_names:
            print(f'no PDF created for: {failed_tif_name}')
    else:
        print(f'{number_of_pdfs} PDFs created and saved in {temporary_pdf_directory_path}')
    
    
def concatenate_pdfs(temporary_pdf_directory_path):
    """Merge the per-page PDFs of a temporary directory into one PDF with pdftk.

    Args:
        temporary_pdf_directory_path: str or Path of a directory holding the
            per-page PDFs, normally named '_temporary_pdfs_<name>'.

    Returns:
        Path of the combined PDF, '<directory>/<name>.pdf', where <name> is
        the directory name without its '_temporary_pdfs_' prefix. The path is
        returned whether or not the file could be created; an error line is
        printed when it could not, so callers should check that it exists.
    """
    temporary_pdf_directory_path = Path(temporary_pdf_directory_path)

    # strip '_temporary_pdfs_' from beginning of directory name to get combined PDF name
    # (only the final path component is inspected, so the marker appearing
    # elsewhere in the path, or not at all, cannot produce a wrong name or an IndexError)
    prefix = '_temporary_pdfs_'
    directory_name = temporary_pdf_directory_path.name
    if directory_name.startswith(prefix):
        directory_name = directory_name[len(prefix):]
    combined_pdf_name = f"{directory_name}.pdf"

    combined_pdf_output_path = temporary_pdf_directory_path.joinpath(combined_pdf_name)

    # Pages are sorted in Python so their order follows the same ordering used
    # for the TIFF list; dot-files are skipped (as a shell '*.pdf' would) and a
    # combined PDF left over from an earlier run is never used as an input.
    page_pdf_paths = sorted(
        x for x in temporary_pdf_directory_path.glob('*.pdf')
        if x != combined_pdf_output_path and not x.name.startswith('.')
    )

    if not page_pdf_paths:
        print(f'ERROR - no PDFs to concatenate in {temporary_pdf_directory_path}')
        return combined_pdf_output_path

    # concatenate with pdftk
    # An argument list (no shell) passes every path verbatim, whatever it contains.
    command = ['pdftk'] + [str(x) for x in page_pdf_paths] + ['cat', 'output', str(combined_pdf_output_path)]
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            errors='replace',
        )
    except OSError as error:
        print(f'ERROR - could not run pdftk: {error}')
    else:
        if completed.returncode != 0:
            # show pdftk's own message in the notebook
            print(f'ERROR - pdftk exited with status {completed.returncode}')
            print(completed.stderr)

    if combined_pdf_output_path.is_file() and combined_pdf_output_path.stat().st_size > 0:
        print(f'{combined_pdf_output_path} created with size {round((combined_pdf_output_path.stat().st_size/1024/1024/1024), 2)} GB')
    else:
        print(f'ERROR - {combined_pdf_output_path} was not created')

    return combined_pdf_output_path

In [9]:
# OCR every input directory into a single PDF, copy the PDF to output_directory
# and, only when that succeeded, move the source directory there as well.
# A directory that fails at any step is reported and left in input_directory.
for dir_path in directories_to_convert_to_pdf_paths_list:

    # get list of *.tif to process
    tif_paths_list = get_tif_paths_list(dir_path)

    number_of_tifs = len(tif_paths_list)
    print(f'{number_of_tifs} *.tif to process in {dir_path}')

    # nothing to OCR: there would be no PDF to merge or copy
    if number_of_tifs == 0:
        print(f'SKIPPED {dir_path.name}: no *.tif found')
        continue

    # create temp directory
    temp_pdf_dir_path = create_temporary_pdf_directory(dir_path)

    # OCR tifs into PDFs with progress bar
    batch_ocr_tifs_into_pdfs(tif_paths_list, temp_pdf_dir_path)

    # batch_ocr_tifs_into_pdfs only prints its error, so re-count the pages
    # here: an incomplete batch must not be merged and filed as finished
    number_of_pdfs = len([x for x in temp_pdf_dir_path.glob('*.pdf') if not x.name.startswith('.')])
    if number_of_pdfs != number_of_tifs:
        print(f'SKIPPED {dir_path.name}: {number_of_pdfs} of {number_of_tifs} pages were OCRed')
        continue

    # concatenate PDFs into single file
    combined_pdf_path = concatenate_pdfs(temp_pdf_dir_path)

    # concatenate_pdfs returns the path even when no file was written
    if not (combined_pdf_path.is_file() and combined_pdf_path.stat().st_size > 0):
        print(f'SKIPPED {dir_path.name}: combined PDF was not created')
        continue

    # copy combined PDF to copy directory
    output_directory.mkdir(exist_ok=True)
    output_pdf_path = output_directory.joinpath(combined_pdf_path.name)
    shutil.copy(combined_pdf_path, output_pdf_path)

    if not output_pdf_path.is_file():
        print(f'SKIPPED {dir_path.name}: {output_pdf_path} was not written')
        continue

    # move processed dir_path into output_directory
    renamed_dir_path = output_directory.joinpath(dir_path.name)
    dir_path.rename(renamed_dir_path)

    # only reached after a successful move, so renamed_dir_path is always the
    # value for THIS directory (never undefined or left over from a previous one)
    print('')
    print('*****')
    print(f'{dir_path.name} is now {renamed_dir_path}')
    print('*****')
    print('')

514 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1977-09-23_EX1-26


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=514)))

514 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1977-09-23_EX1-26
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1977-09-23_EX1-26/R_ BOT_1977-09-23_EX1-26.pdf created with size 0.12 GB

*****
R_ BOT_1977-09-23_EX1-26 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1977-09-23_EX1-26
*****

46 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1977-10-29_EX1-7


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=46)))

46 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1977-10-29_EX1-7
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1977-10-29_EX1-7/R_ BOT_1977-10-29_EX1-7.pdf created with size 0.01 GB

*****
R_ BOT_1977-10-29_EX1-7 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1977-10-29_EX1-7
*****

17 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1977-11-26_EX1


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=17)))

17 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1977-11-26_EX1
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1977-11-26_EX1/R_ BOT_1977-11-26_EX1.pdf created with size 0.0 GB

*****
R_ BOT_1977-11-26_EX1 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1977-11-26_EX1
*****

833 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1978-02-24_EX1-38


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=833)))

833 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-02-24_EX1-38
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-02-24_EX1-38/R_ BOT_1978-02-24_EX1-38.pdf created with size 0.15 GB

*****
R_ BOT_1978-02-24_EX1-38 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1978-02-24_EX1-38
*****

11 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1978-05-31_EX1-5


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=11)))

11 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-05-31_EX1-5
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-05-31_EX1-5/R_ BOT_1978-05-31_EX1-5.pdf created with size 0.0 GB

*****
R_ BOT_1978-05-31_EX1-5 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1978-05-31_EX1-5
*****

97 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1978-06-14_EX1-7


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=97)))

97 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-06-14_EX1-7
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-06-14_EX1-7/R_ BOT_1978-06-14_EX1-7.pdf created with size 0.02 GB

*****
R_ BOT_1978-06-14_EX1-7 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1978-06-14_EX1-7
*****

796 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1978-06-15_EX1-53


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=796)))

796 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-06-15_EX1-53
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-06-15_EX1-53/R_ BOT_1978-06-15_EX1-53.pdf created with size 0.17 GB

*****
R_ BOT_1978-06-15_EX1-53 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1978-06-15_EX1-53
*****

168 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1978-08-02_EX1-9


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=168)))

168 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-08-02_EX1-9
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-08-02_EX1-9/R_ BOT_1978-08-02_EX1-9.pdf created with size 0.06 GB

*****
R_ BOT_1978-08-02_EX1-9 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1978-08-02_EX1-9
*****

79 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1978-08-31_EX1-15


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=79)))

79 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-08-31_EX1-15
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-08-31_EX1-15/R_ BOT_1978-08-31_EX1-15.pdf created with size 0.02 GB

*****
R_ BOT_1978-08-31_EX1-15 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1978-08-31_EX1-15
*****

811 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1978-10-20_EX1-37


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=811)))

811 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-10-20_EX1-37
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1978-10-20_EX1-37/R_ BOT_1978-10-20_EX1-37.pdf created with size 0.17 GB

*****
R_ BOT_1978-10-20_EX1-37 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1978-10-20_EX1-37
*****

275 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-03-05_EX1-49


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=275)))

275 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-03-05_EX1-49
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-03-05_EX1-49/R_ BOT_1979-03-05_EX1-49.pdf created with size 0.06 GB

*****
R_ BOT_1979-03-05_EX1-49 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-03-05_EX1-49
*****

263 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-05-04_EX1-13


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=263)))

263 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-05-04_EX1-13
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-05-04_EX1-13/R_ BOT_1979-05-04_EX1-13.pdf created with size 0.05 GB

*****
R_ BOT_1979-05-04_EX1-13 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-05-04_EX1-13
*****

83 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-06-20_EX1-13


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=83)))

83 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-06-20_EX1-13
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-06-20_EX1-13/R_ BOT_1979-06-20_EX1-13.pdf created with size 0.02 GB

*****
R_ BOT_1979-06-20_EX1-13 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-06-20_EX1-13
*****

906 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-06-21_EX1-59


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=906)))

906 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-06-21_EX1-59
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-06-21_EX1-59/R_ BOT_1979-06-21_EX1-59.pdf created with size 0.21 GB

*****
R_ BOT_1979-06-21_EX1-59 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-06-21_EX1-59
*****

4 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-07-11_EX1-2


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=4)))

4 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-07-11_EX1-2
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-07-11_EX1-2/R_ BOT_1979-07-11_EX1-2.pdf created with size 0.0 GB

*****
R_ BOT_1979-07-11_EX1-2 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-07-11_EX1-2
*****

10 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-09-10_EX1-4


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=10)))

10 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-09-10_EX1-4
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-09-10_EX1-4/R_ BOT_1979-09-10_EX1-4.pdf created with size 0.0 GB

*****
R_ BOT_1979-09-10_EX1-4 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-09-10_EX1-4
*****

874 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-09-28_EX1-43


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=874)))

874 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-09-28_EX1-43
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-09-28_EX1-43/R_ BOT_1979-09-28_EX1-43.pdf created with size 0.18 GB

*****
R_ BOT_1979-09-28_EX1-43 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-09-28_EX1-43
*****

22 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-10-29_EX1-2


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=22)))

22 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-10-29_EX1-2
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-10-29_EX1-2/R_ BOT_1979-10-29_EX1-2.pdf created with size 0.0 GB

*****
R_ BOT_1979-10-29_EX1-2 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-10-29_EX1-2
*****

40 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1979-12-12_EX1-12


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=40)))

40 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-12-12_EX1-12
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1979-12-12_EX1-12/R_ BOT_1979-12-12_EX1-12.pdf created with size 0.01 GB

*****
R_ BOT_1979-12-12_EX1-12 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1979-12-12_EX1-12
*****

501 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1980-02-08_EX1-38


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=501)))

501 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-02-08_EX1-38
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-02-08_EX1-38/R_ BOT_1980-02-08_EX1-38.pdf created with size 0.1 GB

*****
R_ BOT_1980-02-08_EX1-38 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1980-02-08_EX1-38
*****

138 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1980-04-25_EX1-16


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=138)))

138 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-04-25_EX1-16
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-04-25_EX1-16/R_ BOT_1980-04-25_EX1-16.pdf created with size 0.02 GB

*****
R_ BOT_1980-04-25_EX1-16 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1980-04-25_EX1-16
*****

95 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1980-06-18_EX1-10


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=95)))

95 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-06-18_EX1-10
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-06-18_EX1-10/R_ BOT_1980-06-18_EX1-10.pdf created with size 0.02 GB

*****
R_ BOT_1980-06-18_EX1-10 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1980-06-18_EX1-10
*****

956 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1980-06-19_EX1-67


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=956)))

956 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-06-19_EX1-67
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-06-19_EX1-67/R_ BOT_1980-06-19_EX1-67.pdf created with size 0.21 GB

*****
R_ BOT_1980-06-19_EX1-67 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1980-06-19_EX1-67
*****

10 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1980-08-18_EX1-3


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=10)))

10 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-08-18_EX1-3
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-08-18_EX1-3/R_ BOT_1980-08-18_EX1-3.pdf created with size 0.0 GB

*****
R_ BOT_1980-08-18_EX1-3 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1980-08-18_EX1-3
*****

1169 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1980-10-17_EX1-46


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=1169)))

1169 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-10-17_EX1-46
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-10-17_EX1-46/R_ BOT_1980-10-17_EX1-46.pdf created with size 0.23 GB

*****
R_ BOT_1980-10-17_EX1-46 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1980-10-17_EX1-46
*****

18 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1980-11-15_EX1-6_


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=18)))

18 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-11-15_EX1-6_
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-11-15_EX1-6_/R_ BOT_1980-11-15_EX1-6_.pdf created with size 0.0 GB

*****
R_ BOT_1980-11-15_EX1-6_ is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1980-11-15_EX1-6_
*****

10 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1980-12-12_EX1-2


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=10)))

10 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-12-12_EX1-2
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1980-12-12_EX1-2/R_ BOT_1980-12-12_EX1-2.pdf created with size 0.0 GB

*****
R_ BOT_1980-12-12_EX1-2 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1980-12-12_EX1-2
*****

160 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-03-19_EX1-6


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=160)))

160 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-03-19_EX1-6
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-03-19_EX1-6/R_ BOT_1981-03-19_EX1-6.pdf created with size 0.03 GB

*****
R_ BOT_1981-03-19_EX1-6 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-03-19_EX1-6
*****

6 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-05-11_EX1


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=6)))

6 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-05-11_EX1
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-05-11_EX1/R_ BOT_1981-05-11_EX1.pdf created with size 0.0 GB

*****
R_ BOT_1981-05-11_EX1 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-05-11_EX1
*****

29 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-05-29_EX1-4


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=29)))

29 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-05-29_EX1-4
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-05-29_EX1-4/R_ BOT_1981-05-29_EX1-4.pdf created with size 0.0 GB

*****
R_ BOT_1981-05-29_EX1-4 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-05-29_EX1-4
*****

95 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-06-17_EX1-8


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=95)))

95 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-06-17_EX1-8
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-06-17_EX1-8/R_ BOT_1981-06-17_EX1-8.pdf created with size 0.01 GB

*****
R_ BOT_1981-06-17_EX1-8 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-06-17_EX1-8
*****

1163 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-06-18_EX1-54


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=1163)))

1163 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-06-18_EX1-54
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-06-18_EX1-54/R_ BOT_1981-06-18_EX1-54.pdf created with size 0.2 GB

*****
R_ BOT_1981-06-18_EX1-54 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-06-18_EX1-54
*****

112 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-07-30_EX1-7


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=112)))

112 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-07-30_EX1-7
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-07-30_EX1-7/R_ BOT_1981-07-30_EX1-7.pdf created with size 0.02 GB

*****
R_ BOT_1981-07-30_EX1-7 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-07-30_EX1-7
*****

534 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-09-25_EX1-28


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=534)))

534 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-09-25_EX1-28
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-09-25_EX1-28/R_ BOT_1981-09-25_EX1-28.pdf created with size 0.09 GB

*****
R_ BOT_1981-09-25_EX1-28 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-09-25_EX1-28
*****

14 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-10-24_EX1-9


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=14)))

14 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-10-24_EX1-9
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-10-24_EX1-9/R_ BOT_1981-10-24_EX1-9.pdf created with size 0.0 GB

*****
R_ BOT_1981-10-24_EX1-9 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-10-24_EX1-9
*****

76 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1981-12-19_EX1-10


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=76)))

76 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-12-19_EX1-10
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1981-12-19_EX1-10/R_ BOT_1981-12-19_EX1-10.pdf created with size 0.01 GB

*****
R_ BOT_1981-12-19_EX1-10 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1981-12-19_EX1-10
*****

328 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1982-02-12_EX1-23


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=328)))

328 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-02-12_EX1-23
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-02-12_EX1-23/R_ BOT_1982-02-12_EX1-23.pdf created with size 0.06 GB

*****
R_ BOT_1982-02-12_EX1-23 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1982-02-12_EX1-23
*****

62 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1982-03-10_EX1-5


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=62)))

62 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-03-10_EX1-5
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-03-10_EX1-5/R_ BOT_1982-03-10_EX1-5.pdf created with size 0.01 GB

*****
R_ BOT_1982-03-10_EX1-5 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1982-03-10_EX1-5
*****

44 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1982-04-23_EX1-8


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=44)))

44 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-04-23_EX1-8
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-04-23_EX1-8/R_ BOT_1982-04-23_EX1-8.pdf created with size 0.01 GB

*****
R_ BOT_1982-04-23_EX1-8 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1982-04-23_EX1-8
*****

917 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1982-06-17_EX1-68


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=917)))

917 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-06-17_EX1-68
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-06-17_EX1-68/R_ BOT_1982-06-17_EX1-68.pdf created with size 0.14 GB

*****
R_ BOT_1982-06-17_EX1-68 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1982-06-17_EX1-68
*****

44 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1982-08-14_EX1-6


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=44)))

44 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-08-14_EX1-6
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-08-14_EX1-6/R_ BOT_1982-08-14_EX1-6.pdf created with size 0.01 GB

*****
R_ BOT_1982-08-14_EX1-6 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1982-08-14_EX1-6
*****

604 *.tif to process in /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/1.toOCR/R_ BOT_1982-10-15_EX1-38


VBox(children=(Label(value='OCR *.tif'), IntProgress(value=0, max=604)))

604 PDFs created and saved in /Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-10-15_EX1-38
/Users/dlisla/Desktop/_temporary_pdfs_R_ BOT_1982-10-15_EX1-38/R_ BOT_1982-10-15_EX1-38.pdf created with size 0.14 GB

*****
R_ BOT_1982-10-15_EX1-38 is now /Volumes/fluffy/0_JeremysWorkingSpace/2019-10/board-of-regents_toOCR/2.OCRed/R_ BOT_1982-10-15_EX1-38
*****



In [40]:
# open output_directory_path for visual Quality Assurance
!open {str(output_directory)}

In [4]:
# delete all '_temporary_pdfs_*' directories on the desktop
# The Desktop is located through Path.home(), the same way
# create_temporary_pdf_directory builds its paths, instead of a fixed user name.
desktop_path = Path.home().joinpath('Desktop')

# keep real directories only: shutil.rmtree raises on plain files and symlinks
all_temp_dir_paths = sorted(
    x for x in desktop_path.glob('_temporary_pdfs_*')
    if x.is_dir() and not x.is_symlink()
)
number_of_dir_paths = len(all_temp_dir_paths)
for temp_dir_path in all_temp_dir_paths:
    shutil.rmtree(temp_dir_path)

# look again to count how many were really removed
all_temp_dir_paths = sorted(
    x for x in desktop_path.glob('_temporary_pdfs_*')
    if x.is_dir() and not x.is_symlink()
)
number_of_deleted_dir_paths = number_of_dir_paths - len(all_temp_dir_paths)

print(f'{number_of_deleted_dir_paths} _temporary_pdf_* directories deleted')

121 _temporary_pdf_* directories deleted
