# Find \*.tifs with no matching \*.jpg

#### Created on Cinco de Mayo in 2020 by Jeremy Moore and David Armstrong to identify \*.tif images that don't have a matching \*.jpg image for the Asian Art Museum of San Francisco

1. Manually set root_dir_path to the full path of the directory containing your *all_jpgs* and *all_tifs* directories
1. Programmatically create a *no_match* directory inside of *all_tifs*
1. Get list of all \*.tifs in *all_tifs* directory
1. Get the identifier, or stem, of each \*.tif
1. First, as a test run, check if this identifier exists as a \*.jpg in the *all_jpgs* directory
1. Run again and if there is no matching \*.jpg, move the \*.tif into the *no_match* directory

***Update root_dir_path location and verify names of \*.jpg and \*.tif directories below BEFORE running any cells!***

In [1]:
# imports from standard library
from pathlib import Path

In [2]:
# root directory that holds both the tif directory and the jpg directory
root_dir_path = Path('/Users/dlisla/Pictures/test_directory')

# echo the full path and its final component so the location can be checked by eye
print(
    f'root_dir_path: {root_dir_path}',
    f'root_dir_path.name: {root_dir_path.name}',
    sep='\n',
)

root_dir_path: /Users/dlisla/Pictures/test_directory
root_dir_path.name: test_directory


In [3]:
# set paths to the directories holding our jpgs (all_jpgs) and our tifs (all_tifs)
bad_jpg_dir_path = root_dir_path.joinpath('all_jpgs')
all_tifs_dir_path = root_dir_path.joinpath('all_tifs')

# create a directory named no_match inside of the all_tifs directory
# to move the *.tifs without a matching *.jpg into
no_match_dir_path = all_tifs_dir_path.joinpath('no_match')
# exist_ok=True lets this cell be re-run when the no_match directory already exists
# NOTE: still raises FileNotFoundError if the all_tifs directory itself is missing,
#       and FileExistsError if no_match exists but is not a directory
no_match_dir_path.mkdir(exist_ok=True)

# verify existence of no_match directory, if False, then do not continue
print(f'Does the no_match directory exist? {no_match_dir_path.is_dir()}')

Does the no_match directory exist? True


In [4]:
# get sorted list of all *.tifs in all_tifs directory
# NOTE: this is NOT recursive and will not look inside of all_tifs subdirectories
# NOTE: hidden files that start with a '.' (pathlib's glob matches them) and anything
#       that is not a regular file are filtered out so only real images are listed
tif_path_list = sorted(
    candidate_path
    for candidate_path in all_tifs_dir_path.glob('*.tif')
    if candidate_path.is_file() and not candidate_path.name.startswith('.')
)

print(f'Total number of *.tif: {len(tif_path_list)}\n')

# indexing an empty list would raise an IndexError, so only show first/last when we found something
if tif_path_list:
    print(f'First *.tif paths: {tif_path_list[0]}')
    print(f'Last *.tif paths: {tif_path_list[-1]}')
else:
    print(f'No *.tif files found in {all_tifs_dir_path}')

Total number of *.tif: 4

First *.tif paths: /Users/dlisla/Pictures/test_directory/all_tifs/01.tif
Last *.tif paths: /Users/dlisla/Pictures/test_directory/all_tifs/04.tif


In [5]:
# dry run: report which *.tifs would be moved without touching any files
for tif_path in tif_path_list:

    # the jpg we expect shares the tif's stem (its filename without the extension)
    jpg_path = bad_jpg_dir_path / f'{tif_path.stem}.jpg'

    # a matching jpg exists, so skip this tif silently
    if jpg_path.is_file():
        continue

    # where the tif would land inside of the no_match directory
    new_tif_path = no_match_dir_path / tif_path.name

    print(f'{tif_path.name} has no matching *.jpg')
    print(f'Moving to {new_tif_path} . . . (not really, this is a test)\n')

03.tif has no matching *.jpg
Moving to /Users/dlisla/Pictures/test_directory/all_tifs/no_match/03.tif . . . (not really, this is a test)



In [6]:
# warning, will move files!
# every *.tif without a matching *.jpg is moved into the no_match directory
for tif_path in tif_path_list:

    # get image's identifier to match against the JPEG filenames
    identifier = tif_path.stem  # stem is the Python name for identifier

    # set jpg filename and path
    jpg_filename = f'{identifier}.jpg'
    jpg_path = bad_jpg_dir_path.joinpath(jpg_filename)

    # does jpg exist? if so there's a match and we silently skip the image
    if jpg_path.is_file():
        continue

    # no match, so we need to move it into our no_match directory
    print(f'{tif_path.name} has no JPEG')

    # set new tif path inside of the no_match directory
    new_tif_path = no_match_dir_path.joinpath(tif_path.name)

    # Path.rename silently replaces an existing destination on Unix, so never overwrite;
    # this also keeps a re-run of this cell (with a stale tif_path_list) from crashing
    if new_tif_path.exists():
        print(f'{new_tif_path} already exists, skipping . . .\n')
        continue

    print(f'Moving to {new_tif_path} . . .')

    # move our file
    tif_path.rename(new_tif_path)

    # confirm the file really landed at its new location
    if new_tif_path.is_file():
        print('Success!\n')
    else:
        # f-string (the original was missing the f prefix) reporting the intended destination
        print(f'Something broke with moving:{tif_path.name} to {new_tif_path}!!\n')

03.tif has no JPEG
Moving to /Users/dlisla/Pictures/test_directory/all_tifs/no_match/03.tif . . .
Success!

