From 4fdbaed49268ecfcaf816158324e6a4bdc5d56ec Mon Sep 17 00:00:00 2001 From: Quentin Young Date: Wed, 30 Aug 2023 21:18:47 -0400 Subject: [PATCH 1/2] Handle arbitrary sidecar files Currently, support exists only for handling .xmp sidecars. This change adds support for arbitrary sidecar extensions. It also expands the set of sidecars which are handled by default. The new default set, and the reason each member was chosen, is: .xmp - general metadata format for all image types .json - generated by Google Takeout when exporting from Google Photos .yml/.yaml - used by PhotoPrism Users can override this default set by passing the new --sidecars option. For example, to handle .xyz in addition to .xmp, .json and .yml, users should pass --sidecars="xmp,json,yml,xyz" --- phockup.py | 30 ++++++++++++++++++++++ readme.md | 27 ++++++++++++++++++++ src/phockup.py | 67 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 101 insertions(+), 23 deletions(-) diff --git a/phockup.py b/phockup.py index 4b52e78..ad263ec 100755 --- a/phockup.py +++ b/phockup.py @@ -187,6 +187,36 @@ def parse_args(args=sys.argv[1:]): """, ) + parser.add_argument( + "--sidecars", + default=",".join(Phockup.DEFAULT_SIDECAR_EXTENSIONS), + action="store", + help=""" +Override the set of extensions that are considered to be sidecar files. +Sidecars are files that have the same name as an image or video file, but a +different extension. They typically contain additional metadata pertaining to +the image or video file. + +File extensions which are considered to be sidecars if they are named the same +as a corresponding image file are: + + """ + + str(Phockup.DEFAULT_SIDECAR_EXTENSIONS) + + """ + +So, for example, if image.jpg exists, then image.xmp (or image.jpg.xmp) will be +considered a sidecar file of image.jpg. + +When moving the main file, sidecars will inherit the name of the main file and +be moved to the same location rather than being placed in the unknown +directory. + +Using this argument you can change this set. 
For example, to only consider XMP +and JSON: + --sidecars='xmp,json' +""", + ) + exclusive_group_debug_silent = parser.add_mutually_exclusive_group() exclusive_group_debug_silent.add_argument( diff --git a/readme.md b/readme.md index 263500f..613119b 100644 --- a/readme.md +++ b/readme.md @@ -255,6 +255,33 @@ The output may look like this, but with more fields: If the correct date is in `DateTimeOriginal`, you can include the option `--date-field=DateTimeOriginal` to get date information from it. To set multiple fields to be tried in order until a valid date is found, just join them with spaces in a quoted string like `"CreateDate FileModifyDate"`. +### Handle sidecars +Sidecars are files that have the same name as an image or video file, but a different extension. They typically contain additional metadata pertaining to the image or video file. + +By default, files with the following extensions are treated as sidecars when they share their name with an image or video file: + + * .xmp + * .json + * .yaml + * .yml + +So, for example, if `image.jpg` exists, then `image.xmp` (or `image.jpg.xmp`) will be considered a sidecar file of `image.jpg`. + +When moving the main file, sidecars will inherit the name of the main file and be moved to the same location rather than being placed in the `unknown` directory. + +You can change which file extensions are eligible to be considered sidecars using the `--sidecars` argument. For example, to only treat `.xmp` and `.json` as sidecars: + +``` +--sidecars='xmp,json' +``` + +To disable handling sidecars entirely: + +``` +--sidecars='' +``` + + ### Dry run If you want phockup to run without any changes (don't copy/move any files) but just show which changes would be done, enable this feature by using the flag `-y | --dry-run`. 
diff --git a/src/phockup.py b/src/phockup.py index 0d69728..8e7d0f6 100755 --- a/src/phockup.py +++ b/src/phockup.py @@ -18,6 +18,14 @@ class Phockup: + DEFAULT_SIDECAR_EXTENSIONS = ["json", "xmp", "yml", "yaml"] + """ + Sidecar files are files with the same name as the source file, but with a + different (or additional) extension. For example, 'image.jpg' could have + possible sidecars 'image.jpg.xmp', 'image.json', 'image.yml', etc. + + This member stores the recognized sidecar extensions. + """ DEFAULT_DIR_FORMAT = ['%Y', '%m', '%d'] DEFAULT_NO_DATE_DIRECTORY = "unknown" @@ -53,6 +61,11 @@ def __init__(self, input_dir, output_dir, **args): self.dry_run = args.get('dry_run', False) self.progress = args.get('progress', False) self.max_depth = args.get('max_depth', -1) + self.sidecar_extensions = ( + args.get("sidecars").split(",") + if args.get("sidecars") + else Phockup.DEFAULT_SIDECAR_EXTENSIONS + ) # default to concurrency of one to retain existing behavior self.max_concurrency = args.get("max_concurrency", 1) if self.max_concurrency > 1: @@ -242,9 +255,9 @@ def process_files(self, file_paths_to_process): def process_file(self, filename): """ Process the file using the selected strategy - If file is .xmp skip it so process_xmp method can handle it + If file is a sidecar skip it so process_sidecars method can handle it """ - if str.endswith(filename, '.xmp'): + if any([filename.lower().endswith(sc_ext) for sc_ext in self.sidecar_extensions]): return None progress = f'{filename}' @@ -309,7 +322,7 @@ def process_file(self, filename): self.pbar.write(progress) logger.info(progress) - self.process_xmp(filename, target_file_name, suffix, output) + self.process_sidecars(filename, target_file_name, suffix, output) break suffix += 1 @@ -344,32 +357,40 @@ def get_file_name_and_path(self, filename): target_file_path = os.path.sep.join([output, target_file_name]) return output, target_file_name, target_file_path, target_file_type - def process_xmp(self, original_filename, 
file_name, suffix, output): + def process_sidecars(self, original_filename, file_name, suffix, output): """ - Process xmp files. These are metadata for RAW images + Given an existing image, handle any sidecar files. """ - xmp_original_with_ext = original_filename + '.xmp' - xmp_original_without_ext = os.path.splitext(original_filename)[0] + '.xmp' - + car_no_ext, car_extension = os.path.splitext(original_filename) + new_car_no_ext = os.path.splitext(file_name)[0] suffix = f'-{suffix}' if suffix > 1 else '' - xmp_files = {} - - if os.path.isfile(xmp_original_with_ext): - xmp_target = f'{file_name}{suffix}.xmp' - xmp_files[xmp_original_with_ext] = xmp_target - if os.path.isfile(xmp_original_without_ext): - xmp_target = f'{(os.path.splitext(file_name)[0])}{suffix}.xmp' - xmp_files[xmp_original_without_ext] = xmp_target - - for original, target in xmp_files.items(): - xmp_path = os.path.sep.join([output, target]) - logger.info(f'{original} => {xmp_path}') + # Generate list of possible sidecars + sidecars = [ + sidecar + for sc_ext in self.sidecar_extensions + for sidecar in ( + car_no_ext + "." + sc_ext, + car_no_ext + car_extension + "." 
+ sc_ext, + ) + ] + # Filter to only those that exist + sidecars = [sidecar for sidecar in sidecars if os.path.isfile(sidecar)] + # Build target filenames for sidecars + sidecars = [ + (sidecar, sidecar.replace(car_no_ext, f"{new_car_no_ext}{suffix}")) + for sidecar in sidecars + ] + + # Perform the move + for original, target in sidecars: + sidecar_path = os.path.sep.join([output, target]) + logger.info(f"{original} => {sidecar_path}") if not self.dry_run: if self.move: - shutil.move(original, xmp_path) + shutil.move(original, sidecar_path) elif self.link: - os.link(original, xmp_path) + os.link(original, sidecar_path) else: - shutil.copy2(original, xmp_path) + shutil.copy2(original, sidecar_path) From 04789d03055dca5a399295e0ba76c5aa2accd407 Mon Sep 17 00:00:00 2001 From: Quentin Young Date: Thu, 21 Sep 2023 03:21:59 -0400 Subject: [PATCH 2/2] generate report regarding sidecar files --- src/phockup.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/phockup.py b/src/phockup.py index 8e7d0f6..a4367b7 100755 --- a/src/phockup.py +++ b/src/phockup.py @@ -12,6 +12,7 @@ from src.date import Date from src.exif import Exif +from pprint import pprint logger = logging.getLogger('phockup') ignored_files = ('.DS_Store', 'Thumbs.db') @@ -45,6 +46,8 @@ def __init__(self, input_dir, output_dir, **args): if output_dir.endswith(os.path.sep): output_dir = output_dir[:-1] + self.skipped_for_later = set() + self.handled_as_sidecar = set() self.input_dir = input_dir self.output_dir = output_dir self.output_prefix = args.get('output_prefix' or None) @@ -113,6 +116,12 @@ def print_action_report(self, run_time): logger.info(f"Would have moved {self.files_moved} files.") else: logger.info(f"Moved {self.files_moved} files.") + logger.info(f"Files ignored as sidecars ({len(self.skipped_for_later)}):") + pprint(self.skipped_for_later) + logger.info(f"Files processed as sidecars ({len(self.handled_as_sidecar)}):") + pprint(self.handled_as_sidecar) + logger.info("Files 
ignored as sidecars and not handled as sidecars:") + pprint(list(self.skipped_for_later - self.handled_as_sidecar)) def check_directories(self): """ @@ -258,6 +267,7 @@ def process_file(self, filename): If file is a sidecar skip it so process_sidecars method can handle it """ if any([filename.lower().endswith(sc_ext) for sc_ext in self.sidecar_extensions]): + self.skipped_for_later.add(filename) return None progress = f'{filename}' @@ -384,6 +394,7 @@ def process_sidecars(self, original_filename, file_name, suffix, output): # Perform the move for original, target in sidecars: + self.handled_as_sidecar.add(original) sidecar_path = os.path.sep.join([output, target]) logger.info(f"{original} => {sidecar_path}")