diff --git a/phockup.py b/phockup.py index 4b52e78..ad263ec 100755 --- a/phockup.py +++ b/phockup.py @@ -187,6 +187,36 @@ def parse_args(args=sys.argv[1:]): """, ) + parser.add_argument( + "--sidecars", + default=",".join(Phockup.DEFAULT_SIDECAR_EXTENSIONS), + action="store", + help=""" +Override the set of extensions that are considered to be sidecar files. +Sidecars are files that have the same name as an image or video file, but a +different extension. They typically contain additional metadata pertaining to +the image or video file. + +File extensions which are considered to be sidecars if they are named the same +as a corresponding image file are: + + """ + + str(Phockup.DEFAULT_SIDECAR_EXTENSIONS) + + """ + +So, for example, if image.jpg exists, then image.xmp (or image.jpg.xmp) will be +considered a sidecar file of image.jpg. + +When moving the main file, sidecars will inherit the name of the main file and +be moved to the same location rather than being placed in the unknown +directory. + +Using this argument you can change this set. For example, to only consider XMP +and JSON: + --sidecars='xmp,json' +""", + ) + exclusive_group_debug_silent = parser.add_mutually_exclusive_group() exclusive_group_debug_silent.add_argument( diff --git a/readme.md b/readme.md index 263500f..613119b 100644 --- a/readme.md +++ b/readme.md @@ -255,6 +255,33 @@ The output may look like this, but with more fields: If the correct date is in `DateTimeOriginal`, you can include the option `--date-field=DateTimeOriginal` to get date information from it. To set multiple fields to be tried in order until a valid date is found, just join them with spaces in a quoted string like `"CreateDate FileModifyDate"`. +### Handle sidecars +Sidecars are files that have the same name as an image or video file, but a different extension. They typically contain additional metadata pertaining to the image or video file. 
+ +File extensions which are considered to be sidecars if they are named the same as a corresponding image file are: + + * .xmp + * .json + * .yaml + * .yml + +So, for example, if `image.jpg` exists, then `image.xmp` (or `image.jpg.xmp`) will be considered a sidecar file of image.jpg. + +When moving the main file, sidecars will inherit the name of the main file and be moved to the same location rather than being placed in the unknown directory. + +You can change which file extensions are eligible to be considered sidecars using the `--sidecars` argument. For example, to only treat `.xmp` and `.json` as sidecars: + +``` +--sidecars='xmp,json' +``` + +To disable handling sidecars entirely: + +``` +--sidecars='' +``` + + ### Dry run If you want phockup to run without any changes (don't copy/move any files) but just show which changes would be done, enable this feature by using the flag `-y | --dry-run`. diff --git a/src/phockup.py b/src/phockup.py index 0d69728..a4367b7 100755 --- a/src/phockup.py +++ b/src/phockup.py @@ -12,12 +12,21 @@ from src.date import Date from src.exif import Exif +from pprint import pprint logger = logging.getLogger('phockup') ignored_files = ('.DS_Store', 'Thumbs.db') class Phockup: + DEFAULT_SIDECAR_EXTENSIONS = ["json", "xmp", "yml", "yaml"] + """ + Sidecar files are files with the same name as the source file, but with a + different (or additional) extension. For example, 'image.jpg' could have + possible sidecars 'image.jpg.xmp', 'image.json', 'image.yml', etc. + + This member stores the recognized sidecar extensions. 
+ """ DEFAULT_DIR_FORMAT = ['%Y', '%m', '%d'] DEFAULT_NO_DATE_DIRECTORY = "unknown" @@ -37,6 +46,8 @@ def __init__(self, input_dir, output_dir, **args): if output_dir.endswith(os.path.sep): output_dir = output_dir[:-1] + self.skipped_for_later = set() + self.handled_as_sidecar = set() self.input_dir = input_dir self.output_dir = output_dir self.output_prefix = args.get('output_prefix' or None) @@ -53,6 +64,11 @@ def __init__(self, input_dir, output_dir, **args): self.dry_run = args.get('dry_run', False) self.progress = args.get('progress', False) self.max_depth = args.get('max_depth', -1) + self.sidecar_extensions = ( + args.get("sidecars").split(",") + if args.get("sidecars") + else Phockup.DEFAULT_SIDECAR_EXTENSIONS + ) # default to concurrency of one to retain existing behavior self.max_concurrency = args.get("max_concurrency", 1) if self.max_concurrency > 1: @@ -100,6 +116,12 @@ def print_action_report(self, run_time): logger.info(f"Would have moved {self.files_moved} files.") else: logger.info(f"Moved {self.files_moved} files.") + logger.info(f"Files ignored as sidecars ({len(self.skipped_for_later)}):") + pprint(self.skipped_for_later) + logger.info(f"Files processed as sidecars ({len(self.handled_as_sidecar)}):") + pprint(self.handled_as_sidecar) + logger.info("Files ignored as sidecars and not handled as sidecars:") + pprint(list(self.skipped_for_later - self.handled_as_sidecar)) def check_directories(self): """ @@ -242,9 +264,10 @@ def process_files(self, file_paths_to_process): def process_file(self, filename): """ Process the file using the selected strategy - If file is .xmp skip it so process_xmp method can handle it + If file is a sidecar skip it so process_sidecars method can handle it """ - if str.endswith(filename, '.xmp'): + if any([filename.lower().endswith(sc_ext) for sc_ext in self.sidecar_extensions]): + self.skipped_for_later.add(filename) return None progress = f'{filename}' @@ -309,7 +332,7 @@ def process_file(self, filename): 
def process_sidecars(self, original_filename, file_name, suffix, output):
    """
    Given an already-processed main file, move, link or copy its sidecars.

    A sidecar is looked up next to the main file under two naming schemes,
    for every extension in ``self.sidecar_extensions``:

    * ``<name>.<sidecar ext>``            (e.g. ``image.xmp``)
    * ``<name>.<ext>.<sidecar ext>``      (e.g. ``image.jpg.xmp``)

    Each sidecar found is renamed after the main file's target name
    (keeping whatever followed the original stem) and placed in ``output``.

    :param original_filename: path of the main file as found in the input
    :param file_name: target file name chosen for the main file
    :param suffix: numeric collision counter; values above 1 are appended
                   to the target stem as ``-<suffix>``
    :param output: directory the main file was placed in
    """
    source_stem = os.path.splitext(original_filename)[0]
    target_stem = os.path.splitext(file_name)[0]
    suffix = f'-{suffix}' if suffix > 1 else ''

    # A dict is used as an ordered set: when the main file has no extension
    # both naming schemes yield the same path, and handling that path twice
    # would fail on the second move.
    sidecars = {}
    for sc_ext in self.sidecar_extensions:
        # Tolerate values such as ".xmp" or " json" and ignore empty entries
        sc_ext = sc_ext.strip().lstrip('.')
        if not sc_ext:
            continue
        for candidate in (f'{source_stem}.{sc_ext}',
                          f'{original_filename}.{sc_ext}'):
            # Never treat the main file itself as its own sidecar
            if candidate in sidecars or candidate == original_filename:
                continue
            if not os.path.isfile(candidate):
                continue
            # Swap only the leading stem; str.replace() would also rewrite
            # later occurrences of the stem inside the extension part.
            sidecars[candidate] = f'{target_stem}{suffix}{candidate[len(source_stem):]}'

    # Perform the move
    for original, target in sidecars.items():
        self.handled_as_sidecar.add(original)
        sidecar_path = os.path.sep.join([output, target])
        logger.info(f"{original} => {sidecar_path}")

        if not self.dry_run:
            if self.move:
                shutil.move(original, sidecar_path)
            elif self.link:
                os.link(original, sidecar_path)
            else:
                shutil.copy2(original, sidecar_path)