# EXIF Date-based Image Mover

This script scans (recursively) a directory of images, uses `exiftool` to get the best possible origin date for each image, and moves the images into a new directory structure based on the dates found.

This is a followup to the previous `exif-date-file-copier.ipynb` notebook after a few things happened:
1. I discovered that the various Python-based libraries don't always produce appropriate origin dates.
1. I manually pruned out a bunch of copied files that I decided not to keep.

Prerequisites:
- `pip install tqdm`
- `brew install exiftool`

Note: this script relies solely on `exiftool` because it's the most reliable option, even though it may be slower since every call forks a separate Perl process. Or maybe it's faster than the Python solutions? The performance difference isn't noticeable enough at this point to justify running benchmarks.

In [74]:
import glob
import logging
import multiprocessing
import os
import random
import re
import shutil
import subprocess
import time

import tqdm
from infinitewarp_utils import timing

# Module-level logger named after this module; its threshold is set to ERROR,
# so lower-severity records logged through it are dropped.
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)

In [69]:
class Config:
    """Filesystem locations used by the mover, plus a helper to create them."""

    # Directory that is searched (recursively) for images to move.
    source_dir = os.path.expanduser('~/Pictures/full-archive/corrected-names')
    # Root of the output tree; per-year folders are created beneath it.
    output_dir = os.path.expanduser('~/Pictures/full-archive/grouped-names')
    # Destination for files whose computed name was already used.
    dupes_dir = os.path.join(output_dir, 'dupes')
    # Destination for files for which no date could be determined.
    no_dates_dir = os.path.join(output_dir, 'no-dates')
    # Text file that records every "source → destination" move.
    log_path = os.path.join(output_dir, 'process-log.txt')

    @classmethod
    def makedirs(cls):
        """Create the output, dupes and no-dates directories if missing."""
        for directory in (cls.output_dir, cls.dupes_dir, cls.no_dates_dir):
            os.makedirs(directory, exist_ok=True)

# Create the output, dupes and no-dates directories as soon as this cell runs.
Config.makedirs()

In [64]:
class Patterns:
    """Regex and replacement template for turning an EXIF date into a name."""

    # Captures year, month, "day hour", minute and second from text shaped
    # like ``YYYY:MM:DD HH:MM:SS``.
    exif_date_sub = re.compile(r'(\d{4}):(\d{2}):(\d{2} \d{2})\:(\d{2})\:(\d{2})')
    # Reassembles the captured groups as ``YYYY-MM-DD HH.MM.SS``.
    exif_date_rep = r'\1-\2-\3.\4.\5'

In [65]:
def get_exif_created_date(filepath):
    """Return the best available creation timestamp reported by ``exiftool``.

    Runs ``exiftool`` on *filepath*, splits each output line at its first
    colon into a tag name and a value, and returns the first non-empty value
    among ``Date/Time Original``, ``Create Date`` and ``Modify Date``.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        The tag value as a stripped string, or ``None`` when none of the three
        tags is present with a non-empty value.
    """
    completed = subprocess.run(['exiftool', filepath], stdout=subprocess.PIPE)
    tags = {}
    for raw_line in completed.stdout.split(b'\n'):
        try:
            line = raw_line.decode('utf-8')
        except UnicodeDecodeError:
            # Lines that are not valid UTF-8 are skipped rather than failing
            # the whole file.
            continue
        # Split on the first colon only: values (e.g. timestamps) contain
        # colons of their own.
        key, separator, value = line.partition(':')
        if separator:
            # When a tag name occurs more than once, the last occurrence wins.
            tags[key.strip()] = value.strip()

    return tags.get('Date/Time Original') or tags.get('Create Date') or tags.get('Modify Date')


def build_new_name(filepath):
    """Build a date-based file name for *filepath*.

    The date comes from ``get_exif_created_date``; it is rewritten with
    ``Patterns.exif_date_sub`` / ``Patterns.exif_date_rep`` and lower-cased,
    then the original file extension is appended unchanged.

    Args:
        filepath: Path of the source image.

    Returns:
        The new file name (no directory part), or ``None`` when no date was
        found for the file.
    """
    created_date = get_exif_created_date(filepath)
    if not created_date:
        return None

    stem = Patterns.exif_date_sub.sub(Patterns.exif_date_rep, created_date).lower()
    # splitext only looks at the final path component, so a dot in a parent
    # directory (or a file without any extension) no longer leaks path
    # fragments into the generated name.
    extension = os.path.splitext(filepath)[1]
    return stem + extension

In [27]:
# Match, at any depth under the source directory, file names shaped like
# "<a>-<b>-<c> <d>.<e>.<f>.<ext>".
pattern = f'{Config.source_dir}/**/*-*-* *.*.*.*'
source_paths = glob.glob(pattern, recursive=True)

In [57]:
def build_new_names(source_paths, use_miltprocessing=True, *, use_multiprocessing=None):
    """Compute the new file name for every path in *source_paths*.

    Args:
        source_paths: Iterable of source file paths.
        use_miltprocessing: Historical (misspelled) switch, kept so existing
            positional and keyword callers keep working.
        use_multiprocessing: Correctly spelled switch; when given (not
            ``None``) it takes precedence over ``use_miltprocessing``.

    Returns:
        A dict mapping each source path to the result of ``build_new_name``
        for that path (``None`` when no date was found).
    """
    # The body used to read an undefined name because of the typo in the
    # parameter; resolve both spellings to a single flag.
    if use_multiprocessing is None:
        use_multiprocessing = use_miltprocessing

    bar = tqdm.tqdm_notebook

    # without multiprocessing == ~5/sec ~14min for 4195 files
    # with multiprocessing(4) == ~15/sec ~5min for 4195 files
    if use_multiprocessing:
        with multiprocessing.Pool(processes=4) as pool:
            # Submit everything first so the workers stay busy, then collect
            # the results in submission order while the progress bar advances.
            results = [
                (pool.apply_async(build_new_name, (filepath,)), filepath)
                for filepath in source_paths
            ]
            relocations = {
                filepath: res.get() for res, filepath in bar(results)
            }
    else:
        relocations = {
            filepath: build_new_name(filepath) for filepath in bar(source_paths)
        }
    return relocations

In [60]:
# Map every discovered source path to its date-based file name
# (None when no date was found for that file).
mapped_names = build_new_names(source_paths)

In [82]:
def uniquify_filename(filename):
    """Return *filename* with a timestamp-plus-random tag before its extension.

    The tag is ``time.time()`` immediately followed by a zero-padded random
    four-digit number, e.g. ``photo.jpg`` -> ``photo.<time><rand>.jpg``.
    A name without an extension simply gets the tag appended.

    Args:
        filename: File name (no directory part) to make unique.

    Returns:
        The tagged file name.
    """
    # splitext keeps the leading dot on the extension and returns '' when the
    # name has none, which the previous manual slicing mishandled.
    stem, extension = os.path.splitext(filename)
    random_suffix = str(random.randint(0, 9999)).zfill(4)
    return f'{stem}.{time.time()}{random_suffix}{extension}'


def move_images(mapped_names, logpath):
    """Move every source image to its destination and log each move.

    Destination rules, per entry of *mapped_names*:

    * new name is ``None``: the file keeps its base name (made unique if that
      name was already used) and goes to ``Config.no_dates_dir``;
    * new name already used: the name is made unique and the file goes to
      ``Config.dupes_dir``;
    * otherwise: the file goes to ``Config.output_dir/<first 4 chars of the
      new name>/``, creating that folder on first use.

    Args:
        mapped_names: Dict mapping source path -> new file name or ``None``.
        logpath: File to which one ``source → destination`` line per move is
            appended.
    """
    # Sets give constant-time membership checks; the list-based version
    # rescanned every previously moved name for each file.
    moved_names = set()
    years = set()

    # The log line contains a non-ASCII arrow, so pin the encoding instead of
    # depending on the platform default.
    with open(logpath, 'a', encoding='utf-8') as log_fp:
        for source_path, new_name in tqdm.tqdm_notebook(mapped_names.items()):
            if new_name is None:
                new_name = os.path.basename(source_path)
                if new_name in moved_names:
                    new_name = uniquify_filename(new_name)
                else:
                    moved_names.add(new_name)
                target_dir = Config.no_dates_dir
            elif new_name in moved_names:
                new_name = uniquify_filename(new_name)
                target_dir = Config.dupes_dir
            else:
                moved_names.add(new_name)
                year = new_name[:4]
                target_dir = os.path.join(Config.output_dir, year)
                if year not in years:
                    # Create each year folder only the first time it is seen.
                    years.add(year)
                    os.makedirs(target_dir, exist_ok=True)

            new_path = os.path.join(target_dir, new_name)
            # Log before moving so the intended move is recorded first.
            print(f'{source_path} → {new_path}', file=log_fp)
            shutil.move(source_path, new_path)

In [83]:
# Perform the moves, appending one line per moved file to the process log.
move_images(mapped_names, Config.log_path)