Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 284 lines (241 sloc) 9.45 KB
#!/usr/bin/env python3
# A simple script for organizing photos and videos based on date.
# Copyright 2014 Jonathan Koren <jonathan@jonathankoren.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# Config for creating destination subdirectories.
#
# YYYY_MM_DD_FORMAT selects the layout of the dated subdirectory that
# makeDestDirYMD() appends to the destination directory. Three values are
# matched literally there:
#   'yyyy/yyyy-mm-dd'  ->  2014/2014-01-02
#   'yyyy-mm-dd'       ->  2014-01-02          (Picasa default)
#   'yyyy/mm/dd'       ->  2014/01/02
# Any other value (such as 'yyyy/MMM dd' below) falls through to the long
# form '<year>/<month name> <day>, <year>', e.g. '2014/January 2, 2014'.
YYYY_MM_DD_FORMAT = 'yyyy/yyyy-mm-dd'
# Alternatives -- uncomment exactly one to replace the value above:
# YYYY_MM_DD_FORMAT = 'yyyy-mm-dd'
# YYYY_MM_DD_FORMAT = 'yyyy/mm/dd'
# YYYY_MM_DD_FORMAT = 'yyyy/MMM dd'
#########################################################
# There are no user-serviceable parts below this line #
#########################################################
import calendar
import datetime
import hachoir.parser
import hachoir.metadata
import logging
import os
from PIL import Image
from PIL.ExifTags import TAGS
import re
import shutil
import sys
import time
import xml.etree.ElementTree
# Lower-case file extensions (without the leading dot) handled as photos.
PHOTO_EXTENSIONS = {
    'gif', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'wmf', 'cr2', 'heic',
}
# Lower-case file extensions (without the leading dot) handled as movies.
MOVIE_EXTENSIONS = {
    'avi', 'mov', 'mp4', 'mpg', 'mpeg', 'qt', 'wmv',
}
# Configure the root logger so that INFO messages and above are emitted.
logging.basicConfig(level=logging.INFO)
def listFiles(dir):
    '''Recursively collect the paths of all regular files below a directory.

    Files located directly in `dir` come first in the result, followed by
    the contents of each subdirectory in turn. Subdirectories whose own
    name starts with '.' (for example '.git') are not descended into, and
    entries that are neither a regular file nor a directory (such as
    broken symlinks) are ignored.

    dir -- path of the directory to scan.

    Returns a list of paths, each built by joining the directory it was
    found in with the entry name.

    Raises OSError if `dir`, or a subdirectory that is descended into,
    cannot be listed.
    '''
    files = []
    subdirs = []
    for item in os.listdir(dir):
        path = os.path.join(dir, item)
        if os.path.isfile(path):
            files.append(path)
        elif os.path.isdir(path) and not item.startswith('.'):
            # The hidden-directory test must look at the entry's own name.
            # The joined path starts with whatever `dir` starts with, so
            # testing it would never skip '/photos/.cache' and would skip
            # every subdirectory of a relative source such as './photos'.
            subdirs.append(path)
    for subdir in subdirs:
        files.extend(listFiles(subdir))
    return files
def getExifTag(img, tagName):
    '''Look up one EXIF value on an image by its symbolic tag name.

    img -- object whose `_getexif()` method yields a mapping from numeric
           EXIF tag ids to values (or None when there is no EXIF data).
    tagName -- tag name as it appears among the values of
               PIL.ExifTags.TAGS, e.g. 'DateTimeOriginal'.

    Returns the value of the first entry whose id maps to `tagName`.
    Returns None when `img` has no `_getexif` attribute, when
    `_getexif()` returned None, or when no entry carries that tag.
    '''
    try:
        exif_entries = img._getexif().items()
        for tag_id, value in exif_entries:
            if TAGS.get(tag_id) != tagName:
                continue
            return value
    except AttributeError:
        # Either img lacks _getexif() or it returned None (no .items()).
        pass
    return None
def getDateFromFilename(filename):
    '''Guess a calendar date from digits embedded in a file's base name.

    Three layouts are tried in this order, each allowing one optional
    separator out of '-', '_', '.', ':' or ' ' between the fields:

        1. year month day    (e.g. IMG_20140102.jpg, 2014-01-02.png)
        2. day month year    (e.g. 25-12-2014.jpg)
        3. month day year    (e.g. 12-25-2014.jpg)

    The first layout that yields a real calendar date wins. Year-first is
    tried first because a camera name such as IMG_20100119_201120.jpg
    (date followed by a time) would otherwise also be readable as
    month-day-year with the wrong year. Only years 1800-2099 are matched.

    filename -- path or bare file name; only the base name is examined.

    Returns the POSIX timestamp (float) of local midnight on the detected
    date, or None when no layout produces a valid date.
    '''
    simple_filename = os.path.basename(filename)
    sep = '[-_.: ]?'
    # Named groups keep the field lookup independent of the field order;
    # positional group numbers shift whenever the layout changes.
    year = '(?P<y>(?:18|19|20)[0-9]{2})'
    month = '(?P<m>[01][0-9])'
    day = '(?P<d>[0-3][0-9])'
    layouts = (
        (year, month, day),
        (day, month, year),
        (month, day, year),
    )
    for layout in layouts:
        match = re.search(sep.join(('',) + layout + ('',)), simple_filename)
        if match is None:
            continue
        y = int(match.group('y'))
        m = int(match.group('m'))
        d = int(match.group('d'))
        try:
            # datetime() rejects impossible dates (month 13, day 0,
            # February 30, ...); timestamp() may reject dates outside the
            # platform's supported range.
            return datetime.datetime(y, m, d).timestamp()
        except (ValueError, OverflowError, OSError):
            continue
    return None
def get_timestamp_from_photo_metadata(filename):
    '''Read the capture date of a photo from its EXIF data.

    The EXIF tags 'DateTimeOriginal' and 'DateTime' are consulted in that
    order. The tag value is expected to be a string whose date part
    precedes the last space and is colon-separated (year:month:day); only
    the date part is used, the time of day is discarded. A tag whose value
    cannot be parsed is skipped and the next tag is tried.

    filename -- path of the image file.

    Returns the POSIX timestamp (float) of local midnight on the recorded
    date, or None when the file cannot be opened as an image or carries no
    usable date tag.
    '''
    try:
        # The context manager closes the underlying file; a long run would
        # otherwise keep one descriptor open per photo until collected.
        with Image.open(filename) as img:
            for exiftag in ('DateTimeOriginal', 'DateTime'):
                dateTime = getExifTag(img, exiftag)
                if dateTime is None:
                    continue
                try:
                    date_part = dateTime[:dateTime.rindex(' ')]
                    year, month, day = (
                        int(field) for field in date_part.split(':')[:3])
                    return datetime.datetime(year, month, day).timestamp()
                except (AttributeError, TypeError, ValueError,
                        OverflowError, OSError):
                    # Malformed or placeholder values (no space, too few
                    # fields, '0000:00:00 ...', non-string data, or a date
                    # the platform cannot convert) must not abort the run.
                    continue
    except OSError:
        # Unreadable or unrecognized image file (IOError is the same class
        # as OSError on Python 3).
        pass
    return None
def get_timestamp_from_movie_metadata(filename):
    '''Read the creation date of a movie file through hachoir.

    filename -- path of the movie file.

    Returns the POSIX timestamp (float) derived from the 'creation_date'
    metadata entry, or None when no parser can be created for the file,
    no metadata can be extracted, the entry is absent, or the file cannot
    be read.
    '''
    try:
        parser = hachoir.parser.createParser(filename)
        if parser is None:
            # NOTE(review): createParser() is understood to return None for
            # files it cannot identify -- confirm against the hachoir docs.
            return None
        # Using the parser as a context manager releases the input stream
        # it holds open (pattern taken from the hachoir documentation).
        with parser:
            metadata = hachoir.metadata.extractMetadata(parser)
            if metadata is None:
                return None
            created = metadata.get('creation_date')
    except ValueError:
        # NOTE(review): Metadata.get() is understood to raise ValueError
        # when the requested key has no value -- confirm.
        return None
    except OSError:
        # Unreadable file (IOError is the same class as OSError on Python 3).
        return None
    if created is None:
        return None
    if not isinstance(created, datetime.datetime):
        # NOTE(review): presumably a plain datetime.date can be stored
        # here; it has no timestamp(), so use midnight of that day.
        created = datetime.datetime(created.year, created.month, created.day)
    return created.timestamp()
def get_timestamp_from_aae_content(filename):
    '''Extract the earliest date recorded inside an .aae file.

    The file is parsed as XML and every <date> element that is a child of
    a <dict> directly below the root element is examined. Dates must have
    the form 'YYYY-MM-DDTHH:MM:SSZ' and are interpreted as UTC; elements
    that are empty or do not match that form are skipped.

    filename -- path of the XML file.

    Returns the smallest matching date as integer seconds since the
    epoch, or None when the file cannot be read, is not well-formed XML,
    or contains no parsable date.
    '''
    try:
        tree = xml.etree.ElementTree.parse(filename)
    except (xml.etree.ElementTree.ParseError, OSError):
        # parse() signals failure by raising, never by returning None.
        return None
    earliest_timestamp = None
    for ele in tree.getroot().findall('./dict/date'):
        if not ele.text:
            # <date/> has text None, which strptime() rejects with a
            # TypeError rather than a ValueError.
            continue
        try:
            parsed = time.strptime(ele.text, '%Y-%m-%dT%H:%M:%SZ')
        except ValueError:
            continue
        timestamp = calendar.timegm(parsed)
        if earliest_timestamp is None or timestamp < earliest_timestamp:
            earliest_timestamp = timestamp
    return earliest_timestamp
def get_contemporaneous_timestamp(filename):
    '''Borrow the date of a photo stored next to `filename`.

    For every extension in PHOTO_EXTENSIONS the path formed by replacing
    the extension of `filename` is probed, first with the extension in
    lower case and then in upper case. The first path that can be stat'ed
    is handed to getDate().

    filename -- path containing at least one '.'; everything after the
                last '.' is replaced.

    Returns the result of time.mktime() for local midnight of the date
    reported for the first companion photo found, or None when no
    companion exists.
    '''
    stem_with_dot = filename[:filename.rindex('.') + 1]
    for ext in PHOTO_EXTENSIONS:
        for candidate in (stem_with_dot + ext, stem_with_dot + ext.upper()):
            try:
                os.stat(candidate)
            except OSError:
                # Not present under this spelling; try the next one.
                continue
            (y, m, d) = getDate(candidate)
            midnight = datetime.datetime(year=y, month=m, day=d)
            return time.mktime(midnight.timetuple())
    return None
def getDate(filename):
    '''Determine the date on which a photo or movie was taken.

    Several candidate timestamps are gathered and the EARLIEST one is
    used:

      * embedded metadata (photo EXIF, movie metadata, or .aae content),
      * a date found in the file name,
      * for movies and .aae files, the date of a photo with the same name
        stored alongside,
      * the file's ctime and mtime.

    The earliest is chosen because somebody had to set a piece of metadata
    deliberately for it to be early, whereas ctime and mtime are almost
    always the moment the file was copied to disk.

    filename -- path of the file to examine.

    Returns a (year, month, day) tuple in local time. Returns None for
    hidden files (base name starting with '.'), files without an
    extension, and files whose extension is not 'aae' and is in neither
    PHOTO_EXTENSIONS nor MOVIE_EXTENSIONS.

    Raises OSError if ctime/mtime of a supported file cannot be read.
    '''
    # basename()/splitext() cope with paths that contain no separator or
    # no dot at all, where str.rindex() would raise ValueError.
    basename = os.path.basename(filename)
    if basename.startswith('.'):
        return None
    extension = os.path.splitext(basename)[1][1:].lower()
    if not extension:
        return None

    is_aae = extension == 'aae'
    is_photo = extension in PHOTO_EXTENSIONS
    is_movie = extension in MOVIE_EXTENSIONS
    if not (is_aae or is_photo or is_movie):
        return None

    metadata_time = None
    contemporaneous_time = None
    if is_aae:
        metadata_time = get_timestamp_from_aae_content(filename)
        contemporaneous_time = get_contemporaneous_timestamp(filename)
    elif is_photo:
        metadata_time = get_timestamp_from_photo_metadata(filename)
    else:
        metadata_time = get_timestamp_from_movie_metadata(filename)
        contemporaneous_time = get_contemporaneous_timestamp(filename)
    filename_time = getDateFromFilename(filename)

    # creation time is really stupid, you can't really trust the semantics
    # of any of these, except modification time, but c'est la vie
    ctime = os.path.getctime(filename)
    mtime = os.path.getmtime(filename)

    candidates = [metadata_time, filename_time, contemporaneous_time,
                  ctime, mtime]
    # ctime and mtime are always present, so min() never sees an empty list.
    earliestTime = min(t for t in candidates if t is not None)
    dt = datetime.datetime.fromtimestamp(earliestTime)
    return (dt.year, dt.month, dt.day)
def makeDestDirYMD(destDirName, ymd):
    '''Build the dated destination directory path for one file.

    destDirName -- root of the destination tree.
    ymd -- indexable (year, month, day) of numbers.

    Returns `destDirName`, a path separator, and a date component laid
    out according to YYYY_MM_DD_FORMAT. When the year is outside
    1900-2100, the month outside 1-12 or the day outside 1-31, the
    component is the literal '_invalid_date_'. An unrecognized format
    value selects '<year>/<month name> <day>, <year>'.

    Nothing is created on disk; only the path string is computed.
    '''
    prefix = destDirName + os.path.sep
    plausible = (1900 <= ymd[0] <= 2100 and
                 1 <= ymd[1] <= 12 and
                 1 <= ymd[2] <= 31)
    if not plausible:
        return prefix + '_invalid_date_'

    year, month, day = ymd[0], ymd[1], ymd[2]
    if YYYY_MM_DD_FORMAT == 'yyyy/yyyy-mm-dd':
        return prefix + '%d/%d-%02d-%02d' % (year, year, month, day)
    if YYYY_MM_DD_FORMAT == 'yyyy-mm-dd':
        return prefix + '%d-%02d-%02d' % (year, month, day)
    if YYYY_MM_DD_FORMAT == 'yyyy/mm/dd':
        return prefix + os.path.sep.join(
            ['%d' % year, '%02d' % month, '%02d' % day])
    # Fallback layout, e.g. '2014/January 2, 2014'.
    long_form = calendar.month_name[month] + ' ' + str(day) + ', ' + str(year)
    return prefix + os.path.sep.join(['%d' % year, long_form])
########################################################
if __name__ == '__main__':
    # Command-line entry point: copy every supported file found below
    # <source> into a dated subdirectory of <dir>. Source files are never
    # moved or deleted, and existing destination files are never
    # overwritten.
    if len(sys.argv) != 3:
        print('usage: photo-autorganize <source> <dir>', file=sys.stderr)
        # A usage error must not look like success to a calling script.
        sys.exit(2)

    sourceDirname = sys.argv[1]
    destDirName = sys.argv[2]

    for filename in listFiles(sourceDirname):
        ymd = getDate(filename)
        if ymd is None:
            logging.info('skipping ' + filename)
            continue

        destDirnameYMD = makeDestDirYMD(destDirName, ymd)
        # Destination names are lower-cased.
        destFilename = os.path.join(destDirnameYMD,
                                    os.path.basename(filename).lower())

        try:
            # exist_ok tolerates only "already exists"; real failures such
            # as missing permissions are reported instead of being hidden.
            os.makedirs(destDirnameYMD, exist_ok=True)
        except OSError as detail:
            logging.error('could not create ' + destDirnameYMD + '. ' + str(detail))
            continue

        if os.path.exists(destFilename):
            # FIXME check the SHA1s on filename and destFilename. If they're the same
            # info a skip. If they're different, rename the destFilename and copy
            logging.warning('refusing to copy ' + filename +
                            ' to existing ' + destFilename)
            continue

        try:
            logging.info('copying ' + filename + ' to ' + destFilename)
            shutil.copy2(filename, destFilename)
        except OSError as detail:
            logging.error('tried to copy ' + filename + ' to ' + destFilename + ' but failed. ' + str(detail))
You can’t perform that action at this time.