In [None]:
import fnmatch
import os
import json
from PIL import Image
from PIL.ExifTags import TAGS
from pprintpp import pprint as pp
import photohash
import hashlib

maps_directory = "/Volumes/Arcadia/State Records Office/Disk1/Cons"
json_file = "sro_image_list.json"

# Keys left out of the stored copy of the image's `info` dict:
#   "exif"        - raw copy of the EXIF data; the decoded version is stored
#                   separately under the record's own "exif" key.
#   "icc_profile" - skipped because of the Latin-1 junk that's in there.
_SKIPPED_INFO_KEYS = ("exif", "icc_profile")


def _decode_exif(img):
    """Return the image's EXIF data keyed by tag name, or None if unavailable.

    Args:
        img: an opened PIL image.

    Returns:
        A dict mapping each EXIF tag's name (looked up in PIL.ExifTags.TAGS)
        to its value; tags with no entry in TAGS keep their original key.
        None when the image exposes no EXIF data.
    """
    # `_getexif` is a private PIL method that not every image class provides,
    # so look it up defensively instead of assuming a JPEG image object.
    getexif = getattr(img, "_getexif", None)
    if getexif is None:
        return None

    raw_exif = getexif()
    if raw_exif is None:
        return None

    return {TAGS.get(tag, tag): value for tag, value in raw_exif.items()}


def _describe_image(root, filename):
    """Build the metadata record for a single image file.

    Args:
        root: directory containing the file, as yielded by os.walk.
        filename: name of the image file inside `root`.

    Returns:
        A dict with the file name, the name of its containing folder, the two
        hashes, the PIL format/mode/size, the size on disk, the filtered
        `info` dict and, when EXIF data is present, the decoded "exif" dict.
    """
    img_path = os.path.join(root, filename)

    # Open a PIL Image object so we can get at the internals of the image
    img = Image.open(img_path)
    width, height = img.size[0], img.size[1]

    img_info = {
        "filename": filename,
        "cons_folder": os.path.basename(os.path.normpath(root)),
        "photohash": photohash.average_hash(img_path),
        # Hash of whatever img.tobytes() returns, not of the file on disk.
        "md5hash": hashlib.md5(img.tobytes()).hexdigest(),
        "format": img.format,
        "mode": img.mode,
        "width": width,
        "height": height,
        "filesize": os.path.getsize(img_path),
        # Filtered copy, so the image object's own `info` dict is not mutated.
        "info": {
            key: value
            for key, value in img.info.items()
            if key not in _SKIPPED_INFO_KEYS
        },
    }

    # NOTE: a step used for the TIF files only is disabled here. It rewrote
    # info["dpi"] pairs whose first value was 150.0, 200.0 or 300.0 as string
    # tuples such as ("150", "150").

    # Decode the JPEG EXIF data; the key is only added when there is any.
    exif = _decode_exif(img)
    if exif is not None:
        img_info["exif"] = exif

    return img_info


images = []
for root, dirnames, filenames in os.walk(maps_directory):
    for filename in fnmatch.filter(filenames, "*.jpg"):
        # Skip OSX Preview's temp files ("._" prefix), printing each skipped
        # name so the skips are visible in the output.
        if filename.startswith("._"):
            print(filename)
            continue

        images.append(_describe_image(root, filename))

with open(json_file, "w") as outfile:
    json.dump(images, outfile)
print("Fin")

In [None]:
import psycopg2

# NOTE(review): the database credentials are hard-coded in this string; consider
# loading them from the environment or a ~/.pgpass file before sharing this file.
connection = "host='pg01.geogeeks.org' dbname='lost_towns' user='keith' password='geogeeks'"
conn = psycopg2.connect(connection)
cursor = conn.cursor()

# cursor.execute("SELECT * FROM sro_images LIMIT 100")
# records = cursor.fetchall()
# pp(records)

# The INSERT statement is the same for every row, so it is built once here.
# The values are passed separately to cursor.execute() rather than being
# formatted into the SQL text.
query = ("INSERT INTO sro_images (filename, format, mode, filesize, width, height, "
         "info, exif, md5hash, photohash, cons_folder) VALUES "
         "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);")

try:
    for index, image in enumerate(images):
        # Progress indicator: one line per image inserted.
        print(index)

        # Images without EXIF data get an empty dict so that every row
        # stores a JSON object in the exif column.
        image.setdefault("exif", {})

        # `info` and `exif` are dicts, so they are serialised to JSON text.
        data = (image["filename"], image["format"], image["mode"], image["filesize"],
                image["width"], image["height"], json.dumps(image["info"]),
                json.dumps(image["exif"]), image["md5hash"], image["photohash"],
                image["cons_folder"])
        cursor.execute(query, data)

    # All rows go in as a single transaction.
    print("Committing...")
    conn.commit()
except Exception:
    # Undo the partial insert so the connection is not left inside a failed
    # transaction, then let the original error surface.
    conn.rollback()
    raise
print("Fin")