In [1]:
import os
from PIL import Image
from PIL.ExifTags import TAGS
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import csv
from fractions import Fraction

In [2]:
def get_exif_data(image_path):
    """Extract EXIF data from an image and sanitize every value to a string.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A dict mapping each EXIF tag name (or the raw numeric tag id when the
        id is not present in ``TAGS``) to a string value. ``bytes`` values are
        decoded with undecodable bytes replaced, and ``None`` becomes ``""``.
        Returns ``None`` when the image carries no EXIF data or when anything
        goes wrong while reading it (the error is printed, not raised).
    """
    try:
        # The context manager closes the underlying file as soon as the EXIF
        # block has been read. Without it every scanned image keeps a file
        # handle open until garbage collection, which matters when this
        # function is called for many files from a thread pool.
        with Image.open(image_path) as img:
            # NOTE(review): `_getexif` is a private Pillow method; image types
            # that do not provide it end up in the handler below. Confirm
            # whether the public `getexif()` API should be used instead.
            exif_data = img._getexif()

            if not exif_data:
                return None

            exif = {}
            for tag_id, value in exif_data.items():
                tag_name = TAGS.get(tag_id, tag_id)
                # Sanitize data: convert all values to strings and handle None
                if isinstance(value, bytes):
                    value = value.decode(errors='replace')  # Decode bytes
                exif[tag_name] = str(value) if value is not None else ""
            return exif
    except Exception as e:
        # Best effort: one unreadable file must not abort a directory scan.
        print(f"Error processing {image_path}: {str(e)}")
        return None
    

In [3]:
# parallel processing

def scan_directory(root_dir):
    """Collect EXIF records for every supported image found under ``root_dir``.

    The tree is walked recursively; each file whose extension (compared
    case-insensitively) is one of ``.jpg``, ``.jpeg``, ``.tiff`` or ``.png``
    is handed to ``get_exif_data_with_file`` on a thread pool.

    Args:
        root_dir: Directory to walk.

    Returns:
        A list of the truthy results, in submission order. Files for which the
        worker returned nothing are left out.
    """
    extensions = ['.jpg', '.jpeg', '.tiff', '.png']
    records = []

    with ThreadPoolExecutor() as pool:
        # Queue one task per matching file; path and name are passed as
        # explicit arguments so each task is bound to its own file.
        pending = [
            pool.submit(get_exif_data_with_file, os.path.join(folder, name), name)
            for folder, _subdirs, names in os.walk(root_dir)
            for name in names
            if os.path.splitext(name)[1].lower() in extensions
        ]

        # Drain the tasks in the order they were queued, with a progress bar.
        for task in tqdm(pending, desc="Processing images"):
            outcome = task.result()
            if outcome:
                records.append(outcome)

    return records

def get_exif_data_with_file(image_path, file):
    """Read EXIF for one image and tag the record with its name and path.

    Args:
        image_path: Full path passed to ``get_exif_data``.
        file: Bare file name stored under the ``Filename`` key.

    Returns:
        The dict from ``get_exif_data`` with ``Filename`` and ``Filepath``
        added, or that function's falsy result unchanged when there is
        nothing to tag.
    """
    record = get_exif_data(image_path)
    if not record:
        return record

    record.update({'Filename': file, 'Filepath': image_path})
    return record

In [4]:
def _format_ratio(numerator, denominator):
    """Render an integer ratio as "n" when the denominator is 1, else "n/d"."""
    if denominator == 1:
        return f"{numerator}"
    return f"{numerator}/{denominator}"


def convert_exposure_time(exposure_str):
    """Convert an ExposureTime value to a fraction string such as "1/125".

    Accepted forms:
        * tuple text, e.g. ``"(1, 125)"`` -> ``"1/125"`` (not reduced)
        * slash text, e.g. ``"1/125"``    -> ``"1/125"`` (not reduced)
        * decimal text or a number, e.g. ``"0.008"`` or ``0.008`` -> ``"1/125"``
          (approximated with a denominator of at most 1000)

    A ratio whose denominator is 1 is returned as the bare numerator.

    Args:
        exposure_str: The raw value; normally a string, but numbers are
            accepted and converted to text first.

    Returns:
        The fraction string, or ``""`` when the value is empty, missing (NaN)
        or cannot be parsed.
    """
    if not exposure_str or pd.isna(exposure_str):
        return ""

    # Coerce to text so numeric input is parsed instead of being silently
    # discarded for lacking string methods.
    text = str(exposure_str).strip()
    try:
        # Case 1: tuple string like "(1, 125)"
        if text.startswith('(') and text.endswith(')'):
            num_den = text[1:-1].split(',')
            return _format_ratio(int(num_den[0].strip()), int(num_den[1].strip()))

        # Case 2: slash-separated rational like "1/125"
        if '/' in text:
            numerator, denominator = text.split('/', 1)
            return _format_ratio(int(numerator.strip()), int(denominator.strip()))

        # Case 3: decimal string like "0.008"
        frac = Fraction(float(text)).limit_denominator(1000)
        return _format_ratio(frac.numerator, frac.denominator)
    except (ValueError, IndexError, OverflowError):
        # Only parse failures are treated as "no value"; unrelated exceptions
        # such as KeyboardInterrupt are no longer swallowed.
        return ""

In [5]:
def main(root_dir="../Photo & Design/20241229 - Family Session/",
         output_csv="exif_data.csv"):
    """Scan a directory for image EXIF data and export selected tags to CSV.

    Args:
        root_dir: Directory passed to ``scan_directory``. Defaults to the
            session folder this notebook was written for.
        output_csv: Path of the CSV file to write.

    The CSV always contains the same columns in the same order. A tag that no
    scanned image provides is written as an empty column instead of raising
    ``KeyError``. When the scan yields no records, nothing is written and a
    message is printed.
    """
    # Columns to export, in output order
    export_columns = [
        "Filename", "Make", "Model", "DateTime", "YResolution", "XResolution",
        "ExposureBiasValue", "MaxApertureValue", "Flash", "FocalLength",
        "FocalLengthIn35mmFilm", "ExifImageWidth", "ExifImageHeight", "FNumber",
        "ISOSpeedRatings", "ExposureFraction", "LensModel", "Filepath",
    ]

    # Collect all EXIF data
    exif_data = scan_directory(root_dir)
    if not exif_data:
        print("No EXIF data found.")
        return

    df = pd.DataFrame(exif_data)
    df = df.apply(lambda x: x.str.strip() if x.dtype == 'object' else x)

    # Add ExposureFraction column; ExposureTime may be absent from every image
    if 'ExposureTime' in df.columns:
        df['ExposureFraction'] = df['ExposureTime'].apply(convert_exposure_time)
    else:
        df['ExposureFraction'] = ""

    # reindex (rather than df[[...]]) fills tags missing from the whole scan
    # with empty values instead of failing.
    df1 = df.reindex(columns=export_columns)

    # Fix CSV escaping issues by quoting all fields and specifying an escape character
    df1.to_csv(
        output_csv,
        index=False,
        quoting=csv.QUOTE_ALL,  # Enclose all fields in quotes
        escapechar='\\',         # Use backslash to escape special chars
        encoding='utf-8'
    )
    print(f"Data saved to {output_csv}")
        
# Entry point: run the export only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Processing images: 100%|████████████████████████████████████████████████████████████| 422/422 [00:00<00:00, 603.62it/s]


Data saved to exif_data.csv
