In [4]:

import piexif
import pandas as pd
import os


# a set of metadata tags to read (only)
READ_TAGS = {'Make', 'Model', 'Orientation', 'XResolution', 'YResolution', 'Software', 'DateTime', 'ExposureTime', 'FNumber', 'ExifVersion', 'DateTimeOriginal', 'DateTimeDigitized', 'ShutterSpeedValue', 'ApertureValue', 'BrightnessValue', 'ExposureBiasValue', 'MeteringMode', 'Flash', 'FocalLength', 'ColorSpace', 'PixelXDimension', 'PixelYDimension', 'LensMake', 'LensModel', 'GPSLatitudeRef', 'GPSLatitude', 'GPSLongitudeRef', 'GPSLongitude', 'GPSAltitude', 'XResolution', 'YResolution'}



def list_image_tags_names(image_path:str) -> list:
    """
    return a list of all metadata tag names for an image.
    """
    # list of tag names to return at the end
    tag_names = []
    # read all the metadata tags using piexif
    exif_dict = piexif.load(image_path)
    # loop through these types of metadata tags
    for ifd in ("0th", "Exif", "GPS", "1st"):
        for tag in exif_dict[ifd]:
            # get the actual tag name from its code
            tag_name = piexif.TAGS[ifd][tag]["name"]
            # append to our list of tag names to return
            tag_names.append(tag_name)
    # return the list
    return tag_names




def get_image_tags(image_path:str, read_tags:set = READ_TAGS) -> dict:
    """
    return the metadata tags of an image as a dict
    """

    # an inner function to transform GPS values
    def _convert_gps(gps_tuple):
        deg = round(gps_tuple[0][0] / gps_tuple[0][1], 4)
        min = round(gps_tuple[1][0] / gps_tuple[1][1], 4)
        sec = round(gps_tuple[2][0] / gps_tuple[2][1], 4)
        d = round(deg + ((min / 60) + (sec / 3600)), 6)
        return d

    # empty list of tags
    tags = {}
    # read all the metadata tags using piexif
    exif_dict = piexif.load(image_path)
    # loop through these types of metadata tags
    for ifd in ("0th", "Exif", "GPS", "1st"):
        for tag in exif_dict[ifd]:
            # get the actual tag name form its code
            tag_name = piexif.TAGS[ifd][tag]["name"]
            if tag_name in read_tags:
                # get the value of this tag
                tag_value = exif_dict[ifd][tag]
                if isinstance(tag_value, bytes):
                    tag_value = tag_value.decode(encoding="utf-8", errors="ignore")
                elif isinstance(tag_value, tuple) and len(tag_value) == 2:
                    # this is tuple of value and its unit
                    tag_value = round(tag_value[0] / tag_value[1], 4)
                elif tag_name == 'GPSLatitude':
                    tag_value = _convert_gps(tag_value)
                elif tag_name == 'GPSLongitude':
                    tag_value = _convert_gps(tag_value)
                # add our tag to our dict
                tags[tag_name] = tag_value
    # return the tags
    return tags




def get_image_tags_to_dataframe(image_paths:list, read_tags:set = READ_TAGS) -> pd.DataFrame:
    """
    Take a list of image paths and returns a dataframe of all their metadata tag values

    Args:
        image_paths: list of image paths
        read_tags: a list of metadata tag names to read
    """
    columns = ["image_path", "errors"]
    columns.extend(list(read_tags))
    df = pd.DataFrame(columns=columns)
    # iterate through each image
    for image_path in image_paths:
        # dataframe row to insert
        row = {}
        row["image_path"] = image_path
        row["errors"] = None
        try:
            # append tags to our row
            tags = get_image_tags(image_path, read_tags)
            row.update(tags)
        except Exception as err:
            row["errors"] = str(err)
        finally:
            # add the row to our dataframe
            df = df.append(row, ignore_index=True)
    # return the entire dataframe
    return df





image_paths = []
for img in os.listdir("./images/"):
    image_paths.append(os.path.join("./images/", img))


df = get_image_tags_to_dataframe(image_paths)
df.to_csv("images.csv")
print(df)


                        image_path            errors ExifVersion  \
0    ./images/1112201329_HDR~2.jpg              None         NaN   
1             ./images/tj_food.jpg  division by zero         NaN   
2           ./images/IMG_7690.jpeg              None        0232   
3        ./images/grasshopper2.jpg              None        0220   
4     ./images/20220411_075008.jpg              None        0220   
..                             ...               ...         ...   
111        ./images/botanical2.jpg              None        0220   
112              ./images/lake.jpg              None        0220   
113              ./images/pool.jpg              None        0220   
114       ./images/1213202230b.jpg              None         NaN   
115  ./images/20211217_1730392.jpg              None        0220   

        DateTimeOriginal  ExposureBiasValue  GPSAltitude  GPSLongitude  \
0                    NaN                NaN          NaN           NaN   
1                    NaN           