Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,9 @@ For direct JSON serialization, combine this option with ``details=False`` to avo

.. code-block:: python

json.dumps(exifread.process_file(file_handle, details=False, builtin_types=True))
json.dumps(
exifread.process_file(file_handle, details=False, builtin_types=True)
)

Usage Example
=============
Expand Down
51 changes: 21 additions & 30 deletions exifread/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from exifread.core.exceptions import ExifNotFound, InvalidExif
from exifread.core.exif_header import ExifHeader
from exifread.core.find_exif import determine_type, get_endian_str
from exifread.core.xmp import find_xmp_data
from exifread.exif_log import get_logger
from exifread.serialize import convert_types
from exifread.tags import DEFAULT_STOP_TAG
Expand All @@ -17,36 +18,9 @@
logger = get_logger()


def _get_xmp(fh: BinaryIO) -> bytes:
    """
    Scan a binary file line by line for an embedded XMP packet.

    Collects everything from the first ``<x:xmpmeta`` opening tag up to and
    including the first ``</x:xmpmeta>`` closing tag that follows it.
    A closing tag seen before any opening tag is ignored, so stray bytes
    ahead of the packet cannot end the search early.

    :param fh: the file to search, opened in binary mode; it is read from
        its current position.
    :returns: the raw XMP bytes, or ``b""`` if no opening tag is found.
        If the closing tag is missing, everything from the opening tag to
        the end of the file is returned.
    """
    open_marker = b"<x:xmpmeta"
    close_marker = b"</x:xmpmeta>"
    chunks = []
    xml_started = False
    logger.debug("XMP not in Exif, searching file for XMP info...")
    for line in fh:
        # Position in this line where the XMP payload starts.
        offset = 0
        if not xml_started:
            offset = line.find(open_marker)
            if offset == -1:
                # Still before the packet: skip, even if a stray closing
                # tag happens to be on this line.
                continue
            xml_started = True
            logger.debug("XMP found opening tag at line position %s", offset)
        # Only look for the closing tag after the start of the payload.
        close_tag = line.find(close_marker, offset)
        if close_tag != -1:
            logger.debug("XMP found closing tag at line position %s", close_tag)
            chunks.append(line[offset : close_tag + len(close_marker)])
            break
        chunks.append(line[offset:])
    logger.debug("XMP Finished searching for info")
    # Join once at the end instead of repeatedly re-allocating bytes.
    return b"".join(chunks)


def process_file(
fh: BinaryIO,
stop_tag=DEFAULT_STOP_TAG,
stop_tag: str = DEFAULT_STOP_TAG,
details=True,
strict=False,
debug=False,
Expand All @@ -56,10 +30,27 @@ def process_file(
builtin_types=False,
) -> Dict[str, Any]:
"""
Process an image file (expects an open file object).
Process an image file to extract EXIF metadata.

This is the function that has to deal with all the arbitrary nasty bits
of the EXIF standard.

:param fh: the file to process, must be opened in binary mode.
:param stop_tag: Stop processing when the given tag is retrieved.
:param details: If `True`, process MakerNotes.
:param strict: If `True`, raise exceptions on errors.
:param debug: Output debug information.
:param truncate_tags: If `True`, truncate the `printable` tag output.
There is no effect on tag `values`.
:param auto_seek: If `True`, automatically `seek` to the start of the file.
:param extract_thumbnail: If `True`, extract the JPEG thumbnail.
The thumbnail is not always present in the EXIF metadata.
:param builtin_types: If `True`, convert tags to standard Python types.

:returns: A `dict` containing the EXIF metadata.
The keys are a string in the format `"IFD_NAME TAG_NAME"`.
If `builtin_types` is `False`, the value will be an `IfdTag` instance, or bytes.
If `builtin_types` is `True`, the value will be a standard Python type.
"""

if auto_seek:
Expand Down Expand Up @@ -121,7 +112,7 @@ def process_file(
xmp_bytes = bytes(xmp_tag.values)
# We need to look in the entire file for the XML
else:
xmp_bytes = _get_xmp(fh)
xmp_bytes = find_xmp_data(fh)
if xmp_bytes:
hdr.parse_xmp(xmp_bytes)

Expand Down
22 changes: 2 additions & 20 deletions exifread/core/exif_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
import re
import struct
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Union
from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError

from exifread.core.exceptions import ExifError
from exifread.core.ifd_tag import IfdTag
from exifread.core.xmp import xmp_bytes_to_str
from exifread.exif_log import get_logger
from exifread.tags import (
DEFAULT_STOP_TAG,
Expand Down Expand Up @@ -700,23 +699,6 @@ def parse_xmp(self, xmp_bytes: bytes):
"""Adobe's Extensible Metadata Platform, just dump the pretty XML."""

logger.debug("XMP cleaning data")

# Pray that it's encoded in UTF-8
# TODO: allow user to specify encoding
xmp_string = xmp_bytes.decode("utf-8")

try:
pretty = parseString(xmp_string).toprettyxml()
except ExpatError:
logger.warning("XMP: XML is not well formed")
self.tags["Image ApplicationNotes"] = IfdTag(
xmp_string, 0, FieldType.BYTE, xmp_bytes, 0, 0
)
return
cleaned = []
for line in pretty.splitlines():
if line.strip():
cleaned.append(line)
self.tags["Image ApplicationNotes"] = IfdTag(
"\n".join(cleaned), 0, FieldType.BYTE, xmp_bytes, 0, 0
xmp_bytes_to_str(xmp_bytes), 0, FieldType.BYTE, xmp_bytes, 0, 0
)
57 changes: 57 additions & 0 deletions exifread/core/xmp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""XMP related utilities.."""

from pyexpat import ExpatError
from typing import BinaryIO
from xml.dom.minidom import parseString

from exifread.exif_log import get_logger

logger = get_logger()


def find_xmp_data(fh: BinaryIO) -> bytes:
    """
    Scan a binary file line by line for an embedded XMP packet.

    Collects everything from the first ``<x:xmpmeta`` opening tag up to and
    including the first ``</x:xmpmeta>`` closing tag that follows it.
    A closing tag seen before any opening tag is ignored, so stray bytes
    ahead of the packet cannot end the search early.

    :param fh: the file to search, opened in binary mode; it is read from
        its current position.
    :returns: the raw XMP bytes, or ``b""`` if no opening tag is found.
        If the closing tag is missing, everything from the opening tag to
        the end of the file is returned.
    """
    open_marker = b"<x:xmpmeta"
    close_marker = b"</x:xmpmeta>"
    chunks = []
    xml_started = False
    logger.debug("XMP not in Exif, searching file for XMP info...")
    for line in fh:
        # Position in this line where the XMP payload starts.
        offset = 0
        if not xml_started:
            offset = line.find(open_marker)
            if offset == -1:
                # Still before the packet: skip, even if a stray closing
                # tag happens to be on this line.
                continue
            xml_started = True
            logger.debug("XMP found opening tag at line position %s", offset)
        # Only look for the closing tag after the start of the payload.
        close_tag = line.find(close_marker, offset)
        if close_tag != -1:
            logger.debug("XMP found closing tag at line position %s", close_tag)
            chunks.append(line[offset : close_tag + len(close_marker)])
            break
        chunks.append(line[offset:])
    # Join once at the end instead of repeatedly re-allocating bytes.
    xmp_bytes = b"".join(chunks)
    logger.debug("Found %s XMP bytes", len(xmp_bytes))
    return xmp_bytes


def xmp_bytes_to_str(xmp_bytes: bytes) -> str:
    """
    Convert raw XMP bytes to a pretty-printed XML string.

    The bytes are decoded as UTF-8 and re-indented with ``minidom``; lines
    that are empty or whitespace-only are dropped. If the XML is not well
    formed, a warning is logged and the decoded text is returned unchanged.

    :param xmp_bytes: the raw XMP data.
    :returns: the XMP data as text.
    :raises UnicodeDecodeError: if the bytes are not valid UTF-8.
    """
    logger.debug("Cleaning XMP data ...")

    # The data is assumed to be UTF-8 encoded
    # TODO: allow user to specify encoding
    decoded = xmp_bytes.decode("utf-8")

    try:
        indented = parseString(decoded).toprettyxml()
    except ExpatError:
        logger.warning("XMP: XML is not well formed")
        return decoded

    # Keep only the lines that carry content.
    return "\n".join(row for row in indented.splitlines() if row.strip())
51 changes: 49 additions & 2 deletions tests/test_process_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@ def test_corrupted_pass():
assert len(tags) == 69


@pytest.mark.parametrize("builtin_types", (True, False))
@pytest.mark.parametrize(
"stop_tag, tag_count", (("ColorSpace", 39), (DEFAULT_STOP_TAG, 51))
)
def test_stop_at_tag(stop_tag, tag_count):
def test_stop_at_tag(builtin_types, stop_tag, tag_count):
file_path = RESOURCES_ROOT / "jpg/Canon_40D.jpg"
with open(file_path, "rb") as fh:
tags = exifread.process_file(fh=fh, stop_tag=stop_tag)
tags = exifread.process_file(
fh=fh, stop_tag=stop_tag, builtin_types=builtin_types
)
assert len(tags) == tag_count


Expand Down Expand Up @@ -118,3 +121,47 @@ def test_stop_tag_with_thumbnail_extract():
with open(file_path, "rb") as fh:
tags = exifread.process_file(fh=fh, details=False, stop_tag="Orientation")
assert tags


@pytest.mark.parametrize("details", (True, False))
@pytest.mark.parametrize("truncate_tags", (True, False))
@pytest.mark.parametrize("stop_tag", ("WhiteBalance", DEFAULT_STOP_TAG))
def test_builtin_types(stop_tag, details, truncate_tags):
    """
    With ``builtin_types=True`` tag values are always plain Python types.

    The other options are varied to check that they do not interfere.
    The "WhiteBalance" tag comes after all the tags checked here, so
    stopping there must not change the result.
    """
    image_path = RESOURCES_ROOT / "jpg/Canon_DIGITAL_IXUS_400.jpg"
    options = {
        "builtin_types": True,
        "stop_tag": stop_tag,
        "details": details,
        "truncate_tags": truncate_tags,
    }
    with open(image_path, "rb") as image_file:
        tags = exifread.process_file(fh=image_file, **options)

    # Short mapped to string value
    assert tags["EXIF ColorSpace"] == "sRGB"

    # Short
    image_length = tags["EXIF ExifImageLength"]
    assert isinstance(image_length, int)
    assert image_length == 75

    # Ratio
    exposure_time = tags["EXIF ExposureTime"]
    assert isinstance(exposure_time, float)
    assert exposure_time == 0.005

    # ASCII
    assert tags["Image Make"] == "Canon"

    # Unknown / Undefined
    assert tags["EXIF FlashPixVersion"] == "0100"


def test_xmp_no_tag():
    """XMP data that is not stored in an Exif tag is still read."""

    tiff_path = RESOURCES_ROOT / "tiff/Arbitro.tiff"
    with open(tiff_path, "rb") as tiff_file:
        tags = exifread.process_file(fh=tiff_file, builtin_types=True)
    application_notes = tags["Image ApplicationNotes"]
    assert len(application_notes) == 323