Skip to content

Commit

Permalink
Merge 781dffc into bc6a6e1
Browse files Browse the repository at this point in the history
  • Loading branch information
cole committed Apr 9, 2019
2 parents bc6a6e1 + 781dffc commit 10b0e6b
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 46 deletions.
5 changes: 4 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
# html_static_path = ["_static"]

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
Expand Down Expand Up @@ -326,7 +326,10 @@
# epub_use_index = True

doctest_global_setup = """
import os
import tempfile
import uuid
import lxml
import metsrw
"""
85 changes: 70 additions & 15 deletions docs/reading-mets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,96 @@ Reading METS files
metsrw supports reading METS files from disk, from strings, or from lxml_
`_Element` or `_ElementTree` objects.

.. code-block:: python
.. testcode::

# From a file on disk
mets = metsrw.METSDocument.fromfile('path/to/file')
mets = metsrw.METSDocument.fromfile("../fixtures/complete_mets.xml")

# From a string
mets_str = """<?xml version='1.0' encoding='ASCII'?>
# From bytes
mets_str = b"""<?xml version='1.0' encoding='ASCII'?>
<mets xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.loc.gov/METS/" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version18/mets.xsd">
<metsHdr CREATEDATE="2015-12-16T22:38:48"/>
<structMap ID="structMap_1" LABEL="Archivematica default" TYPE="physical"/>
</mets>"""
mets = metsrw.METSDocument.fromstring(mets_str)

# From an lxml object
tree = lxml.etree.fromfile('path/to/file')
tree = lxml.etree.parse("../fixtures/complete_mets.xml")
mets = metsrw.METSDocument.fromtree(tree)


Accessing METS Data
-------------------

To retrieve a :class:`metsrw.FSEntry`, use the
:func:`~metsrw.METSDocument.get_file` method.
:meth:`~metsrw.METSDocument.get_file` or
:meth:`~metsrw.METSDocument.all_files` methods.

.. code-block:: python
.. doctest::

mets = metsrw.METSDocument()
file_uuid = str(uuid.uuid4())
file_1 = metsrw.FSEntry(
label="hello.pdf", path="test/hello.pdf", type="Item",
file_uuid=file_uuid)
mets.append_file(file_1)
>>> mets = metsrw.METSDocument()
>>> file_uuid = str(uuid.uuid4())
>>> file_1 = metsrw.FSEntry(
... label="hello.pdf", path="test/hello.pdf", type="Item",
... file_uuid=file_uuid)
>>> mets.append_file(file_1)

# Returns file_1
mets.get_file(file_uuid=file_uuid)
>>> mets.get_file(file_uuid=file_uuid)
FSEntry(type='Item', path='test/hello.pdf', use='original', ...)

>>> mets.all_files()
{FSEntry(type='Item', path='test/hello.pdf', use='original', ...)}

# Currently, filtering files can only be done via iteration
>>> [entry for entry in mets.all_files() if entry.use == "original"]
[FSEntry(type='Item', path='test/hello.pdf', use='original', ...)]


`amdSec` and `dmdSec` data is accessible via the
:attr:`~metsrw.FSEntry.amdsecs` and :attr:`~metsrw.FSEntry.dmdsecs`
properties.

.. doctest::

>>> mets = metsrw.METSDocument.fromfile('../fixtures/complete_mets.xml')
>>> fsentry = mets.get_file(file_uuid="ab5c67fc-8f80-4e46-9f20-8d5ae29c43f2")
>>> amdsec1 = fsentry.amdsecs[0]
>>> [section for section in amdsec1.subsections if section.subsection == 'techMD']
[<metsrw.metadata.SubSection ...>]
>>> fsentry.dmdsecs[0]
<metsrw.metadata.SubSection ...>


.. note::
In most cases, you'll want to access PREMIS data via the `get_premis`
series of methods, rather than accessing the `amdSec` or `dmdSec` data
directly. See `Accessing PREMIS Data`_ for more info.


Accessing PREMIS Data
---------------------

To access PREMIS_ metadata associated with a file, use the following
methods:

* :meth:`~metsrw.FSEntry.get_premis_objects`
* :meth:`~metsrw.FSEntry.get_premis_events`
* :meth:`~metsrw.FSEntry.get_premis_agents`
* :meth:`~metsrw.FSEntry.get_premis_rights`


.. doctest::

# Currently, filtering PREMIS data (events, in this example) can only be done via iteration
>>> ingestion_events = []
>>> mets = metsrw.METSDocument.fromfile('../fixtures/complete_mets.xml')
>>> for fsentry in mets.all_files():
... for event in fsentry.get_premis_events():
... if event.type == "ingestion":
... ingestion_events.append(event)
>>> ingestion_events[0]
('event', ...)


.. _lxml: https://lxml.de/index.html
.. _PREMIS: https://www.loc.gov/standards/premis/v3/index.html
72 changes: 42 additions & 30 deletions docs/writing-mets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,39 @@ To add data to a :class:`metsrw.METSDocument`, create and append
:class:`metsrw.FSEntry` objects.


.. code-block:: python
.. doctest::

mets = metsrw.METSDocument()
directory_1 = metsrw.FSEntry(label="test", path="test", type="Directory")
file_1 = metsrw.FSEntry(
label="hello.pdf", path="test/hello.pdf", type="Item",
file_uuid=str(uuid.uuid4()))
directory_1.children.append(file_1)
file_2 = metsrw.FSEntry(
label="demo.jpg", path="test/demo.jpg", type="Item",
file_uuid=str(uuid.uuid4()))
directory_1.children.append(file_2)
mets.append_file(file1)
>>> mets = metsrw.METSDocument()
>>> directory_1 = metsrw.FSEntry(label="test", path="test", type="Directory")
>>> file_1 = metsrw.FSEntry(
... label="hello.pdf", path="test/hello.pdf", type="Item",
... file_uuid=str(uuid.uuid4()))
>>> directory_1.children.append(file_1)
>>> file_2 = metsrw.FSEntry(
... label="demo.jpg", path="test/demo.jpg", type="Item",
... file_uuid=str(uuid.uuid4()))
>>> directory_1.children.append(file_2)
>>> mets.append_file(directory_1)
>>> mets.all_files()
{FSEntry(...), FSEntry(...)}


Metadata is added through the ``add_*`` methods of :class:`metsrw.FSEntry`.

.. code-block:: python
.. testcode::

file_1 = metsrw.FSEntry(
label="hello.pdf", path="test/hello.pdf", type="Item",
file_uuid=str(uuid.uuid4()))

file1.add_premis_object("<premis>object</premis>")
file1.add_premis_event("<premis>event</premis>")
file1.add_premis_agent("<premis>agent</premis>")
rights = file1.add_premis_rights("<premis>rights</premis>")
dc = file1.add_dublin_core("<dublincore>metadata</dublincore>")
file_1.add_premis_object("<premis>object</premis>")
file_1.add_premis_event("<premis>event</premis>")
file_1.add_premis_agent("<premis>agent</premis>")
rights = file_1.add_premis_rights("<premis>rights</premis>")
dc = file_1.add_dublin_core("<dublincore>metadata</dublincore>")

# Replaces added metatdata
rights.replace_with(file1.add_premis_rights("<premis>newer rights</premis>"))
# Replaces added metadata
rights.replace_with(file_1.add_premis_rights("<premis>newer rights</premis>"))


Serialization
Expand All @@ -50,22 +49,35 @@ Serialization
metsrw supports serialization to a file, to bytes, or to an lxml_ `_Element` object.


.. code-block:: python
.. testsetup:: serialization

temp_dir = tempfile.mkdtemp()
output_path = os.path.join(temp_dir, "demo.xml")
mets = metsrw.METSDocument()
file1 = metsrw.FSEntry("hello.pdf", file_uuid=str(uuid.uuid4()))
mets.append_file(file1)

.. testcleanup:: serialization

os.remove(output_path)
os.removedirs(temp_dir)

.. doctest:: serialization

>>> mets = metsrw.METSDocument()
>>> file1 = metsrw.FSEntry("hello.pdf", file_uuid=str(uuid.uuid4()))
>>> mets.append_file(file1)

>>> # To file on disk
>>> mets.write("/path/to/file")
# To file on disk
>>> mets.write(output_path)

>>> # To _Element object
# To _Element object
>>> mets.serialize()
<Element {http://www.loc.gov/METS/}mets at 0x104f89c88>
<Element {http://www.loc.gov/METS/}mets ...>

>>> # To bytes
# To bytes
>>> mets.tostring()
b'<?xml version=\'1.0\' encoding=\'ASCII\'?>\n<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version111/mets.xsd">\n <mets:metsHdr CREATEDATE="2019-03-26T23:16:08"/>\n <mets:fileSec>\n <mets:fileGrp USE="original">\n <mets:file ID="file-ad6a74d1-f8c1-4a33-a2e4-469608e3331a" GROUPID="Group-ad6a74d1-f8c1-4a33-a2e4-469608e3331a">\n <mets:FLocat xlink:href="hello.pdf" LOCTYPE="OTHER" OTHERLOCTYPE="SYSTEM"/>\n </mets:file>\n </mets:fileGrp>\n </mets:fileSec>\n <mets:structMap ID="structMap_1" LABEL="Archivematica default" TYPE="physical">\n <mets:div TYPE="Item" LABEL="hello.pdf">\n <mets:fptr FILEID="file-ad6a74d1-f8c1-4a33-a2e4-469608e3331a"/>\n </mets:div>\n </mets:structMap>\n <mets:structMap ID="structMap_2" LABEL="Normative Directory Structure" TYPE="logical">\n <mets:div TYPE="Item" LABEL="hello.pdf"/>\n </mets:structMap>\n</mets:mets>\n'
b'<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<mets:mets ...'


.. _lxml: https://lxml.de/index.html

0 comments on commit 10b0e6b

Please sign in to comment.