Merge branch 'projectxml'

datalad · Mar 1, 2023 · dcb2c13 · dcb2c13
2 parents 9cf4566 + 6057002
commit dcb2c13
Show file tree

Hide file tree

Showing 5 changed files with 301 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -26,6 +26,7 @@ $ python -m pip install git+https://github.com/datalad/datalad-redcap.git@main
 
 ## Commands
 - `export-redcap-form`: Export records from selected forms (instruments)
+- `export-redcap-project-xml`: Export entire project as a REDCap XML file
 - `export-redcap-report`: Export a report that was defined in a project
 - `redcap-query`: Show names of available forms (instruments)
 

diff --git a/datalad_redcap/__init__.py b/datalad_redcap/__init__.py
@@ -23,6 +23,13 @@
             # optional name of the command in the Python API
             'export_redcap_form'
         ),
+        (
+            'datalad_redcap.export_project_xml',
+            'ExportProjectXML',
+            'export-redcap-project-xml',
+            'export_redcap_project_xml'
+        ),
+
         (
             'datalad_redcap.export_report',
             'ExportReport',

diff --git a/datalad_redcap/export_project_xml.py b/datalad_redcap/export_project_xml.py
@@ -0,0 +1,252 @@
+from pathlib import Path
+from typing import Optional
+
+from redcap.methods.project_info import ProjectInfo
+
+from datalad.distribution.dataset import (
+    require_dataset,
+    resolve_path,
+)
+from datalad.interface.common_opts import (
+    nosave_opt,
+    save_message_opt,
+)
+from datalad_next.commands import (
+    EnsureCommandParameterization,
+    ValidatedInterface,
+    Parameter,
+    build_doc,
+    datasetmethod,
+    eval_results,
+    get_status_dict,
+)
+from datalad_next.constraints import (
+    EnsureBool,
+    EnsurePath,
+    EnsureStr,
+    EnsureURL,
+)
+from datalad_next.constraints.dataset import (
+    DatasetParameter,
+    EnsureDataset,
+)
+from datalad_next.utils import CredentialManager
+
+from .utils import (
+    update_credentials,
+    check_ok_to_edit,
+)
+
+
+def export_project_xml(
+    self,
+    metadata_only: bool = False,
+    files: bool = False,
+    survey_fields: bool = False,
+    dags: bool = False,
+):
+    """Request the whole project from the REDCap API as an XML export
+
+    Written as a patch for the PyCap ProjectInfo class: `self` has to
+    provide the `_initialize_payload`, `_lookup_return_type`,
+    `_call_api` and `_return_data` helpers used below.
+
+    Parameters
+    ----------
+    metadata_only:
+        sent to the API as ``returnMetadataOnly``
+    files:
+        sent to the API as ``exportFiles``
+    survey_fields:
+        sent to the API as ``exportSurveyFields``
+    dags:
+        sent to the API as ``exportDataAccessGroups``
+
+    Returns
+    -------
+    Whatever ``self._return_data`` produces for the "xml" format.
+    """
+    xml = "xml"
+
+    # base request for the "project_xml" content type, plus export switches
+    request = self._initialize_payload(content="project_xml", format_type=xml)
+    request.update(
+        returnMetadataOnly=metadata_only,
+        exportFiles=files,
+        exportSurveyFields=survey_fields,
+        exportDataAccessGroups=dags,
+    )
+
+    raw = self._call_api(
+        request,
+        self._lookup_return_type(xml, request_type="export"),
+    )
+
+    # NOTE(review): the content label handed to _return_data is "instrument",
+    # not "project_xml" -- kept as found; confirm whether it matters for xml
+    return self._return_data(
+        response=raw,
+        content="instrument",
+        format_type=xml,
+        df_kwargs=None,
+    )
+
+
+# monkey-patch ProjectInfo: attach the module-level function defined above
+# as the `export_project_xml` attribute of the class, so that it can be
+# called as a method on ProjectInfo instances
+ProjectInfo.export_project_xml = export_project_xml
+
+
+@build_doc
+class ExportProjectXML(ValidatedInterface):
+    """Export entire project (metadata & data) as a REDCap XML file
+
+    This exports all the project content (all records, events, arms,
+    instruments, fields, and project attributes) as a single XML
+    file. The file can be used to create a clone of the project on the
+    same or another REDCap instance. It can also be useful for
+    archival.
+
+    By default, the export will include all data as well. You can
+    choose to export metadata only.
+
+    Note that when exporting data, Data Export user rights will be
+    applied to any returned data. 'Full Data Set' export rights in the
+    project are required to obtain everything.
+    """
+
+    # command-line / Python API parameter declarations
+    _params_ = dict(
+        url=Parameter(
+            args=("url",),
+            doc="API URL to a REDCap server",
+        ),
+        outfile=Parameter(
+            args=("outfile",),
+            doc="file to write. Existing files will be overwritten.",
+        ),
+        dataset=Parameter(
+            args=("-d", "--dataset"),
+            metavar="PATH",
+            doc="""the dataset in which the output file will be saved.
+            The `outfile` argument will be interpreted as being relative to
+            this dataset.  If no dataset is given, it will be identified
+            based on the working directory.""",
+        ),
+        credential=Parameter(
+            args=("--credential",),
+            metavar="name",
+            doc="""name of the credential providing a token to be used for
+            authorization. If a match for the name is found, it will
+            be used; otherwise the user will be prompted and the
+            credential will be saved. If the name is not provided, the
+            last-used credential matching the API url will be used if
+            present; otherwise the user will be prompted and the
+            credential will be saved under a default name.""",
+        ),
+        metadata_only=Parameter(
+            args=("--metadata-only",),
+            action="store_true",
+            doc="""Return only metadata (all fields, forms, events, and arms), 
+            do not include data""",
+        ),
+        # the command-line flag is negative (--no-survey-fields), it stores
+        # False into the positive `survey_fields` argument
+        survey_fields=Parameter(
+            args=("--no-survey-fields",),
+            dest="survey_fields",
+            action="store_false",
+            doc="Do not include survey identifier or survey timestamp fields",
+        ),
+        message=save_message_opt,
+        save=nosave_opt,
+    )
+
+    # constraints applied to the arguments before __call__ runs
+    _validator_ = EnsureCommandParameterization(
+        dict(
+            url=EnsureURL(required=["scheme", "netloc", "path"]),
+            outfile=EnsurePath(),
+            # purpose names this command (was copied from the report export)
+            dataset=EnsureDataset(
+                installed=True, purpose="export redcap project XML"
+            ),
+            credential=EnsureStr(),
+            metadata_only=EnsureBool(),
+            survey_fields=EnsureBool(),
+            message=EnsureStr(),
+            save=EnsureBool(),
+        ),
+    )
+
+    @staticmethod
+    @datasetmethod(name="export_redcap_project_xml")
+    @eval_results
+    def __call__(
+        url: str,
+        outfile: Path,
+        dataset: Optional[DatasetParameter] = None,
+        credential: Optional[str] = None,
+        metadata_only: bool = False,
+        survey_fields: bool = True,
+        message: Optional[str] = None,
+        save: bool = True,
+    ):
+        # Yields one result record: status "error" if the target file may
+        # not be edited, status "ok" after the export has been written
+        # (and saved, unless `save` is False).
+
+        # work with a dataset object
+        if dataset is None:
+            # https://github.com/datalad/datalad-next/issues/225
+            ds = require_dataset(None)
+        else:
+            ds = dataset.ds
+
+        # sort out the path in context of the dataset
+        res_outfile = resolve_path(outfile, ds=ds)
+
+        # refuse to operate if target file is outside the dataset or not clean
+        ok_to_edit, unlock = check_ok_to_edit(res_outfile, ds)
+        if not ok_to_edit:
+            yield get_status_dict(
+                # same action label as the success record below
+                action="export_redcap_project_xml",
+                path=res_outfile,
+                status="error",
+                message=(
+                    "Output file status is not clean or the file does not "
+                    "belong directly to the reference dataset."
+                ),
+            )
+            return
+
+        # determine a token
+        credman = CredentialManager(ds.config)
+        credname, credprops = credman.obtain(
+            name=credential,
+            prompt="A token is required to access the REDCap project API",
+            type_hint="token",
+            query_props={"realm": url},
+            expected_props=("secret",),
+        )
+
+        # create an api object
+        api = ProjectInfo(
+            url=url,
+            token=credprops["secret"],
+        )
+
+        # perform the api query
+        # note: not exporting files or data access groups
+        response = api.export_project_xml(
+            metadata_only=metadata_only,
+            survey_fields=survey_fields,
+        )
+
+        # query went well, store or update credentials
+        update_credentials(credman, credname, credprops)
+
+        # unlock the file if needed, and write contents
+        if unlock:
+            ds.unlock(res_outfile)
+        # write UTF-8 explicitly instead of relying on the locale's default
+        # encoding, so the bytes on disk do not depend on the platform
+        # NOTE(review): assumes the exported XML declares UTF-8 -- confirm
+        with open(res_outfile, "wt", encoding="utf-8") as f:
+            f.write(response)
+
+        # save changes in the dataset
+        if save:
+            ds.save(
+                message=message
+                if message is not None
+                else _write_commit_message(
+                    "Export REDCap Project XML",
+                    metadata_only=metadata_only,
+                    survey_fields=survey_fields,
+                ),
+                path=res_outfile,
+            )
+
+        # yield successful result if we made it to here
+        yield get_status_dict(
+            action="export_redcap_project_xml",
+            path=res_outfile,
+            status="ok",
+        )
+
+
+def _write_commit_message(header: str, **export_opts: object) -> str:
+    """Return a formatted commit message that lists export options
+
+    Parameters
+    ----------
+    header:
+        first line of the commit message
+    **export_opts:
+        option names and values (of any type, e.g. bool); each one is
+        rendered with ``str()`` formatting as a ``- name: value`` line
+
+    Returns
+    -------
+    str
+        `header` alone if no options were given; otherwise `header`, a
+        blank line, an "Export options:" line, and one line per option
+        in the order in which they were passed.
+    """
+    if not export_opts:
+        return header
+    option_list = "\n".join(f"- {k}: {v}" for k, v in export_opts.items())
+    return f"{header}\n\nExport options:\n{option_list}"
diff --git a/datalad_redcap/tests/test_export_project_xml.py b/datalad_redcap/tests/test_export_project_xml.py
@@ -0,0 +1,39 @@
+from pathlib import Path
+from unittest.mock import patch
+
+from datalad.api import export_redcap_project_xml
+from datalad.distribution.dataset import Dataset
+from datalad_next.tests.utils import (
+    assert_status,
+    eq_,
+    with_credential,
+    with_tempfile,
+)
+from datalad.tests.utils_pytest import ok_file_has_content
+
+# Fixed values shared by the tests in this module.
+# URL passed as the `url` argument of the command
+DUMMY_URL = "https://www.example.com/api/"
+DUMMY_TOKEN = "WTJ3G8XWO9G8V1BB4K8N81KNGRPFJOVL"  # needed to pass length assertion
+# return value of the patched export call, and expected file content
+XML_CONTENT = """<?xml version="1.0" encoding="UTF-8" ?>"""
+# name under which the dummy token credential is provided
+CREDNAME = "redcap"
+
+
+@with_tempfile
+@patch(
+    "datalad_redcap.export_project_xml.ProjectInfo.export_project_xml",
+    return_value=XML_CONTENT,
+)
+@with_credential(CREDNAME, type="token", secret=DUMMY_TOKEN)
+def test_export_xml_saves_content(ds_path=None, mocker=None):
+    """The command writes the (patched) API response and saves it
+
+    The export call on ProjectInfo is replaced by a patch returning
+    XML_CONTENT; the test then checks the result status, the content of
+    the written file, and that the file is reported as clean afterwards.
+    """
+    outfile_name = "project.xml"
+    dataset = Dataset(ds_path).create(result_renderer="disabled")
+
+    results = export_redcap_project_xml(
+        url=DUMMY_URL,
+        outfile=outfile_name,
+        dataset=dataset,
+        credential=CREDNAME,
+    )
+    assert_status("ok", results)
+
+    # file holds exactly what the patched API call returned
+    ok_file_has_content(Path(ds_path) / outfile_name, XML_CONTENT)
+
+    # nothing left unsaved for the exported file
+    file_status = dataset.status(outfile_name, return_type="item-or-list")
+    eq_(file_status.get("state"), "clean")
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -29,6 +29,7 @@ High-level API commands
    :toctree: generated
 
    export_redcap_form
+   export_redcap_project_xml
    export_redcap_report
    redcap_query
 
@@ -40,6 +41,7 @@ Command line reference
    :maxdepth: 1
 
    generated/man/datalad-export-redcap-form
+   generated/man/datalad-export-redcap-project-xml
    generated/man/datalad-export-redcap-report
    generated/man/datalad-redcap-query.rst