IQSS · kcondon · Jul 29, 2020 · Jul 14, 2020 · Jul 15, 2020 · Jul 15, 2020
diff --git a/conf/docker-aio/run-test-suite.sh b/conf/docker-aio/run-test-suite.sh
@@ -8,4 +8,4 @@ fi
 
 # Please note the "dataverse.test.baseurl" is set to run for "all-in-one" Docker environment.
 # TODO: Rather than hard-coding the list of "IT" classes here, add a profile to pom.xml.
-mvn test -Dtest=DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT -Ddataverse.test.baseurl=$dvurl
+mvn test -Dtest=DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT -Ddataverse.test.baseurl=$dvurl
diff --git a/doc/release-notes/4529-download-by-dataset.md b/doc/release-notes/4529-download-by-dataset.md
@@ -0,0 +1,6 @@
+In previous versions of Dataverse, downloading all files from a dataset via API was a two step process:
+
+- Find all the database id of the files.
+- Download all the files, using those ids (comma-separated).
+
+Now you can download all files from a dataset (assuming you have access to them) via API by passing the dataset persistent ID (PID such as DOI or Handle) or the dataset's database id. Versions are also supported like with the "download metadata" API you can pass :draft, :latest, :latest-published, or numbers (1.1, 2.0).
diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst
@@ -7,6 +7,75 @@ More advanced features of the Access API include format-specific transformations
 .. contents:: |toctitle|
    :local:
 
+.. _download-by-dataset-api:
+
+Downloading All Files in a Dataset
+----------------------------------
+
+The "download by dataset" API downloads as many files as possible from a dataset as a zipped bundle.
+
+By default, tabular files are downloaded in their "archival" form (tab-separated values). To download the original files (Stata, for example), add ``format=original`` as a query parameter.
+
+There are a number of reasons why not all of the files can be downloaded:
+
+- Some of the files are restricted and your API token doesn't have access (you will still get the unrestricted files).
+- The Dataverse installation has limited how large the zip bundle can be.
+
+In the curl example below, the flags ``-O`` and ``J`` are used. When there are no errors, this has the effect of saving the file as "dataverse_files.zip" (just like the web interface). The flags force errors to be downloaded as a file.
+
+Please note that in addition to the files from dataset, an additional file call "MANIFEST.TXT" will be included in the zipped bundle. It has additional information about the files.
+
+There are two forms of the "download by dataset" API, a basic form and one that supports dataset versions.
+
+Basic Download By Dataset
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The basic form downloads files from the latest accessible version of the dataset. If you are not using an API token, this means the most recently published version. If you are using an API token with full access to the dataset, this means the draft version or the most recently published version if no draft exists.
+
+A curl example using a DOI (no version):
+
+.. code-block:: bash
+
+  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  export SERVER_URL=https://demo.dataverse.org
+  export PERSISTENT_ID=doi:10.70122/FK2/N2XGBJ
+
+  curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/dataset/:persistentId/?persistentId=$PERSISTENT_ID
+
+The fully expanded example above (without environment variables) looks like this:
+
+.. code-block:: bash
+
+  curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/dataset/:persistentId/?persistentId=doi:10.70122/FK2/N2XGBJ
+
+Download By Dataset By Version
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The second form of the "download by dataset" API allows you to specify which version you'd like to download files from. As with the ``datasets`` API endpoints described in the :doc:`native-api` section, the following identifiers can be used.
+
+* ``:draft``  the draft version, if any
+* ``:latest`` either a draft (if exists) or the latest published version.
+* ``:latest-published`` the latest published version
+* ``x.y`` a specific version, where ``x`` is the major version number and ``y`` is the minor version number.
+* ``x`` same as ``x.0``
+
+A curl example using a DOI (with version):
+
+.. code-block:: bash
+
+  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  export SERVER_URL=https://demo.dataverse.org
+  export PERSISTENT_ID=doi:10.70122/FK2/N2XGBJ
+  export VERSION=2.0
+
+  curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/dataset/:persistentId/versions/$VERSION?persistentId=$PERSISTENT_ID
+
+The fully expanded example above (without environment variables) looks like this:
+
+.. code-block:: bash
+
+  curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/dataset/:persistentId/versions/2.0?persistentId=doi:10.70122/FK2/N2XGBJ
+
 Basic File Access
 -----------------
 

diff --git a/doc/sphinx-guides/source/api/getting-started.rst b/doc/sphinx-guides/source/api/getting-started.rst
@@ -106,7 +106,9 @@ Downloading Files
 
 The :doc:`dataaccess` section explains how to download files.
 
-In order to download files, you must know their database IDs which you can get from the ``dataverse_json`` metadata at the dataset level. See :ref:`export-dataset-metadata-api`.
+To download all the files in a dataset, see :ref:`download-by-dataset-api`.
+
+In order to download individual files, you must know their database IDs which you can get from the ``dataverse_json`` metadata at the dataset level. See :ref:`export-dataset-metadata-api`.
 
 Downloading Metadata
 ~~~~~~~~~~~~~~~~~~~~

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java
@@ -28,6 +28,7 @@
 import edu.harvard.iq.dataverse.UserNotification;
 import edu.harvard.iq.dataverse.UserNotificationServiceBean;
 import static edu.harvard.iq.dataverse.api.AbstractApiBean.error;
+import static edu.harvard.iq.dataverse.api.Datasets.handleVersion;
 import edu.harvard.iq.dataverse.authorization.DataverseRole;
 import edu.harvard.iq.dataverse.authorization.Permission;
 import edu.harvard.iq.dataverse.authorization.RoleAssignee;
@@ -44,10 +45,16 @@
 import edu.harvard.iq.dataverse.dataaccess.StoredOriginalFile;
 import edu.harvard.iq.dataverse.datavariable.DataVariable;
 import edu.harvard.iq.dataverse.datavariable.VariableServiceBean;
+import edu.harvard.iq.dataverse.engine.command.Command;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
 import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
 import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.RequestAccessCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand;
@@ -541,12 +548,73 @@ public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") bool
 
         return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response);
     }
+
+    @Path("dataset/{id}")
+    @GET
+    @Produces({"application/zip"})
+    public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException {
+        try {
+            DataverseRequest req = createDataverseRequest(findUserOrDie());
+            final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId)));
+            final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved));
+            String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas());
+            return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response);
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+    }
+
+    @Path("dataset/{id}/versions/{versionId}")
+    @GET
+    @Produces({"application/zip"})
+    public Response downloadAllFromVersion(@PathParam("id") String datasetIdOrPersistentId, @PathParam("versionId") String versionId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException {
+        try {
+            DataverseRequest req = createDataverseRequest(findUserOrDie());
+            final Dataset ds = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId)));
+            DatasetVersion dsv = execCommand(handleVersion(versionId, new Datasets.DsVersionHandler<Command<DatasetVersion>>() {
+
+                @Override
+                public Command<DatasetVersion> handleLatest() {
+                    return new GetLatestAccessibleDatasetVersionCommand(req, ds);
+                }
+
+                @Override
+                public Command<DatasetVersion> handleDraft() {
+                    return new GetDraftDatasetVersionCommand(req, ds);
+                }
+
+                @Override
+                public Command<DatasetVersion> handleSpecific(long major, long minor) {
+                    return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor);
+                }
+
+                @Override
+                public Command<DatasetVersion> handleLatestPublished() {
+                    return new GetLatestPublishedDatasetVersionCommand(req, ds);
+                }
+            }));
+            if (dsv == null) {
+                return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.exception.version.not.found"));
+            }
+            String fileIds = getFileIdsAsCommaSeparated(dsv.getFileMetadatas());
+            return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response);
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+    }
+
+    private static String getFileIdsAsCommaSeparated(List<FileMetadata> fileMetadatas) {
+        List<String> ids = new ArrayList<>();
+        for (FileMetadata fileMetadata : fileMetadatas) {
+            Long fileId = fileMetadata.getDataFile().getId();
+            ids.add(String.valueOf(fileId));
+        }
+        return String.join(",", ids);
+    }
+
     /*
      * API method for downloading zipped bundles of multiple files:
      */
-
-    // TODO: Rather than only supporting looking up files by their database IDs,
-    // consider supporting persistent identifiers.
     @Path("datafiles/{fileIds}")
     @GET
     @Produces({"application/zip"})

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -219,7 +219,7 @@ public class Datasets extends AbstractApiBean {
      * Used to consolidate the way we parse and handle dataset versions.
      * @param <T> 
      */
-    private interface DsVersionHandler<T> {
+    public interface DsVersionHandler<T> {
         T handleLatest();
         T handleDraft();
         T handleSpecific( long major, long minor );
@@ -1684,7 +1684,7 @@ private void msgt(String m){
     }
 
 
-    private <T> T handleVersion( String versionId, DsVersionHandler<T> hdl )
+    public static <T> T handleVersion( String versionId, DsVersionHandler<T> hdl )
         throws WrappedResponse {
         switch (versionId) {
             case ":latest": return hdl.handleLatest();

diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
@@ -2329,6 +2329,7 @@ access.api.requestList.fileNotFound=Could not find datafile with id {0}.
 access.api.requestList.noKey=You must provide a key to get list of access requests for a file. 
 access.api.requestList.noRequestsFound=There are no access requests for this file {0}.
 access.api.exception.metadata.not.available.for.nontabular.file=This type of metadata is only available for tabular files.
+access.api.exception.version.not.found=Could not find requested dataset version.
 
 #permission
 permission.AddDataverse.label=AddDataverse