hubmapconsortium · yuanzhou · Sep 24, 2021 · Sep 23, 2021 · Sep 24, 2021 · Sep 24, 2021
diff --git a/src/app.py b/src/app.py
@@ -1954,6 +1954,80 @@ def get_dataset_revision_number(id):
     return jsonify(revision_number)
 
 
+"""
+Retrieve a list of all revisions of a dataset from the id of any dataset in the chain.
+E.g.: If there are 5 revisions, and the id for revision 4 is given, a list of revisions
+1-5 will be returned in reverse order (newest first). Non-public access is only required to
+retrieve information on non-published datasets. Output will be a list of dictionaries. Each dictionary
+contains the dataset revision number, its uuid, and then the complete dataset (optional).
+"""
+
+@app.route('/datasets/<id>/revisions', methods=['GET'])
+def get_revisions_list(id):
+    # A token is optional for this endpoint, but if an invalid token is provided,
+    # we need to tell the client with a 401 error
+    validate_token_if_auth_header_exists(request)
+
+    # Use the internal token to query the target entity,
+    # since public entities must be readable without a user token
+    token = get_internal_token()
+
+    # Query the target entity against uuid-api and neo4j; it is returned as a dict if it exists
+    entity_dict = query_target_entity(id, token)
+    normalized_entity_type = entity_dict['entity_type']
+
+    # Only for Dataset. NOTE(review): relies on bad_request_error() aborting the request - confirm
+    if normalized_entity_type != 'Dataset':
+        bad_request_error("The entity of given id is not a Dataset")
+
+    # Only published/public datasets can be read without a user token
+    if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
+        # Otherwise a user token is required and the user must belong to the HuBMAP-READ group
+        token = get_user_token(request, non_public_access_required=True)
+
+    # By now, either the entity is publicly accessible or
+    # the user token has the correct access level.
+    # Get all the revisions, sorted DESC (newest first) by creation timestamp
+    sorted_revisions_list = app_neo4j_queries.get_sorted_revisions(neo4j_driver_instance, entity_dict['uuid'])
+
+    # Skip the properties that are time-consuming to generate via triggers:
+    # direct_ancestors, collections, and upload for Dataset
+    properties_to_skip = [
+        'direct_ancestors', 
+        'collections', 
+        'upload'
+    ]
+    complete_revisions_list = schema_manager.get_complete_entities_list(token, sorted_revisions_list, properties_to_skip)
+    normalized_revisions_list = schema_manager.normalize_entities_list_for_response(complete_revisions_list)
+
+    # The list is sorted DESC, so index 0 is the newest revision; it is the only one checked here.
+    # Users outside the HuBMAP-READ group only get it when it is published (assumes a non-empty list - TODO confirm)
+    if not user_in_hubmap_read_group(request):
+        latest_revision = normalized_revisions_list[0]
+
+        if latest_revision['status'].lower() != DATASET_STATUS_PUBLISHED:
+            normalized_revisions_list.pop(0)
+
+            # Also hide 'next_revision_uuid' of the now-newest revision (presumably it references the removed one)
+            if 'next_revision_uuid' in normalized_revisions_list[0]:
+                normalized_revisions_list[0].pop('next_revision_uuid')
+
+    # Compose the result list, numbering revisions from len(list) for the newest down to 1 for the oldest
+    results = []
+    revision_number = len(normalized_revisions_list)
+    for revision in normalized_revisions_list:
+        result = {
+            'revision_number': revision_number,
+            'dataset_uuid': revision['uuid'],
+            'dataset': revision
+        }
+
+        results.append(result)
+        revision_number -= 1
+
+    return jsonify(results)
+
+
 ####################################################################################################
 ## Internal Functions
 ####################################################################################################

diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
@@ -592,6 +592,47 @@ def get_children(neo4j_driver, uuid, property_key = None):
     return results
 
 
+
+"""
+Get all revisions for a given dataset uuid and sort them in descending order based on their creation time
+
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+uuid : str
+    The uuid of target entity 
+
+Returns
+-------
+list
+    A list of all the unique revision datasets (as dicts) in DESC order
+"""
+def get_sorted_revisions(neo4j_driver, uuid):
+    results = []
+    # *0.. matches zero or more REVISION_OF hops, so e itself plus every earlier (prev) and later (next) revision is matched
+    query = (f"MATCH (prev:Dataset)<-[:REVISION_OF *0..]-(e:Dataset)<-[:REVISION_OF *0..]-(next:Dataset) "
+             f"WHERE e.uuid='{uuid}' "
+             # COLLECT() returns a list, so the three lists can be concatenated with +
+             # apoc.coll.toSet() returns the unique nodes of the combined list
+             f"WITH apoc.coll.toSet(COLLECT(next) + COLLECT(e) + COLLECT(prev)) AS collection "
+             f"UNWIND collection as node "
+             f"WITH node ORDER BY node.created_timestamp DESC "
+             f"RETURN COLLECT(node) AS {record_field_name}")
+    # NOTE(review): uuid is interpolated directly into the Cypher text - confirm callers only pass a trusted, validated uuid
+    logger.debug("======get_sorted_revisions() query======")
+    logger.debug(query)
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(_execute_readonly_tx, query)
+
+        if record and record[record_field_name]:
+            # Convert the list of nodes to a list of dicts; otherwise results stays the initial empty list
+            results = _nodes_to_dicts(record[record_field_name])
+
+    return results
+
+
 """
 Get all previous revisions of the target entity by uuid