Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,80 @@ def get_dataset_revision_number(id):
return jsonify(revision_number)


"""
Retrieve a list of all revisions of a dataset from the id of any dataset in the chain.
E.g., if there are 5 revisions and the id for revision 4 is given, a list of revisions
1-5 will be returned in reverse order (newest first). Non-public access is only required to
retrieve information on non-published datasets. Output will be a list of dictionaries. Each dictionary
contains the dataset revision number, its uuid, and the complete dataset.
"""

@app.route('/datasets/<id>/revisions', methods=['GET'])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be consistent, please add a comment block that describes this call, the parameters, and return type above this line. Follow examples from other endpoints in this file.

def get_revisions_list(id):
    """
    Return every revision in the chain of the given dataset, newest first.

    Parameters
    ----------
    id : str
        Identifier of any dataset in the revision chain; it is handed to
        query_target_entity() to look up the target entity

    Returns
    -------
    json
        The jsonify()-ed list of dicts, one per revision in DESC order. Each dict has
        'revision_number' (the oldest revision in the returned list is 1),
        'dataset_uuid' and 'dataset' (the normalized revision itself)
    """
    # Token is not required, but if an invalid token provided,
    # we need to tell the client with a 401 error
    validate_token_if_auth_header_exists(request)

    # Use the internal token to query the target entity
    # since public entities don't require user token
    token = get_internal_token()

    # Query target entity against uuid-api and neo4j and return as a dict if exists
    entity_dict = query_target_entity(id, token)

    # Only for Dataset
    if entity_dict['entity_type'] != 'Dataset':
        bad_request_error("The entity of given id is not a Dataset")

    # Only published/public datasets don't require token
    if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
        # Token is required and the user must belong to HuBMAP-READ group
        token = get_user_token(request, non_public_access_required=True)

    # By now, either the entity is public accessible or
    # the user token has the correct access level
    # Get all the sorted (DESC based on creation timestamp) revisions
    sorted_revisions_list = app_neo4j_queries.get_sorted_revisions(neo4j_driver_instance, entity_dict['uuid'])

    # Skip some of the properties that are time-consuming to generate via triggers
    # direct_ancestors, collections, and upload for Dataset
    properties_to_skip = [
        'direct_ancestors',
        'collections',
        'upload'
    ]
    complete_revisions_list = schema_manager.get_complete_entities_list(token, sorted_revisions_list, properties_to_skip)
    normalized_revisions_list = schema_manager.normalize_entities_list_for_response(complete_revisions_list)

    # Only the newest revision (index 0, since the list is sorted DESC) is checked
    # to decide whether it must be withheld from users outside the HuBMAP-READ group.
    # The emptiness checks keep an empty result from raising IndexError
    if not user_in_hubmap_read_group(request) and normalized_revisions_list:
        latest_revision = normalized_revisions_list[0]

        if latest_revision['status'].lower() != DATASET_STATUS_PUBLISHED:
            normalized_revisions_list.pop(0)

            # Also hide the 'next_revision_uuid' of the revision that is now the newest
            # in the response, since it points at the revision that was just removed
            if normalized_revisions_list:
                normalized_revisions_list[0].pop('next_revision_uuid', None)

    # Compose the result list; numbering counts down from the list length so that
    # the newest revision carries the highest revision number
    total_revisions = len(normalized_revisions_list)
    results = [
        {
            'revision_number': total_revisions - position,
            'dataset_uuid': revision['uuid'],
            'dataset': revision
        }
        for position, revision in enumerate(normalized_revisions_list)
    ]

    return jsonify(results)


####################################################################################################
## Internal Functions
####################################################################################################
Expand Down
41 changes: 41 additions & 0 deletions src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,47 @@ def get_children(neo4j_driver, uuid, property_key = None):
return results



"""
Get all revisions for a given dataset uuid and sort them in descending order based on their creation time

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuid : str
The uuid of target entity

Returns
-------
list
A list of all the unique revision datasets in DESC order
"""
def get_sorted_revisions(neo4j_driver, uuid):
    # NOTE(review): uuid is embedded directly in the Cypher text rather than being
    # passed as a query parameter, so callers should only hand in a trusted uuid
    #
    # The two variable-length REVISION_OF patterns (*0..) match the datasets on both
    # sides of the target `e` (including `e` itself, since zero hops are allowed)
    # COLLECT() returns a list
    # apoc.coll.toSet() returns a set containing unique nodes
    # The unique nodes are then unwound, ordered by created_timestamp DESC and
    # collected again into a single list under the record field name
    cypher = (f"MATCH (prev:Dataset)<-[:REVISION_OF *0..]-(e:Dataset)<-[:REVISION_OF *0..]-(next:Dataset) "
              f"WHERE e.uuid='{uuid}' "
              "WITH apoc.coll.toSet(COLLECT(next) + COLLECT(e) + COLLECT(prev)) AS collection "
              "UNWIND collection as node "
              "WITH node ORDER BY node.created_timestamp DESC "
              f"RETURN COLLECT(node) AS {record_field_name}")

    logger.debug("======get_sorted_revisions() query======")
    logger.debug(cypher)

    with neo4j_driver.session() as db_session:
        row = db_session.read_transaction(_execute_readonly_tx, cypher)

        # No record came back from the read transaction
        if not row:
            return []

        revision_nodes = row[record_field_name]

        # A record came back but its collected list is empty
        if not revision_nodes:
            return []

        # Convert the list of nodes to a list of dicts
        return _nodes_to_dicts(revision_nodes)


"""
Get all previous revisions of the target entity by uuid

Expand Down