# ==================================================================================================
# Reconstructed from a unified diff whose line breaks were lost (diff '+' markers removed).
#
# Hunk 1: src/app.py  (index 6aa950aa..82f3c924, @@ -1954,6 +1954,80 @@)
#   Inserted after get_dataset_revision_number(id), whose last line is
#   `return jsonify(revision_number)`, and before the "Internal Functions" banner.
# ==================================================================================================

"""
Retrieve a list of all revisions of a dataset from the id of any dataset in the chain.
E.g: If there are 5 revisions, and the id for revision 4 is given, a list of revisions
1-5 will be returned in reverse order (newest first). Non-public access is only required to
retrieve information on non-published datasets. Output will be a list of dictionaries. Each dictionary
contains the dataset revision number, its uuid, and then the complete dataset.

Parameters
----------
id : str
    The identifier of any dataset in the revision chain, taken from the URL

Returns
-------
json
    A list of dicts (newest revision first), each with the keys
    'revision_number', 'dataset_uuid' and 'dataset'
"""
# The `<id>` URL variable is required: the view function takes `id` as an argument,
# and Flask only passes arguments that are declared as variables in the route rule
@app.route('/datasets/<id>/revisions', methods=['GET'])
def get_revisions_list(id):
    # Token is not required, but if an invalid token provided,
    # we need to tell the client with a 401 error
    validate_token_if_auth_header_exists(request)

    # Use the internal token to query the target entity
    # since public entities don't require user token
    token = get_internal_token()

    # Query target entity against uuid-api and neo4j and return as a dict if exists
    entity_dict = query_target_entity(id, token)
    normalized_entity_type = entity_dict['entity_type']

    # Only for Dataset
    if normalized_entity_type != 'Dataset':
        bad_request_error("The entity of given id is not a Dataset")

    # Only published/public datasets don't require token
    if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
        # Token is required and the user must belong to HuBMAP-READ group
        token = get_user_token(request, non_public_access_required=True)

    # By now, either the entity is public accessible or
    # the user token has the correct access level
    # Get all the sorted (DESC based on creation timestamp) revisions
    sorted_revisions_list = app_neo4j_queries.get_sorted_revisions(neo4j_driver_instance, entity_dict['uuid'])

    # Skip some of the properties that are time-consuming to generate via triggers
    # direct_ancestors, collections, and upload for Dataset
    properties_to_skip = [
        'direct_ancestors',
        'collections',
        'upload'
    ]
    complete_revisions_list = schema_manager.get_complete_entities_list(token, sorted_revisions_list, properties_to_skip)
    normalized_revisions_list = schema_manager.normalize_entities_list_for_response(complete_revisions_list)

    # Only check the very last revision (the first revision dict since normalized_revisions_list
    # is already sorted DESC) to determine if send it back or not.
    # The emptiness checks guard the [0] lookups so an unexpectedly empty result
    # yields an empty response instead of an IndexError
    if not user_in_hubmap_read_group(request) and normalized_revisions_list:
        latest_revision = normalized_revisions_list[0]

        if latest_revision['status'].lower() != DATASET_STATUS_PUBLISHED:
            normalized_revisions_list.pop(0)

            # Also hide the 'next_revision_uuid' of the second last revision from response
            # (it is now the first element, and would otherwise point at the hidden revision)
            if normalized_revisions_list:
                normalized_revisions_list[0].pop('next_revision_uuid', None)

    # Now all we need to do is to compose the result list
    # The list is newest-first, so the first element gets the highest revision number
    total_revisions = len(normalized_revisions_list)
    results = [
        {
            'revision_number': total_revisions - index,
            'dataset_uuid': revision['uuid'],
            'dataset': revision
        }
        for index, revision in enumerate(normalized_revisions_list)
    ]

    return jsonify(results)


####################################################################################################
## Internal Functions
####################################################################################################


# ==================================================================================================
# Hunk 2: src/app_neo4j_queries.py  (index 28793977..e02c3e03, @@ -592,6 +592,47 @@)
#   Inserted after get_children(neo4j_driver, uuid, property_key = None), whose last line is
#   `return results`.
# ==================================================================================================

"""
Get all revisions for a given dataset uuid and sort them in descending order based on their creation time

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
uuid : str
    The uuid of target entity

Returns
-------
list
    A list of all the unique revision datasets (as dicts) in DESC order,
    or an empty list when the query returns nothing
"""
def get_sorted_revisions(neo4j_driver, uuid):
    results = []

    # NOTE(review): `uuid` is interpolated directly into the Cypher text, so it must be a
    # trusted value. The caller in src/app.py passes the uuid returned by query_target_entity().
    # Consider a query parameter if _execute_readonly_tx supports one - TODO confirm
    query = (f"MATCH (prev:Dataset)<-[:REVISION_OF *0..]-(e:Dataset)<-[:REVISION_OF *0..]-(next:Dataset) "
             f"WHERE e.uuid='{uuid}' "
             # COLLECT() returns a list
             # apoc.coll.toSet() returns a set containing unique nodes
             f"WITH apoc.coll.toSet(COLLECT(next) + COLLECT(e) + COLLECT(prev)) AS collection "
             f"UNWIND collection as node "
             f"WITH node ORDER BY node.created_timestamp DESC "
             f"RETURN COLLECT(node) AS {record_field_name}")

    logger.debug("======get_sorted_revisions() query======")
    logger.debug(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(_execute_readonly_tx, query)

        if record and record[record_field_name]:
            # Convert the list of nodes to a list of dicts
            results = _nodes_to_dicts(record[record_field_name])

    return results


# The diff's trailing context continues with the header comment of the next definition
# ("Get all previous revisions of the target entity by uuid"), which is truncated in this view.