Skip to content
20 changes: 20 additions & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def close_neo4j_driver(error):
schema_manager.initialize(app.config['SCHEMA_YAML_FILE'],
app.config['UUID_API_URL'],
app.config['INGEST_API_URL'],
app.config['SEARCH_API_URL'],
auth_helper_instance,
neo4j_driver_instance)

Expand Down Expand Up @@ -812,6 +813,14 @@ def create_entity(entity_type):
# No need to log the validation errors
bad_request_error(str(e))

# Execute property level validators defined in schema yaml before entity property creation
# Use empty dict {} to indicate there's no existing_data_dict
try:
schema_manager.execute_property_level_validators('before_property_create_validators', normalized_entity_type, request, {}, json_data_dict)
# Currently only ValueError
except ValueError as e:
bad_request_error(e)

# Sample and Dataset: additional validation, create entity, after_create_trigger
# Collection and Donor: create entity
if normalized_entity_type == 'Sample':
Expand Down Expand Up @@ -2049,11 +2058,22 @@ def retract_dataset(id):
1-5 will be returned in reverse order (newest first). Non-public access is only required to
retrieve information on non-published datasets. Output will be a list of dictionaries. Each dictionary
contains the dataset revision number and its uuid. Optionally, the full dataset can be included for each.

By default, only the revision number and uuid are included. To include the full dataset, the query
parameter "include_dataset" can be given with the value of "true". If this parameter is not included or
is set to false, the dataset will not be included. For example, to include the full datasets for each revision,
use '/datasets/<id>/revisions?include_dataset=true'. To omit the datasets, either set include_dataset=false, or
simply do not include this parameter.

Parameters
----------
id : str
The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target dataset

Returns
-------
list
The list of revision datasets
"""
@app.route('/datasets/<id>/revisions', methods=['GET'])
def get_revisions_list(id):
Expand Down
1 change: 0 additions & 1 deletion src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,6 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid):
raise TransactionError(msg)



"""
Get all ancestors by uuid

Expand Down
24 changes: 20 additions & 4 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
############################################# Rules #############################################
# Entity properties:
# - type: data type of the property, one of the following: string|integer|boolean|list|json_string
# - generated: whether the property is auto genearated or user provided, default to false
# - generated: whether the property is auto generated (either with a `before_create_trigger` or not) or user provided, default to false
# - required_on_create: whether the property is required from user request JSON for entity creation via POST
# - immutable: whether the property can NOT be updated once being created, default to false
# - transient: whether the property to persist in database or not, default to false
Expand All @@ -13,11 +13,11 @@
# - types: before_entity_create_validator, a single validation method needed for creating or updating the entity

# Property level validators:
# - types: before_property_update_validators, a list of validation methods
# - types: before_property_create_validators|before_property_update_validators, a list of validation methods

# Entity creation via http POST request:
# - Use `generated: true` to mark a property as to be auto generated by the program instead of from user input JSON
# - If a property is marked as `generated: true`, either no trigger method needed (E.g. Donor.image_files) or a `before_create_trigger` can be used to generate the value
# - If a property is marked as `generated: true`, either no trigger method needed (E.g. Donor.image_files, Dataset.title) or a `before_create_trigger` can be used to generate the value
# - If a property has `before_create_trigger`, it can't be specified in client request JSON
# - If a property is marked as `generated: true`, it can't be `required_on_create: true` at the same time
# - Use `required_on_create: true` to mark a property as required from user input
Expand Down Expand Up @@ -232,21 +232,29 @@ ENTITIES:
required_on_create: true # Only required for create via POST, not update via PUT
description: "True if the data contains any human genetic sequence information."
status:
type: string
before_property_update_validators:
- validate_application_header_before_property_update
- validate_dataset_status_value
type: string
generated: true
description: "One of: New|Processing|QA|Published|Error|Hold|Invalid"
before_create_trigger: set_dataset_status_new
after_update_trigger: update_dataset_and_ancestors_data_access_level
title:
type: string
generated: true # Disallow entry from users via POST
immutable: true # Disallow update via PUT
description: "The auto generated dataset title."
on_read_trigger: get_dataset_title
description: "The auto generated dataset title."
lab_dataset_id:
type: string
description: "A name or identifier used by the lab who is uploading the data to cross reference the data locally"
data_types:
before_property_create_validators:
- validate_no_duplicates_in_list
before_property_update_validators:
- validate_no_duplicates_in_list
type: list
required_on_create: true # Only required for create via POST, not update via PUT
description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/master/src/search-schema/data/definitions/enums/assay_types.yaml)."
Expand All @@ -268,6 +276,10 @@ ENTITIES:
direct_ancestor_uuids:
required_on_create: true # Only required for create via POST, not update via PUT
type: list
before_property_create_validators:
- validate_no_duplicates_in_list
before_property_update_validators:
- validate_no_duplicates_in_list
transient: true
exposed: false
description: "The uuids of source entities from which this new entity is derived. Used to pass source entity ids in on POST or PUT calls used to create the linkages."
Expand Down Expand Up @@ -707,6 +719,8 @@ ENTITIES:
#datasets will be added only via PUT/update, NEVER on POST/create
dataset_uuids_to_link:
type: list
before_property_update_validators:
- validate_no_duplicates_in_list
generated: true # Disallow user input from request json when being created
transient: true
exposed: false
Expand All @@ -716,6 +730,8 @@ ENTITIES:
#transient attribute used to pass in a list of dataset ids of datasets to be removed from a DataSubmisison
dataset_uuids_to_unlink:
type: list
before_property_update_validators:
- validate_no_duplicates_in_list
generated: true # Disallow user input from request json when being created
transient: true
exposed: false
Expand Down
86 changes: 64 additions & 22 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
_schema = None
_uuid_api_url = None
_ingest_api_url = None
_search_api_url = None
_auth_helper = None
_neo4j_driver = None

Expand All @@ -57,19 +58,22 @@
"""
def initialize(valid_yaml_file,
uuid_api_url,
ingest_api_url,
ingest_api_url,
search_api_url,
auth_helper_instance,
neo4j_driver_instance):
# Specify as module-scope variables
global _schema
global _uuid_api_url
global _ingest_api_url
global _search_api_url
global _auth_helper
global _neo4j_driver

_schema = load_provenance_schema(valid_yaml_file)
_uuid_api_url = uuid_api_url
_ingest_api_url = ingest_api_url
_search_api_url = search_api_url

# Get the helper instances
_auth_helper = auth_helper_instance
Expand Down Expand Up @@ -538,23 +542,12 @@ def normalize_entity_result_for_response(entity_dict, properties_to_exclude = []
# Skip properties with None value and the ones that are marked as not to be exposed.
# By default, all properties are exposed if not marked as `exposed: false`
# It's still possible to see `exposed: true` marked explictly
if (entity_dict[key] is not None) and ('exposed' not in properties[key]) or (('exposed' in properties[key]) and properties[key]['exposed']):
# Safely evaluate a string containing a Python dict or list literal
# Only convert to Python list/dict when the string literal is not empty
# instead of returning the json-as-string or array-as-string
if isinstance(entity_dict[key], str) and entity_dict[key] and (properties[key]['type'] in ['list', 'json_string']):
# ast uses compile to compile the source string (which must be an expression) into an AST
# If the source string is not a valid expression (like an empty string), a SyntaxError will be raised by compile
# If, on the other hand, the source string would be a valid expression (e.g. a variable name like foo),
# compile will succeed but then literal_eval() might fail with a ValueError
# Also this fails with a TypeError: literal_eval("{{}: 'value'}")
try:
entity_dict[key] = ast.literal_eval(entity_dict[key])
except (SyntaxError, ValueError, TypeError) as e:
logger.debug(f"Invalid expression (string value) of key: {key} for ast.literal_eval()")
logger.debug(entity_dict[key])
msg = "Failed to convert the source string with ast.literal_eval()"
logger.exception(msg)
if (entity_dict[key] is not None) and ('exposed' not in properties[key]) or (('exposed' in properties[key]) and properties[key]['exposed']):
if entity_dict[key] and (properties[key]['type'] in ['list', 'json_string']):
# Safely evaluate a string containing a Python dict or list literal
# Only convert to Python list/dict when the string literal is not empty
# instead of returning the json-as-string or array-as-string
entity_dict[key] = convert_str_to_data(entity_dict[key])

# Add the target key with correct value of data type to the normalized_entity dict
normalized_entity[key] = entity_dict[key]
Expand Down Expand Up @@ -740,13 +733,13 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque
Parameters
----------
validator_type : str
For now only: before_property_update_validators (support multiple validators)
before_property_create_validators|before_property_update_validators (support multiple validators)
normalized_entity_type : str
One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload
request: Flask request object
The instance of Flask request passed in from application request
existing_data_dict : dict
A dictionary that contains all existing entity properties
A dictionary that contains all existing entity properties, {} for before_property_create_validators
new_data_dict : dict
The json data in request body, already after the regular validations
"""
Expand Down Expand Up @@ -890,10 +883,10 @@ def validate_entity_level_validator_type(validator_type):
Parameters
----------
validator_type : str
One of the validator types: before_property_update_validators
One of the validator types: before_property_create_validators|before_property_update_validators
"""
def validate_property_level_validator_type(validator_type):
accepted_validator_types = ['before_property_update_validators']
accepted_validator_types = ['before_property_create_validators', 'before_property_update_validators']
separator = ', '

if validator_type.lower() not in accepted_validator_types:
Expand Down Expand Up @@ -1439,6 +1432,20 @@ def get_ingest_api_url():
return _ingest_api_url


"""
Get the search-api URL to be used by trigger methods

Returns
-------
str
The search-api URL
"""
def get_search_api_url():
    # Module-scope value assigned by initialize(); reading a global
    # does not require a `global` declaration.
    return _search_api_url


"""
Get the AUthHelper instance to be used by trigger methods

Expand Down Expand Up @@ -1466,6 +1473,40 @@ def get_neo4j_driver_instance():

return _neo4j_driver

"""
Convert a string representation of the Python list/dict (either nested or not) to a Python list/dict

Parameters
----------
data_str: str
The string representation of the Python list/dict stored in Neo4j.
It's not stored in Neo4j as a json string! And we can't store it as a json string
due to the way that Cypher handles single/double quotes.

Returns
-------
list or dict
The real Python list or dict after evaluation
"""
def convert_str_to_data(data_str):
if isinstance(data_str, str):
# ast uses compile to compile the source string (which must be an expression) into an AST
# If the source string is not a valid expression (like an empty string), a SyntaxError will be raised by compile
# If, on the other hand, the source string would be a valid expression (e.g. a variable name like foo),
# compile will succeed but then literal_eval() might fail with a ValueError
# Also this fails with a TypeError: literal_eval("{{}: 'value'}")
try:
data = ast.literal_eval(data_str)
except (SyntaxError, ValueError, TypeError) as e:
logger.debug(f"Invalid expression (string value): {data_str} to be evaluated by ast.literal_eval()")
logger.debug(entity_dict[key])
msg = "Failed to convert the source string with ast.literal_eval()"
logger.exception(msg)
else:
data = data_str

return data


####################################################################################################
## Internal functions
Expand Down Expand Up @@ -1495,3 +1536,4 @@ def _create_request_headers(user_token):

return headers_dict


39 changes: 39 additions & 0 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,45 @@ def get_dataset_direct_ancestors(neo4j_driver, uuid, property_key = None):

return results


"""
Get the sample organ name and donor metadata information of the given dataset uuid

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuid : str
The uuid of target entity

Returns
-------
str: The sample organ name
str: The donor metadata (string representation of a Python dict)
"""
def get_dataset_organ_and_donor_info(neo4j_driver, uuid):
organ_name = None
donor_metadata = None

query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor) "
# Filter out the Lab entities
f"WHERE e.uuid='{uuid}' AND s.specimen_type='organ' AND EXISTS(s.organ) "
# COLLECT() returns a list
# apoc.coll.toSet() reruns a set containing unique nodes
f"RETURN s.organ AS organ_name, d.metadata AS donor_metadata")

logger.debug("======get_dataset_organ_and_donor_info() query======")
logger.debug(query)

with neo4j_driver.session() as session:
record = session.read_transaction(_execute_readonly_tx, query)

if record:
organ_name = record['organ_name']
donor_metadata = record['donor_metadata']

return organ_name, donor_metadata

"""
Create or recreate one or more linkages (via Activity nodes)
between the target entity node and the direct ancestor nodes in neo4j
Expand Down
Loading