Skip to content
20 changes: 20 additions & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def close_neo4j_driver(error):
schema_manager.initialize(app.config['SCHEMA_YAML_FILE'],
app.config['UUID_API_URL'],
app.config['INGEST_API_URL'],
app.config['SEARCH_API_URL'],
auth_helper_instance,
neo4j_driver_instance)

Expand Down Expand Up @@ -812,6 +813,14 @@ def create_entity(entity_type):
# No need to log the validation errors
bad_request_error(str(e))

# Execute property level validators defined in schema yaml before entity property creation
# Use empty dict {} to indicate there's no existing_data_dict
try:
schema_manager.execute_property_level_validators('before_property_create_validators', normalized_entity_type, request, {}, json_data_dict)
# Currently only ValueError
except ValueError as e:
bad_request_error(e)

# Sample and Dataset: additional validation, create entity, after_create_trigger
# Collection and Donor: create entity
if normalized_entity_type == 'Sample':
Expand Down Expand Up @@ -2049,11 +2058,22 @@ def retract_dataset(id):
1-5 will be returned in reverse order (newest first). Non-public access is only required to
retrieve information on non-published datasets. Output will be a list of dictionaries. Each dictionary
contains the dataset revision number and its uuid. Optionally, the full dataset can be included for each.

By default, only the revision number and uuid are included. To include the full dataset, the query
parameter "include_dataset" can be given with the value of "true". If this parameter is not included or
is set to false, the dataset will not be included. For example, to include the full datasets for each revision,
use '/datasets/<id>/revisions?include_dataset=true'. To omit the datasets, either set include_dataset=false, or
simply do not include this parameter.

Parameters
----------
id : str
The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target dataset

Returns
-------
list
The list of revision datasets
"""
@app.route('/datasets/<id>/revisions', methods=['GET'])
def get_revisions_list(id):
Expand Down
1 change: 0 additions & 1 deletion src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,6 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid):
raise TransactionError(msg)



"""
Get all ancestors by uuid

Expand Down
24 changes: 20 additions & 4 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
############################################# Rules #############################################
# Entity properties:
# - type: data type of the property, one of the following: string|integer|boolean|list|json_string
# - generated: whether the property is auto genearated or user provided, default to false
# - generated: whether the property is auto generated (either with a `before_create_trigger` or not) or user provided, default to false
# - required_on_create: whether the property is required from user request JSON for entity creation via POST
# - immutable: whether the property can NOT be updated once being created, default to false
# - transient: whether the property to persist in database or not, default to false
Expand All @@ -13,11 +13,11 @@
# - types: before_entity_create_validator, a single validation method needed for creating or updating the entity

# Property level validators:
# - types: before_property_update_validators, a list of validation methods
# - types: before_property_create_validators|before_property_update_validators, a list of validation methods

# Entity creation via http POST request:
# - Use `generated: true` to mark a property as to be auto generated by the program instead of from user input JSON
# - If a property is marked as `generated: true`, either no trigger method needed (E.g. Donor.image_files) or a `before_create_trigger` can be used to generate the value
# - If a property is marked as `generated: true`, either no trigger method needed (E.g. Donor.image_files, Dataset.title) or a `before_create_trigger` can be used to generate the value
# - If a property has `before_create_trigger`, it can't be specified in client request JSON
# - If a property is marked as `generated: true`, it can't be `required_on_create: true` at the same time
# - Use `required_on_create: true` to mark a property as required from user input
Expand Down Expand Up @@ -232,21 +232,29 @@ ENTITIES:
required_on_create: true # Only required for create via POST, not update via PUT
description: "True if the data contains any human genetic sequence information."
status:
type: string
before_property_update_validators:
- validate_application_header_before_property_update
- validate_dataset_status_value
type: string
generated: true
description: "One of: New|Processing|QA|Published|Error|Hold|Invalid"
before_create_trigger: set_dataset_status_new
after_update_trigger: update_dataset_and_ancestors_data_access_level
title:
type: string
generated: true # Disallow entry from users via POST
immutable: true # Disallow update via PUT
description: "The auto generated dataset title."
on_read_trigger: get_dataset_title
description: "The auto generated dataset title."
lab_dataset_id:
type: string
description: "A name or identifier used by the lab who is uploading the data to cross reference the data locally"
data_types:
before_property_create_validators:
- validate_no_duplicates_in_list
before_property_update_validators:
- validate_no_duplicates_in_list
type: list
required_on_create: true # Only required for create via POST, not update via PUT
description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/master/src/search-schema/data/definitions/enums/assay_types.yaml)."
Expand All @@ -268,6 +276,10 @@ ENTITIES:
direct_ancestor_uuids:
required_on_create: true # Only required for create via POST, not update via PUT
type: list
before_property_create_validators:
- validate_no_duplicates_in_list
before_property_update_validators:
- validate_no_duplicates_in_list
transient: true
exposed: false
description: "The uuids of source entities from which this new entity is derived. Used to pass source entity ids in on POST or PUT calls used to create the linkages."
Expand Down Expand Up @@ -707,6 +719,8 @@ ENTITIES:
#datasets will be added only via PUT/update, NEVER on POST/create
dataset_uuids_to_link:
type: list
before_property_update_validators:
- validate_no_duplicates_in_list
generated: true # Disallow user input from request json when being created
transient: true
exposed: false
Expand All @@ -716,6 +730,8 @@ ENTITIES:
#transient attribute used to pass in a list of dataset ids of datasets to be removed from a DataSubmisison
dataset_uuids_to_unlink:
type: list
before_property_update_validators:
- validate_no_duplicates_in_list
generated: true # Disallow user input from request json when being created
transient: true
exposed: false
Expand Down
86 changes: 64 additions & 22 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
_schema = None
_uuid_api_url = None
_ingest_api_url = None
_search_api_url = None
_auth_helper = None
_neo4j_driver = None

Expand All @@ -57,19 +58,22 @@
"""
def initialize(valid_yaml_file,
uuid_api_url,
ingest_api_url,
ingest_api_url,
search_api_url,
auth_helper_instance,
neo4j_driver_instance):
# Specify as module-scope variables
global _schema
global _uuid_api_url
global _ingest_api_url
global _search_api_url
global _auth_helper
global _neo4j_driver

_schema = load_provenance_schema(valid_yaml_file)
_uuid_api_url = uuid_api_url
_ingest_api_url = ingest_api_url
_search_api_url = search_api_url

# Get the helper instances
_auth_helper = auth_helper_instance
Expand Down Expand Up @@ -538,23 +542,12 @@ def normalize_entity_result_for_response(entity_dict, properties_to_exclude = []
# Skip properties with None value and the ones that are marked as not to be exposed.
# By default, all properties are exposed if not marked as `exposed: false`
# It's still possible to see `exposed: true` marked explictly
if (entity_dict[key] is not None) and ('exposed' not in properties[key]) or (('exposed' in properties[key]) and properties[key]['exposed']):
# Safely evaluate a string containing a Python dict or list literal
# Only convert to Python list/dict when the string literal is not empty
# instead of returning the json-as-string or array-as-string
if isinstance(entity_dict[key], str) and entity_dict[key] and (properties[key]['type'] in ['list', 'json_string']):
# ast uses compile to compile the source string (which must be an expression) into an AST
# If the source string is not a valid expression (like an empty string), a SyntaxError will be raised by compile
# If, on the other hand, the source string would be a valid expression (e.g. a variable name like foo),
# compile will succeed but then literal_eval() might fail with a ValueError
# Also this fails with a TypeError: literal_eval("{{}: 'value'}")
try:
entity_dict[key] = ast.literal_eval(entity_dict[key])
except (SyntaxError, ValueError, TypeError) as e:
logger.debug(f"Invalid expression (string value) of key: {key} for ast.literal_eval()")
logger.debug(entity_dict[key])
msg = "Failed to convert the source string with ast.literal_eval()"
logger.exception(msg)
if (entity_dict[key] is not None) and ('exposed' not in properties[key]) or (('exposed' in properties[key]) and properties[key]['exposed']):
if entity_dict[key] and (properties[key]['type'] in ['list', 'json_string']):
# Safely evaluate a string containing a Python dict or list literal
# Only convert to Python list/dict when the string literal is not empty
# instead of returning the json-as-string or array-as-string
entity_dict[key] = convert_str_to_data(entity_dict[key])

# Add the target key with correct value of data type to the normalized_entity dict
normalized_entity[key] = entity_dict[key]
Expand Down Expand Up @@ -740,13 +733,13 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque
Parameters
----------
validator_type : str
For now only: before_property_update_validators (support multiple validators)
before_property_create_validators|before_property_update_validators (support multiple validators)
normalized_entity_type : str
One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload
request: Flask request object
The instance of Flask request passed in from application request
existing_data_dict : dict
A dictionary that contains all existing entity properties
A dictionary that contains all existing entity properties, {} for before_property_create_validators
new_data_dict : dict
The json data in request body, already after the regular validations
"""
Expand Down Expand Up @@ -890,10 +883,10 @@ def validate_entity_level_validator_type(validator_type):
Parameters
----------
validator_type : str
One of the validator types: before_property_update_validators
One of the validator types: before_property_create_validators|before_property_update_validators
"""
def validate_property_level_validator_type(validator_type):
accepted_validator_types = ['before_property_update_validators']
accepted_validator_types = ['before_property_create_validators', 'before_property_update_validators']
separator = ', '

if validator_type.lower() not in accepted_validator_types:
Expand Down Expand Up @@ -1439,6 +1432,20 @@ def get_ingest_api_url():
return _ingest_api_url


"""
Get the search-api URL to be used by trigger methods

Returns
-------
str
The search-api URL
"""
def get_search_api_url():
    # Module-scope value assigned by initialize(); reading a global
    # does not require a `global` declaration.
    return _search_api_url


"""
Get the AUthHelper instance to be used by trigger methods

Expand Down Expand Up @@ -1466,6 +1473,40 @@ def get_neo4j_driver_instance():

return _neo4j_driver

"""
Convert a string representation of the Python list/dict (either nested or not) to a Python list/dict

Parameters
----------
data_str: str
The string representation of the Python list/dict stored in Neo4j.
It's not stored in Neo4j as a json string! And we can't store it as a json string
due to the way that Cypher handles single/double quotes.

Returns
-------
list or dict
The real Python list or dict after evaluation
"""
def convert_str_to_data(data_str):
if isinstance(data_str, str):
# ast uses compile to compile the source string (which must be an expression) into an AST
# If the source string is not a valid expression (like an empty string), a SyntaxError will be raised by compile
# If, on the other hand, the source string would be a valid expression (e.g. a variable name like foo),
# compile will succeed but then literal_eval() might fail with a ValueError
# Also this fails with a TypeError: literal_eval("{{}: 'value'}")
try:
data = ast.literal_eval(data_str)
except (SyntaxError, ValueError, TypeError) as e:
logger.debug(f"Invalid expression (string value): {data_str} to be evaluated by ast.literal_eval()")
logger.debug(entity_dict[key])
msg = "Failed to convert the source string with ast.literal_eval()"
logger.exception(msg)
else:
data = data_str

return data


####################################################################################################
## Internal functions
Expand Down Expand Up @@ -1495,3 +1536,4 @@ def _create_request_headers(user_token):

return headers_dict


39 changes: 39 additions & 0 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,45 @@ def get_dataset_direct_ancestors(neo4j_driver, uuid, property_key = None):

return results


"""
Get the sample organ name and donor metadata information of the given dataset uuid

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuid : str
The uuid of target entity

Returns
-------
str: The sample organ name
str: The donor metadata (string representation of a Python dict)
"""
def get_dataset_organ_and_donor_info(neo4j_driver, uuid):
organ_name = None
donor_metadata = None

query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor) "
# Filter out the Lab entities
f"WHERE e.uuid='{uuid}' AND s.specimen_type='organ' AND EXISTS(s.organ) "
# COLLECT() returns a list
# apoc.coll.toSet() reruns a set containing unique nodes
f"RETURN s.organ AS organ_name, d.metadata AS donor_metadata")

logger.debug("======get_dataset_organ_and_donor_info() query======")
logger.debug(query)

with neo4j_driver.session() as session:
record = session.read_transaction(_execute_readonly_tx, query)

if record:
organ_name = record['organ_name']
donor_metadata = record['donor_metadata']

return organ_name, donor_metadata

"""
Create or recreate one or more linkages (via Activity nodes)
between the target entity node and the direct ancestor nodes in neo4j
Expand Down
Loading