# Example of validating the registry entry using schema

In [1]:
## CX: display the value of every top-level expression in a cell, not only the
## last one, so a single cell can show several results
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import json
import jsonschema
import pathlib
import jsonref
import yaml

In [2]:
## show the working directory; the YAML/JSON files used below are resolved relative to it
pathlib.Path.cwd()

PosixPath('/Users/jay/Desktop/biothings_explorer/jupyter notebooks/CX_WIPs/Draft_Metadata')

## loading schema

In [3]:
## Load the schema YAML into plain Python objects, then round-trip it through
## JSON text so that jsonref can parse it.
yaml_schema = pathlib.Path.cwd().joinpath("draft6_schema_registry.yaml")
## explicit encoding: do not depend on the platform's default text encoding
with open(yaml_schema, encoding="utf-8") as file:
    schema_plain = yaml.load(file, Loader=yaml.SafeLoader)
## the file is already closed here; only the final jsonref result is bound to
## schema_from_yaml, so the name always refers to one kind of object
schema_from_yaml = jsonref.loads(json.dumps(schema_plain))

In [4]:
## show the schema's top-level keys and its description
## (both values are displayed because ast_node_interactivity is set to "all")
schema_from_yaml.keys()
schema_from_yaml['description']

dict_keys(['title', '$schema', 'type', 'description', 'required', 'properties', 'definitions'])

'Describes the x-bte-association-retrieval metadata extension within SmartAPI registry files. Knowledge-provider (KP) APIs in the Translator ecosystem provide associations/edges between biomedical entities. This extension describes how to query these APIs and how to post-process their responses and associated metadata into a standard format. There may be multiple objects within the extension for different queries (with different input types, predicates, sources, or output types involved).'

In [5]:
## list the property names the schema defines for each entry under
## x-bte-association-retrieval
(
    schema_from_yaml['properties']['components']['properties']
    ['x-bte-association-retrieval']['patternProperties']['.']['properties']
    .keys()
)

dict_keys(['queryInfo', 'inputs', 'outputs', 'predicateInfo', 'references', 'provenance', 'numericMeasures', 'categoryMeasures', 'contextRelevance'])

In [6]:
## inspect the schema's definition of the 'references' property
(
    schema_from_yaml['properties']['components']['properties']
    ['x-bte-association-retrieval']['patternProperties']['.']['properties']
    ['references']
)

{'description': 'Used to provide publications and website URLs for users to learn more about an association, knowledge-source, measure, context/relevance, etc',
 'type': 'object',
 'additionalProperties': False,
 'minProperties': 1,
 'properties': {'publications': {'type': 'object',
   'minProperties': 1,
   'patternProperties': {'.': {'oneOf': [{'type': 'object',
       'required': ['value'],
       'additionalProperties': False,
       'properties': {'value': {'oneOf': [{'type': 'array',
           'minItems': 1,
           'items': {'type': ['string', 'number']}},
          {'type': ['string', 'number']}]}}},
      {'description': 'Info is dynamic and can be taken directly from the value in a specific field of the API/JSON response. In this situation, the registry points to the response field using dot-notation',
       'type': 'object',
       'required': ['responseField'],
       'additionalProperties': False,
       'properties': {'responseField': {'type': 'string'}}},
      {'de

## checking an example

In [8]:
## Load the example registry YAML into plain Python objects, then round-trip it
## through JSON text so that jsonref can parse it.
yaml_example = pathlib.Path.cwd().joinpath("draft6_example_registry_automatHMDB.yaml")
## explicit encoding: do not depend on the platform's default text encoding
with open(yaml_example, encoding="utf-8") as file:
    example_plain = yaml.load(file, Loader=yaml.SafeLoader)
## the file is already closed here; only the final jsonref result is bound to
## example_from_yaml, so the name always refers to one kind of object
example_from_yaml = jsonref.loads(json.dumps(example_plain))

In [9]:
## list the association names defined in the loaded example; the test cells
## below index into this mapping by name
example_from_yaml['components']['x-bte-association-retrieval'].keys()

dict_keys(['gene2chemical', 'chemical2gene', 'chemical2disease', 'disease2chemical'])

## validate example against schema

In [10]:
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
## validate() raises on a non-conforming instance, so finishing with no error
## and no output means the example validated

### error: if a required field is missing

In [10]:
## but what if it's a fluke??
## Negative test: remove a required key from one association and confirm that
## validation now fails.
## NOTE(review): 'disease-gene1' must exist in the loaded example, otherwise this
## lookup raises KeyError before validation even runs -- confirm that the example
## file loaded above is the one these tests were written for.
removed1 = example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']\
['predicateInfo'].pop('biolink')

## validate() raises ValidationError for a non-conforming instance; catch it so
## the expected failure is displayed without halting a "Run All"
try:
    jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
except jsonschema.ValidationError as err:
    print(f"Caught expected ValidationError:\n{err}")
else:
    ## a silent pass would mean the schema is not enforcing the required key
    raise AssertionError("validation passed although 'biolink' was removed")

KeyError: 'disease-gene1'

In [12]:
## put the previously removed 'biolink' value back into predicateInfo
example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1'][
    'predicateInfo'
]['biolink'] = removed1

In [13]:
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
## and it's back and accepted again (no error raised)

### error: if a range has two lower bounds (minExclusive and minInclusive)

Note: this test requires at least one numeric measure in the registry entry.

In [14]:
## another check: look at the range for the first numericMeasure
(
    example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']
    ['numericMeasures'][0]['range']
)

{'minExclusive': 0, 'maxInclusive': 1}

In [15]:
## add an error: set minInclusive on the first numericMeasure's range
example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']\
['numericMeasures'][0]['range']['minInclusive'] = -1

## then try to validate: this is expected to fail. Catch the ValidationError so
## the expected failure is displayed without halting a "Run All"
try:
    jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
except jsonschema.ValidationError as err:
    print(f"Caught expected ValidationError:\n{err}")
else:
    ## a silent pass would mean the schema accepts the added bound
    raise AssertionError("validation passed although 'minInclusive' was added to the range")

ValidationError: {'anyOf': [{'type': 'object', 'required': ['minExclusive', 'minInclusive']}, {'type': 'object', 'required': ['maxExclusive', 'maxInclusive']}]} is not allowed for {'minExclusive': 0, 'maxInclusive': 1, 'minInclusive': -1}

Failed validating 'not' in schema['properties']['components']['properties']['x-bte-association-retrieval']['patternProperties']['.']['properties']['numericMeasures']['items']['allOf'][1]['properties']['range']:
    {'additionalProperties': False,
     'description': 'Object, Python dict-like. Defines an expected lower '
                    'and/or upper bound for values (minimum and maximum). '
                    'Inclusive means the range includes the boundary '
                    'number; exclusive means the range does not. If the '
                    'actual lower-bound is negative-infinity and/or the '
                    'actual upper bound is positive-infinity, do not set a '
                    'bound',
     'minProperties': 1,
     'not': {'anyOf': [{'required': ['minExclusive', 'minInclusive'],
                        'type': 'object'},
                       {'required': ['maxExclusive', 'maxInclusive'],
                        'type': 'object'}]},
     'properties': {'maxExclusive': {'type': 'number'},
                    'maxInclusive': {'type': 'number'},
                    'minExclusive': {'type': 'number'},
                    'minInclusive': {'type': 'number'}},
     'type': 'object'}

On instance['components']['x-bte-association-retrieval']['disease-gene1']['numericMeasures'][0]['range']:
    {'maxInclusive': 1, 'minExclusive': 0, 'minInclusive': -1}

In [16]:
## fix the error: drop the added minInclusive bound (its value is displayed)
(
    example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']
    ['numericMeasures'][0]['range']
    .pop('minInclusive')
)

-1

In [17]:
## then try to validate; with the added bound removed again this is expected to pass
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)

### error: static publications typing

Note:
this test currently requires a numeric measure.
Be careful not to overwrite an existing `publications` key when running this test.

In [18]:
## make a publications/pmid key within measureReferences, leaving the pmid
## object empty
example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1'][
    'numericMeasures'
][0]['measureReferences']['publications'] = {"pmid": {}}

## display the modified measure
(
    example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']
    ['numericMeasures'][0]
)

{'name': 'DisGeNET gene-disease association score',
 'responseField': 'disgenet.genes_related_to_disease.score',
 'measureReferences': {'websites': {'value': 'https://www.disgenet.org/dbinfo#section31'},
  'publications': {'pmid': {}}},
 'range': {'minExclusive': 0, 'maxInclusive': 1},
 'directionMeaning': {'larger': 'more_evidence'}}

In [19]:
## then try to validate: this is expected to fail. Catch the ValidationError so
## the expected failure is displayed without halting a "Run All"
try:
    jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
except jsonschema.ValidationError as err:
    ## catches that there should be a key called value
    print(f"Caught expected ValidationError:\n{err}")
else:
    ## a silent pass would mean the schema accepts the empty pmid object
    raise AssertionError("validation passed although the 'pmid' object is empty")

ValidationError: 'value' is a required property

Failed validating 'required' in schema[0]:
    {'additionalProperties': False,
     'properties': {'value': {'oneOf': [{'items': {'type': ['string',
                                                            'number']},
                                         'minItems': 1,
                                         'type': 'array'},
                                        {'type': ['string', 'number']}]}},
     'required': ['value'],
     'type': 'object'}

On instance:
    {}

In [20]:
## remove the publications key again (the removed value is displayed)
(
    example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']
    ['numericMeasures'][0]['measureReferences']
    .pop('publications')
)

{'pmid': {}}

In [21]:
## then try to validate 
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
## with the publications key removed again, validation is expected to pass (no error raised)

## Export JSON files for the yamls

In [22]:
## write the loaded schema next to the YAML as pretty-printed JSON
json_schema_path = pathlib.Path.cwd() / "draft6_schema_registry.json"
with json_schema_path.open("w") as file:
    json.dump(schema_from_yaml, file, indent=2)

In [23]:
## Write the loaded example as pretty-printed JSON. The output name is derived
## from the YAML file that was actually loaded (same name, .json suffix), so the
## export can never store one example's content under another example's name.
json_example_path = yaml_example.with_suffix(".json")
with open(json_example_path, "w") as file:
    json.dump(example_from_yaml, file, indent=2)