# Validating example "edge" against schema

In [1]:
## CX: by default a cell only displays the value of its last expression;
## "all" makes every top-level expression in a cell display its value
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import json
import jsonschema
import pathlib
import jsonref
import yaml

In [2]:
## show the working directory: the YAML/JSON paths below are built from it
pathlib.Path.cwd()

PosixPath('/Users/jay/Desktop/biothings_explorer/jupyter notebooks/CX_WIPs/Draft_Metadata')

## loading schema

In [3]:
## Load the schema YAML, then round-trip it through a JSON string so that
## jsonref can parse it (jsonref.loads replaces {"$ref": ...} entries with
## the objects they point to)
yaml_schema = pathlib.Path.cwd().joinpath("draft6_schema_processedAssociation.yaml")

## explicit encoding so the file is decoded the same way on every platform
with open(yaml_schema, encoding="utf-8") as yaml_in:
    ## safe_load is shorthand for load(..., Loader=yaml.SafeLoader)
    schema_raw = yaml.safe_load(yaml_in)

## schema_from_yaml is bound once, to the final jsonref object, rather than
## being rebound from dict to str to jsonref object
schema_from_yaml = jsonref.loads(json.dumps(schema_raw, indent=2))

In [4]:
## top-level keys of the loaded schema
schema_from_yaml.keys()

dict_keys(['title', '$schema', 'type', 'description', 'required', 'additionalProperties', 'properties', 'definitions'])

In [5]:
## names of the properties the schema defines at the top level
schema_from_yaml['properties'].keys()

dict_keys(['predicateInfo', 'references', 'provenance', 'numericMeasures', 'categoryMeasures', 'contextRelevance'])

In [6]:
## inspect the sub-schema of one property (numericMeasures)
schema_from_yaml['properties']['numericMeasures']

{'description': 'Array of objects (Python list-of-dict-like). Gives measure info and which API/JSON-response field to retrieve it from',
 'type': 'array',
 'minItems': 1,
 'items': {'allOf': [{'type': 'object',
    'required': ['name'],
    'properties': {'name': {'description': 'Name of the measure (include the name of the resource that calculated it)',
      'type': 'string'},
     'ontologyTerm': {'description': 'Term from an ontology (ideally an OWL Class) that corresponds with this measure. Format is prefix:ID',
      'type': 'string'},
     'missingValueMeaning': {'description': 'Short free-text explaining what a missing/NULL/NA value would mean',
      'type': 'string'},
     'measureReferences': {'description': 'Used to provide publications and website URLs for users to learn more about an association, knowledge-source, measure, context/relevance, etc',
      'type': 'object',
      'additionalProperties': False,
      'minProperties': 1,
      'properties': {'publications': {'

## checking an example

In [7]:
## Load the example YAML, then round-trip it through a JSON string so that
## jsonref can parse it (jsonref.loads replaces {"$ref": ...} entries with
## the objects they point to)
yaml_example = pathlib.Path.cwd().joinpath("draft6_example_processedAssociation_disgenetDG.yaml")

## explicit encoding so the file is decoded the same way on every platform
with open(yaml_example, encoding="utf-8") as yaml_in:
    ## safe_load is shorthand for load(..., Loader=yaml.SafeLoader)
    example_raw = yaml.safe_load(yaml_in)

## example_from_yaml is bound once, to the final jsonref object, rather than
## being rebound from dict to str to jsonref object
example_from_yaml = jsonref.loads(json.dumps(example_raw, indent=2))

In [8]:
## top-level keys present in the example
example_from_yaml.keys()

dict_keys(['predicateInfo', 'references', 'provenance', 'numericMeasures'])

In [9]:
## inspect the example's numericMeasures entries
example_from_yaml['numericMeasures']

[{'name': 'DisGeNET gene-disease association score',
  'value': 0.1,
  'measureReferences': {'websites': 'https://www.disgenet.org/dbinfo#section31'},
  'range': {'minExclusive': 0, 'maxInclusive': 1},
  'directionMeaning': {'larger': 'more_evidence'}},
 {'name': 'DisGeNET evidence index',
  'value': 0.917,
  'measureReferences': {'websites': 'https://www.disgenet.org/dbinfo#section36'},
  'missingValueMeaning': 'This measure is calculated from BeFree and PsyGeNET info. If the association was not found in those sources, this measure likely was not calculated.',
  'range': {'minExclusive': 0, 'maxInclusive': 1},
  'directionMeaning': {'larger': 'more_consistent'}},
 {'name': 'DisGeNET disease specificity index',
  'ontologyTerm': 'SIO:001351',
  'value': 0.536,
  'measureReferences': {'websites': 'https://www.disgenet.org/dbinfo#section33'},
  'missingValueMeaning': 'This measure is calculated when a gene is associated with one or more diseases. If the value is missing, this gene is ann

## validate example against schema

In [10]:
## validate() raises a ValidationError when the instance does not conform
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
## no exception and no output here means the example validated

### error: if a required field is missing

In [11]:
## but what if it's a fluke??
## remove a required key ('value') from the first numericMeasure;
## removed1 keeps the popped value so it can be put back afterwards
removed1 = example_from_yaml['numericMeasures'][0].pop('value')

## then try to validate: this is expected to fail. The error is caught and
## printed (instead of being left uncaught) so "Restart & Run All" keeps going
try:
    jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
except jsonschema.ValidationError as err:
    ## the "On instance[...]" part of the message says which entry failed
    print(f"ValidationError: {err}")
else:
    ## validation passing here would mean the schema missed the error
    raise AssertionError("expected a ValidationError: required key 'value' was removed")

ValidationError: 'value' is a required property

Failed validating 'required' in schema['properties']['numericMeasures']['items']['allOf'][1]:
    {'properties': {'directionMeaning': {'description': 'Object, Python '
                                                        'dict-like. Each '
                                                        'key is a '
                                                        'direction '
                                                        '(larger, '
                                                        'closer_to_zero, '
                                                        'more_negative, '
                                                        'etc.). Each value '
                                                        'is what the '
                                                        'direction means '
                                                        '(more_specific, '
                                                        'more_evidence, '
                                                        'more_confident, '
                                                        'more_consistent, '
                                                        'more_probable, '
                                                        'etc.). A '
                                                        'developer may '
                                                        'want to explain '
                                                        'multiple '
                                                        'directions (e.g. '
                                                        'when a value '
                                                        'means something '
                                                        'different when '
                                                        'closer to 0 vs. '
                                                        'closer to the '
                                                        'upper bound vs. '
                                                        'closer to the '
                                                        'lower bound)',
                                         'minProperties': 1,
                                         'patternProperties': {'.': {'type': 'string'}},
                                         'type': 'object'},
                    'range': {'additionalProperties': False,
                              'description': 'Object, Python dict-like. '
                                             'Defines an expected lower '
                                             'and/or upper bound for '
                                             'values (minimum and '
                                             'maximum). Inclusive means '
                                             'the range includes the '
                                             'boundary number; exclusive '
                                             'means the range does not. If '
                                             'the actual lower-bound is '
                                             'negative-infinity and/or the '
                                             'actual upper bound is '
                                             'positive-infinity, do not '
                                             'set a bound',
                              'minProperties': 1,
                              'not': {'anyOf': [{'required': ['minExclusive',
                                                              'minInclusive'],
                                                 'type': 'object'},
                                                {'required': ['maxExclusive',
                                                              'maxInclusive'],
                                                 'type': 'object'}]},
                              'properties': {'maxExclusive': {'type': 'number'},
                                             'maxInclusive': {'type': 'number'},
                                             'minExclusive': {'type': 'number'},
                                             'minInclusive': {'type': 'number'}},
                              'type': 'object'},
                    'units': {'description': 'Units for the measure values',
                              'type': 'string'},
                    'value': {'type': 'number'}},
     'required': ['value', 'directionMeaning'],
     'type': 'object'}

On instance['numericMeasures'][0]:
    {'directionMeaning': {'larger': 'more_evidence'},
     'measureReferences': {'websites': 'https://www.disgenet.org/dbinfo#section31'},
     'name': 'DisGeNET gene-disease association score',
     'range': {'maxInclusive': 1, 'minExclusive': 0}}

In [12]:
## put the removed 'value' back on the first numericMeasure
example_from_yaml['numericMeasures'][0]['value'] = removed1

In [13]:
## re-validate now that 'value' has been restored
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
## no exception: the example is accepted again

### error: if the range sets both minExclusive and minInclusive

In [14]:
## another check: look at the range for the first numericMeasure
## (before it is modified in the next step)
example_from_yaml['numericMeasures'][0]['range']

{'minExclusive': 0, 'maxInclusive': 1}

In [16]:
## add an error: minInclusive, so the range now sets two lower bounds
## (minExclusive and minInclusive)
example_from_yaml['numericMeasures'][0]['range']['minInclusive'] = -1

## then try to validate: this is expected to fail. The error is caught and
## printed (instead of being left uncaught) so "Restart & Run All" keeps going
try:
    jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
except jsonschema.ValidationError as err:
    ## the "On instance[...]" part of the message says which entry failed
    print(f"ValidationError: {err}")
else:
    ## validation passing here would mean the schema missed the error
    raise AssertionError("expected a ValidationError: range has both minExclusive and minInclusive")

ValidationError: {'anyOf': [{'type': 'object', 'required': ['minExclusive', 'minInclusive']}, {'type': 'object', 'required': ['maxExclusive', 'maxInclusive']}]} is not allowed for {'minExclusive': 0, 'maxInclusive': 1, 'minInclusive': -1}

Failed validating 'not' in schema['properties']['numericMeasures']['items']['allOf'][1]['properties']['range']:
    {'additionalProperties': False,
     'description': 'Object, Python dict-like. Defines an expected lower '
                    'and/or upper bound for values (minimum and maximum). '
                    'Inclusive means the range includes the boundary '
                    'number; exclusive means the range does not. If the '
                    'actual lower-bound is negative-infinity and/or the '
                    'actual upper bound is positive-infinity, do not set a '
                    'bound',
     'minProperties': 1,
     'not': {'anyOf': [{'required': ['minExclusive', 'minInclusive'],
                        'type': 'object'},
                       {'required': ['maxExclusive', 'maxInclusive'],
                        'type': 'object'}]},
     'properties': {'maxExclusive': {'type': 'number'},
                    'maxInclusive': {'type': 'number'},
                    'minExclusive': {'type': 'number'},
                    'minInclusive': {'type': 'number'}},
     'type': 'object'}

On instance['numericMeasures'][0]['range']:
    {'maxInclusive': 1, 'minExclusive': 0, 'minInclusive': -1}

In [17]:
## fix the error and re-validate
## (pop returns the removed value, which is displayed as cell output because
## ast_node_interactivity is set to "all")
example_from_yaml['numericMeasures'][0]['range'].pop('minInclusive')

## then try to validate: this should pass again (no exception raised)
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)

-1

## Export the YAML schema and example as JSON files

In [18]:
## write the resolved schema as JSON into the working directory
json_schema_path = pathlib.Path.cwd() / "draft6_schema_processedAssociation.json"
with json_schema_path.open("w") as json_out:
    json.dump(schema_from_yaml, json_out, indent=2)

In [19]:
## write the example as JSON into the working directory
json_example_path = pathlib.Path.cwd() / "draft6_example_processedAssociation_disgenetDG.json"
with json_example_path.open("w") as json_out:
    json.dump(example_from_yaml, json_out, indent=2)