# Sandbox to use Python tools to check JSONs/YAMLs

In [1]:
## CX: allows multiple lines of code to print from one code block
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import json
import jsonschema
import pathlib
import jsonref
import yaml

In [2]:
pathlib.Path.cwd()

PosixPath('/Users/jay/Desktop/biothings_explorer/jupyter notebooks/CX_WIPs/Draft_Metadata')

## loading schema

In [3]:
yaml_schema = pathlib.Path.cwd().joinpath("draft6_schema_x-bte-association-retrieval.yaml")
with open(yaml_schema) as file:
    schema_from_yaml = yaml.load(file, Loader=yaml.SafeLoader)
    schema_from_yaml = json.dumps(schema_from_yaml, indent=2)
    schema_from_yaml = jsonref.loads(schema_from_yaml)

In [4]:
schema_from_yaml.keys()

dict_keys(['title', '$schema', 'type', 'description', 'properties', 'definitions'])

In [5]:
schema_from_yaml['properties'].keys()

dict_keys(['components'])

In [6]:
schema_from_yaml['properties']['components']['properties']\
['x-bte-association-retrieval']['patternProperties']['.']['properties'].keys()

dict_keys(['queryInfo', 'inputs', 'outputs', 'predicateInfo', 'references', 'provenance', 'numericMeasures', 'categoryMeasures', 'contextRelevance'])

In [7]:
schema_from_yaml['properties']['components']['properties']\
['x-bte-association-retrieval']['patternProperties']['.']['properties']['numericMeasures']

{'description': 'Array of objects (Python-list-of-dict-like), gives info about  the measure and what response field to use to retrieve it. ',
 'type': 'array',
 'items': {'description': 'Numeric measures have additional required info, like range and the  meaning of directions. Units may also be specified. ',
  'type': 'object',
  'allOf': [{'type': 'object',
    'required': ['name', 'responseField'],
    'properties': {'name': {'description': 'name the measure, starting with the name of the resource that  calculated/assigned it. In display format (source way of  capitalizing the names, with spaces).  ',
      'type': 'string'},
     'ontologyTerm': {'description': 'Term from an ontology (ideally an OWL Class) that corresponds with this measure. Format is prefix:ID. ',
      'type': 'string'},
     'responseField': {'description': 'The measure should have the value of a specific field in the API  response/JSON record. Use dot-notation to refer to the response field.',
      'type': 'str

## checking an example

In [8]:
yaml_example = pathlib.Path.cwd().joinpath("draft6_example_disgenetDG.yaml")
with open(yaml_example) as file:
    example_from_yaml = yaml.load(file, Loader=yaml.SafeLoader)
    example_from_yaml = json.dumps(example_from_yaml, indent=2)
    example_from_yaml = jsonref.loads(example_from_yaml)

In [9]:
example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1'].keys()

dict_keys(['queryInfo', 'inputs', 'outputs', 'predicateInfo', 'references', 'provenance', 'numericMeasures'])

In [10]:
example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']['numericMeasures']

[{'name': 'DisGeNET gene-disease association score',
  'responseField': 'disgenet.genes_related_to_disease.score',
  'measureReferences': {'websites': 'https://www.disgenet.org/dbinfo#section31'},
  'range': {'minExclusive': 0, 'maxInclusive': 1},
  'directionMeaning': {'larger': 'more_evidence'}},
 {'name': 'DisGeNET evidence index',
  'responseField': 'disgenet.genes_related_to_disease.EI',
  'measureReferences': {'websites': 'https://www.disgenet.org/dbinfo#section36'},
  'missingValueMeaning': 'This measure is calculated from BeFree and PsyGeNET info.  If the association was not found in those sources, this  measure likely was not calculated. ',
  'range': {'minExclusive': 0, 'maxInclusive': 1},
  'directionMeaning': {'larger': 'more_consistent'}},
 {'name': 'DisGeNET disease specificity index',
  'ontologyTerm': 'SIO:001351',
  'responseField': 'disgenet.genes_related_to_disease.DSI',
  'measureReferences': {'websites': 'https://www.disgenet.org/dbinfo#section33'},
  'missingValue

## validate example against schema

In [11]:
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
## YAYAYAYAYAY this means it validated!!!!!!!

In [12]:
## but what if it's a fluke?? 
## remove a required key from one association 
example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']\
['references']['publications']['pmid'].pop('responseField')

## then try to validate, this 
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
## so it works in catching the error yayyyyyyyy. too bad it doesn't tell you which entry had the error in it...

'disgenet.genes_related_to_disease.pubmed'

ValidationError: 'responseField' is a required property

Failed validating 'required' in schema[0]:
    {'description': 'Edge property is dynamic and should have the value of '
                    'a specific field in  the API response/JSON record. In '
                    'this situation, a field called  responseField is '
                    'used. Use dot-notation to refer to the response '
                    'field. ',
     'properties': {'responseField': {'type': 'string'}},
     'required': ['responseField'],
     'type': 'object'}

On instance:
    {}

In [13]:
example_from_yaml['components']['x-bte-association-retrieval']['disease-gene1']\
['references']['publications']['pmid']['responseField'] = 'disgenet.genes_related_to_disease.pubmed'

In [14]:
jsonschema.validate(instance=example_from_yaml, schema=schema_from_yaml)
## and it's back and accepted again 

## Export JSON files for the yamls

In [15]:
json_schema_path = pathlib.Path.cwd().joinpath("draft6_schema_x-bte-association-retrieval.json")
with open(json_schema_path, "w") as file:
    json.dump(schema_from_yaml, file, indent=2)

In [16]:
json_example_path = pathlib.Path.cwd().joinpath("draft6_example_disgenetDG.json")
with open(json_example_path, "w") as file:
    json.dump(example_from_yaml, file, indent=2)