# Tutorial on JSON validation and JSON schema inspection via the `omdata` package

In [1]:
import pprint as pp
import json

#### Pointers to sample json and schema files

In [2]:
# Paths to the sample JSON schema and the sample survey document; both are
# relative to the directory the notebook is run from.
schema_file = './data/samples/omsurvey.schema'
survey_file = './data/samples/omsurvey.json'

In [3]:
# Parse the schema file into a Python object.
# The encoding is stated explicitly: JSON text is UTF-8, whereas the default
# encoding of open() depends on the platform locale.
with open(schema_file, encoding='utf-8') as schema_fp:
    om_schema = json.load(schema_fp)

In [4]:
# Optional: uncomment the next line to pretty-print the loaded schema.
#pp.pprint(om_schema,indent=0)

In [5]:
# Parse the sample survey file into a Python object.
# The encoding is stated explicitly: JSON text is UTF-8, whereas the default
# encoding of open() depends on the platform locale.
with open(survey_file, encoding='utf-8') as survey_fp:
    om_example = json.load(survey_fp)

In [6]:
# Optional: uncomment the next line to pretty-print the loaded survey.
#pp.pprint(om_example,indent=0)

## Validation 

In [7]:
from omdata import check_json, check_file

In [8]:
# Show the docstring of check_json, the validator for already-parsed JSON objects.
help(check_json)

Help on function check_json in module omdata.validator:

check_json(survey, schema)
    The method checks validity of an input json against the schema.
    
    Args:
        survey (:obj:`JSON`): A JSON object.
        schema (:obj:`JSON`): A JSON schema.
    
    Returns:
        (`bool`): Returns `True` when it is validated.



In [9]:
# Validate the parsed survey against the parsed schema; True means it validated.
check_json(om_example, om_schema)

True

In [10]:
# Show the docstring of check_file, which takes file paths instead of parsed objects.
help(check_file)

Help on function check_file in module omdata.validator:

check_file(survey_file, schema_file)
    Given external files the method checks validity of an input json file
        against the schema file.
    
    Args:
        survey (:obj:`string`): A JSON data file.
        schema (:obj:`string`): A JSON schema file.
    
    Returns:
        (bool): True.



In [11]:
# Same validation as check_json, but driven by the two file paths.
check_file(survey_file, schema_file)

True

## Inspecting a given json schema

In [12]:
from omdata import Schema, OMSchemaKeyError, OMSchemaUnknown
from omdata import get_field_names

### Retrieving paths to data fields 

In [13]:
# List the path of every data field inside the schema document
# (e.g. '/properties/funding/properties/amount').
get_field_names(om_schema)

['/properties/id',
 '/properties/member_id',
 '/properties/name',
 '/properties/surname',
 '/properties/email',
 '/properties/linkedin',
 '/properties/twitter',
 '/properties/community affiliations/items/properties/name',
 '/properties/community affiliations/items/properties/city',
 '/properties/community affiliations/items/properties/country',
 '/properties/funding/properties/availability',
 '/properties/funding/properties/amount',
 '/properties/funding/properties/success rate',
 '/properties/profile/properties/domains/items',
 '/properties/profile/properties/categories/items',
 '/properties/profile/properties/technologies/items',
 '/properties/profile/properties/skills/items',
 '/properties/projects/properties/count',
 '/properties/projects/properties/indivudual',
 '/properties/projects/properties/team',
 '/properties/projects/properties/partners/items/properties/name',
 '/properties/projects/properties/partners/items/properties/surname',
 '/properties/projects/properties/partners/it

### Using the omdata.Schema object

In [14]:
# Show the class documentation of Schema.
help(Schema)

Help on class Schema in module omdata.design:

class Schema(builtins.object)
 |  A generic interface object for JSON Schemas of OpenMaker Project.
 |  
 |  Attributes:
 |      schema_file (:obj:`string`): A file path to a JSON schema file.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, schema=None)
 |      The class constructor.
 |      
 |      Args:
 |          schema (:obj:`JSON`): A JSON object which describes the schema (default None).
 |  
 |  get_fields(self, schema=None, main_only=False)
 |      The method lists all of the fields of a given schema. It parses the entire 
 |          schema in JSON format.
 |      
 |      Args:
 |          schema (:obj:`JSON`): A JSON object which describes the schema (default None).
 |          
 |      Returns:
 |          (:obj:`list` of :obj:`string`): list of field names.
 |  
 |  get_questionaire_matches(self)
 |      The method lists all of the fields of a given schema. It parses the entire 
 |          schema in JSON format.
 |  

In [15]:
# Create a Schema with no schema attached yet (the constructor's `schema`
# argument defaults to None); the schema is loaded in the following cells.
S = Schema()

In [16]:
# Load the schema from the object that was parsed from the schema file earlier.
S.load(om_schema)

True

In [17]:
# Alternative: load the schema directly from its file path.
S.load_from_file(schema_file)

True

**Note that** the schema to be inspected can be loaded either from a schema object (a Python `dict`) or directly from a file.

### Checking top level fields

In [18]:
# Restrict the listing to the top-level field names.
S.get_fields(main_only=True)

['id',
 'member_id',
 'name',
 'surname',
 'email',
 'linkedin',
 'twitter',
 'community affiliations',
 'funding',
 'profile',
 'projects',
 'behavior',
 'values']

### Listing all data fields including the nested ones
Note that a field followed by an asterisk `*` may hold multiple values; in other words, the field itself is a Python `list` object.

In [19]:
# List every field, nested ones included, using dotted names
# (e.g. 'funding.amount').
S.get_fields()

['id',
 'member_id',
 'name',
 'surname',
 'email',
 'linkedin',
 'twitter',
 'community affiliations*.name',
 'community affiliations*.city',
 'community affiliations*.country',
 'funding.availability',
 'funding.amount',
 'funding.success rate',
 'profile.domains*',
 'profile.categories*',
 'profile.technologies*',
 'profile.skills*',
 'projects.count',
 'projects.indivudual',
 'projects.team',
 'projects.partners*.name',
 'projects.partners*.surname',
 'projects.partners*.email',
 'projects.partners*.linkedin',
 'projects.partners*.twitter',
 'projects.domains*',
 'projects.categories*',
 'projects.technologies*',
 'projects.skills*',
 'projects.description',
 'behavior.complementarity',
 'behavior.similarity',
 'behavior.locality',
 'behavior.socialization',
 'behavior.resemblance',
 'values.power',
 'values.achievement',
 'values.hedonism',
 'values.stimulation',
 'values.selfdirection',
 'values.universalism',
 'values.benevolence',
 'values.tradition',
 'values.conformity',
 'va

### Checking required fields

In [20]:
# List the fields that the schema marks as required.
S.get_required_fields()

['id',
 'name',
 'surname',
 'email',
 'community affiliations',
 'funding',
 'profile',
 'projects',
 'behavior']

### Listing data field descriptions
In this particular case, where the JSON schema is designed for an OpenMaker survey, each field's description matches the corresponding questionnaire item, i.e. the source from which the field's data is populated.

In [21]:
# Map each field name to its description, which for this survey schema is the
# label of the matching questionnaire item (e.g. '5.a').
S.get_questionaire_matches()

{'behavior.complementarity': '5.a',
 'behavior.locality': '5.c',
 'behavior.resemblance': '5.e',
 'behavior.similarity': '5.b',
 'behavior.socialization': '5.d',
 'community affiliations*.city': '1.b',
 'community affiliations*.country': '1.c',
 'community affiliations*.name': '1.a',
 'email': 'Email',
 'funding.amount': '2.b',
 'funding.availability': '2.a',
 'funding.success rate': '2.c',
 'id': 'Internal: The unique survey identifier.',
 'linkedin': 'LinkedIn',
 'member_id': 'Internal: The unique OpenMaker member ID.',
 'name': 'Name',
 'profile.categories*': '3.b',
 'profile.domains*': '3.a',
 'profile.skills*': '3.d',
 'profile.technologies*': '3.c',
 'projects.categories*': '4.c.ii',
 'projects.count': '4.a',
 'projects.description': '4.c.v',
 'projects.domains*': '4.c.i',
 'projects.indivudual': '4.a.i',
 'projects.partners*.email': '4.b.iii',
 'projects.partners*.linkedin': '4.b.iv',
 'projects.partners*.name': '4.b.i',
 'projects.partners*.surname': '4.b.ii',
 'projects.partne