# Manipulate JCOIN 1.0 schemas

In [1]:
import json
import yaml
import os

## Harvest names of all properties, constraints, and custom keys

In [2]:
def harvest(schema):
    """Print the key names used by the fields of a JSON table schema.

    Loads the JSON file at ``schema`` and prints three sets, one per line:
    the keys found on the field objects, the keys found inside their
    ``constraints`` objects, and the keys found inside their ``custom``
    objects.  Nothing is returned.
    """
    with open(schema, 'r') as handle:
        document = json.load(handle)

    field_keys, constraint_keys, custom_keys = set(), set(), set()
    for entry in document['fields']:
        field_keys.update(entry.keys())
        # A missing or empty constraints object contributes nothing.
        limits = entry.get('constraints')
        if limits:
            constraint_keys.update(limits.keys())
        if 'custom' in entry:
            custom_keys.update(entry['custom'].keys())

    for names in (field_keys, constraint_keys, custom_keys):
        print(names)

In [3]:
# Print the field, constraint, and custom key names used in the baseline table schema.
harvest('../schemas/common_measures_1.0/json/table-schema-baseline.json')

{'jcoin:optional', 'constraints', 'trueValues', 'jcoin:section', 'jcoin:notes', 'title', 'description', 'type', 'falseValues', 'name', 'jcoin:originalName'}
{'required', 'pattern', 'maxLength', 'enum'}
set()


In [4]:
# Print the field, constraint, and custom key names used in the longitudinal table schema.
harvest('../schemas/common_measures_1.0/json/table-schema-longitudinal.json')

{'format', 'jcoin:optional', 'constraints', 'trueValues', 'jcoin:section', 'custom', 'title', 'description', 'type', 'falseValues', 'name', 'jcoin:originalName'}
{'maxLength', 'required', 'pattern', 'enum', 'maximum', 'minimum'}
{'jcoin:notes', 'jcoin:originalName'}


In [5]:
# Print the field, constraint, and custom key names used in the admin table schema.
harvest('../schemas/common_measures_1.0/json/table-schema-admin.json')

{'name', 'jcoin:notes', 'title', 'description', 'type', 'constraints', 'jcoin:originalName'}
{'required', 'pattern', 'maxLength', 'enum'}
set()


In [6]:
# Print the field, constraint, and custom key names used in the staff baseline table schema.
harvest('../schemas/common_measures_1.0/json/table-schema-staff-baseline.json')

{'constraints', 'trueValues', 'jcoin:section', 'custom', 'title', 'description', 'type', 'falseValues', 'name'}
{'required', 'pattern', 'maxLength', 'enum'}
{'jcoin:notes', 'jcoin:originalName'}


In [7]:
# Print the field, constraint, and custom key names used in the staff longitudinal table schema.
harvest('../schemas/common_measures_1.0/json/table-schema-staff-longitudinal.json')

{'format', 'constraints', 'jcoin:section', 'custom', 'example', 'title', 'description', 'type', 'name'}
{'maxLength', 'required', 'pattern', 'enum', 'minimum'}
{'jcoin:originalName', 'jcoin:source'}


## Regularize schema

In [8]:
# Top-level schema keys, in output order; every one of them is required.
_SCHEMA_KEYS = ('$schema', 'title', 'version', 'description', 'primaryKey',
                'missingValues', 'fieldsMatch', 'jcoin:schemaType')
# Optional field keys emitted right after 'name', in output order.
_FIELD_KEYS = ('type', 'format', 'trueValues', 'falseValues', 'title',
               'description', 'example')
# Constraint keys that are carried over, in output order.
_CONSTRAINT_KEYS = ('required', 'maxLength', 'minimum', 'maximum', 'pattern',
                    'enum')
# Trailing jcoin keys, in output order.
_JCOIN_KEYS = ('jcoin:originalName', 'jcoin:optional', 'jcoin:notes',
               'jcoin:source')
# jcoin keys that may also live under a field's 'custom' object; the nested
# value wins over the top-level one.
_CUSTOM_OVERRIDABLE = frozenset(('jcoin:originalName', 'jcoin:notes',
                                 'jcoin:source'))


def _has_value(value):
    """Return True when ``value`` should be carried into the output.

    Truthy values are kept.  Numeric zero is kept as well, because 0 is a
    meaningful ``minimum``/``maximum``/``maxLength``/``example``.  ``None``,
    ``False``, empty strings and empty containers are dropped.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float)):
        return True
    return bool(value)


def _copy_present(source, target, keys):
    """Copy each of ``keys`` that has a value in ``source`` into ``target``."""
    for key in keys:
        value = source.get(key)
        if _has_value(value):
            target[key] = value


def regularize(schema):
    """Load a JSON table schema and return it with a fixed key layout.

    Args:
        schema: Path of the JSON schema file to read.

    Returns:
        A new dict holding the top-level schema keys in a fixed order,
        followed by ``'fields'``: a list with one dict per input field.
        Each field dict starts with ``name``, then the optional descriptive
        keys, ``constraints``, ``jcoin:section``, the ``jcoin:baseline`` and
        ``jcoin:followup`` entries (always present, always ``None``), and
        finally the remaining ``jcoin:*`` keys.  ``jcoin:originalName``,
        ``jcoin:notes`` and ``jcoin:source`` are taken from the field's
        ``custom`` object when present there, otherwise from the field
        itself.  Keys not listed above are not carried over.

    Raises:
        KeyError: If a top-level schema key, ``fields``, or a field's
            ``name`` is missing.
    """
    with open(schema, 'r', encoding='utf-8') as file:
        data = json.load(file)

    regularized = {key: data[key] for key in _SCHEMA_KEYS}
    regularized['fields'] = []

    for field in data['fields']:
        f = {'name': field['name']}
        _copy_present(field, f, _FIELD_KEYS)

        constraints = field.get('constraints')
        if constraints:
            # A non-empty constraints object always yields a 'constraints'
            # entry, even if none of its keys end up being carried over.
            f['constraints'] = {}
            _copy_present(constraints, f['constraints'], _CONSTRAINT_KEYS)

        _copy_present(field, f, ('jcoin:section',))
        f['jcoin:baseline'] = None
        f['jcoin:followup'] = None

        custom = field.get('custom') or {}
        for key in _JCOIN_KEYS:
            value = field.get(key)
            if key in _CUSTOM_OVERRIDABLE and _has_value(custom.get(key)):
                value = custom[key]
            if _has_value(value):
                f[key] = value

        regularized['fields'].append(f)

    return regularized

## Write schemas in YAML format

In [9]:
def write_schema(schema, filepath):
    """Serialize ``schema`` as YAML into the file at ``filepath``.

    The file is opened in write mode.  The document is dumped with a
    two-space indent and with keys left in their existing order rather than
    sorted.
    """
    dump_options = {'indent': 2, 'sort_keys': False}
    with open(filepath, 'w') as out:
        yaml.dump(schema, out, **dump_options)

In [10]:
# Regularize each JSON table schema and write the result as YAML under ../tmp.
os.makedirs('../tmp', exist_ok=True)
conversions = [
    ('../schemas/common_measures_1.0/json/table-schema-baseline.json',
     '../tmp/jcoin_client_baseline.yaml'),
    ('../schemas/common_measures_1.0/json/table-schema-longitudinal.json',
     '../tmp/jcoin_client_followup.yaml'),
    ('../schemas/common_measures_1.0/json/table-schema-admin.json',
     '../tmp/jcoin_client_admin.yaml'),
    ('../schemas/common_measures_1.0/json/table-schema-staff-baseline.json',
     '../tmp/jcoin_staff_baseline.yaml'),
    ('../schemas/common_measures_1.0/json/table-schema-staff-longitudinal.json',
     '../tmp/jcoin_staff_followup.yaml'),
]
for json_path, yaml_path in conversions:
    write_schema(regularize(json_path), yaml_path)