## Cardinality Clean up script

This script adds `owl:cardinality` information to the validation section. Any JSON Schema validator will ignore it (the validation section is not the appropriate place for `owl:cardinality`); it is added strictly to ensure ease of use by downstream scripts.

In [1]:
import json
import os
from biothings_schema import Schema

In [5]:
def fix_owl(bioschema_spec):
    """Register the ``owl`` prefix in a schema's JSON-LD context.

    The ``@context`` mapping of ``bioschema_spec`` is modified in place (an
    existing ``owl`` entry is overwritten) and that same mapping is returned.
    """
    owl_namespace = 'http://www.w3.org/2002/07/owl/'
    jsonld_context = bioschema_spec['@context']
    jsonld_context['owl'] = owl_namespace
    return jsonld_context
    
def _declares_array(node):
    """Return True if ``node`` (a parsed JSON Schema fragment) allows an array.

    A fragment allows an array when it, or anything nested inside it (for
    example a ``oneOf`` branch), has a ``type`` keyword equal to ``"array"``
    or a ``type`` list that contains ``"array"``.  ``$ref`` targets are not
    resolved.
    """
    if isinstance(node, dict):
        declared = node.get('type')
        if declared == 'array':
            return True
        if isinstance(declared, list) and 'array' in declared:
            return True
        return any(_declares_array(child) for child in node.values())
    if isinstance(node, list):
        return any(_declares_array(child) for child in node)
    return False


def add_cardinality(propdict):
    """Tag every property schema with an ``owl:cardinality`` of one or many.

    A property is tagged ``'many'`` when its schema declares an array type
    anywhere inside it, otherwise ``'one'``.  The check inspects the ``type``
    keyword structurally rather than searching the serialised JSON text, so
    free text that merely contains the word "array" (e.g. a description
    mentioning "microarray") no longer produces a false ``'many'``.

    Args:
        propdict: Mapping of property name to its JSON Schema fragment, as
            found under ``$validation.properties``.

    Returns:
        The same ``propdict`` object; each dict-valued entry is updated in
        place.  Entries that are not dicts (such as boolean schemas) cannot
        carry the extra key and are left untouched.
    """
    for propschema in propdict.values():
        if not isinstance(propschema, dict):
            continue
        propschema['owl:cardinality'] = 'many' if _declares_array(propschema) else 'one'
    return propdict

def update_file(tmpdir,filelist):
    """Rewrite each listed schema file with cardinality-annotated validation.

    For every name in ``filelist`` the JSON document at ``tmpdir/name`` is
    loaded, the ``owl`` prefix is added to its ``@context`` and every
    ``@graph`` entry carrying a ``$validation`` section has its properties
    run through ``add_cardinality``.  The file is overwritten only when at
    least one such entry was found; otherwise it is left as it was on disk.

    NOTE(review): the rewritten document holds only ``@context`` and the
    ``@graph`` entries that have ``$validation``; graph entries without it
    and any other top-level keys are not written back.  Confirm this is
    intended.
    """
    for filename in filelist:
        print("file opened: ",filename)
        filepath = os.path.join(tmpdir,filename)
        with open(filepath,'r', encoding='utf-8') as source:
            bioschema = json.load(source)
        newschema = {'@context': fix_owl(bioschema)}
        annotated = []
        for entry in bioschema['@graph']:
            if "$validation" not in entry.keys():
                continue
            validation = entry['$validation']
            validation['properties'] = add_cardinality(validation['properties'])
            annotated.append(entry)
        if not annotated:
            # Nothing carried a $validation section: do not touch the file.
            continue
        newschema['@graph'] = annotated
        with open(filepath,'w') as target:
            target.write(json.dumps(newschema, indent=4, sort_keys=False))
            
def update_specs(script_path):
    """Run ``update_file`` over the ``jsonld`` folder of every spec.

    Each entry of ``<script_path>/results/resulting_json`` whose name does
    not contain ``.txt`` is treated as a spec folder, and every file listed
    in its ``jsonld`` sub-folder is handed to ``update_file``.
    """
    json_root = os.path.join(script_path,'results','resulting_json')
    for entry in os.listdir(json_root):
        if '.txt' in entry:
            # Text files sit next to the spec folders; skip them.
            continue
        jsonld_dir = os.path.join(json_root,entry,'jsonld')
        update_file(jsonld_dir,os.listdir(jsonld_dir))

def check_validation(script_path):
    """Load every generated spec file through ``biothings_schema.Schema``.

    Walks ``<script_path>/results/resulting_json/<spec>/jsonld``.  The name of
    each spec folder and of each file is printed before the file is parsed,
    so when an exception is raised the last name printed is the file that
    was being processed.

    Args:
        script_path: Directory that contains the ``results`` folder.  An
            empty string resolves the path relative to the current working
            directory.

    Returns:
        None.  Nothing is collected; any exception raised while loading or
        building a schema propagates to the caller.
    """
    resulting_json = os.path.join(script_path,'results','resulting_json')
    specs = os.listdir(resulting_json)
    # The two lists below are only consulted when the alternative
    # ``ignoreall`` line further down is re-enabled.
    ignorelist = ['Gene'] ## things that were manually checked online but fail locally
    workinglist = ['ChemicalSubstance','ComputationalWorkflow','failures.txt','FormalParameter',
                   'Beacon','BioChemEntity','BioChemStructure','ComputationalTool',
                   'Course','CourseInstance','DataCatalog','DataRecord','Dataset',
                   'Event','Journal','LabProtocol','failures.txt','BioSample','DNA',
                   'Disease','MolecularEntity','Organization','Person','Phenotype',
                   'Protein','ProteinAnnotation','ProteinStructure','PublicationIssue',
                   'PublicationVolume','RNA','Sample','ScholarlyArticle','SequenceRange',
                   'SemanticTextAnnotation','SequenceAnnotation','SequenceMatchingModel',
                   'Study','Enzyme','Taxon','TaxonName','TrainingMaterials'] ##validating fine, ignore to save time
    #ignoreall = list(set(ignorelist).union(set(workinglist)))
    ignoreall = ['failures.txt']
    # Loop-invariant: the same base schemas are used for every file.
    base_schemas = ["schema.org","bioschemastypes","bioschemas",
                    "bioschemasdrafts","bioschemastypesdrafts",
                    "bioschemasdeprecated"]
    for specclass in specs:
        print(specclass)
        if specclass in ignoreall:
            continue
        tmpdir = os.path.join(resulting_json,specclass,'jsonld')
        for eachfile in os.listdir(tmpdir):
            print(eachfile)
            # Read as UTF-8, matching how update_file reads these same files,
            # instead of relying on the platform's default encoding.
            with open(os.path.join(tmpdir,eachfile),'r', encoding='utf-8') as injson:
                bioschema = json.load(injson)
            sc = Schema(bioschema, base_schema=base_schemas)
            # NOTE(review): ``validation`` is read as an attribute and the
            # value is not used afterwards; confirm against biothings_schema
            # whether this access is what triggers the check.
            valipass = sc.validation

In [6]:
## Main
# An empty script_path makes os.path.join resolve results/resulting_json
# relative to the current working directory.
script_path = ''
# Rewriting the spec files is disabled here; uncomment to run update_specs.
#update_specs(script_path)

## Check validation for manual fixes

In [7]:
# An empty script_path makes os.path.join resolve results/resulting_json
# relative to the current working directory.
script_path = ''
# check_validation prints each spec folder name followed by the names of the
# files inside its jsonld folder as it goes.
check_validation(script_path)

Beacon
Beacon_v0.2-DRAFT-2018_04_23-DEPRECATED.json
BioChemEntity
BioChemEntity_v0.7-RELEASE.json
BioChemEntity_v0.8-DRAFT.jsonld
BioChemStructure
BioChemStructure_v0.1-DRAFT-2019_06_20.json
BioSample
BioSample_v0.1-RELEASE.json
ChemicalSubstance
ChemicalSubstance_v0.1-DRAFT-2018_12_07.json
ChemicalSubstance_v0.2-DRAFT-2019_06_11.json
ChemicalSubstance_v0.2-RELEASE-2019_06_19.json
ChemicalSubstance_v0.3-DRAFT-2019_11_11.json
ChemicalSubstance_v0.3-RELEASE-2019_09_02.json
ChemicalSubstance_v0.4-RELEASE.json
ComputationalTool
ComputationalTool_v0.5-DRAFT.json
ComputationalTool_v0.6-DRAFT.json
ComputationalTool_v1.0-RELEASE.json
ComputationalWorkflow
ComputationalWorkflow_v0.1-DRAFT-2019_02_08.json
ComputationalWorkflow_v0.2-DRAFT-2019_11_29.json
ComputationalWorkflow_v0.3-DRAFT-2020_03_03.json
ComputationalWorkflow_v0.4-DRAFT-2020_05_11.json
ComputationalWorkflow_v0.5-DRAFT-2020_07_21.json
ComputationalWorkflow_v1.0-RELEASE-Type.json
ComputationalWorkflow_v1.0-RELEASE.json
Course
Course_



Study_v0.3-DRAFT.json
Taxon
Taxon_v0.1-DRAFT-2018_06_27.json




Taxon_v0.1-DRAFT-2018_09_25.json
Taxon_v0.1-DRAFT-2018_09_26.json
Taxon_v0.2-DRAFT-2018_09_26.json
Taxon_v0.3-DRAFT-2018_11_09.json
Taxon_v0.3-DRAFT-2018_11_10.json
Taxon_v0.3-RELEASE-2018_11_10.json
Taxon_v0.3-RELEASE-2019_11_18.json
Taxon_v0.4-DRAFT-2019_06_19.json
Taxon_v0.4-DRAFT-2019_06_24.json
Taxon_v0.4-DRAFT.json
Taxon_v0.5-DRAFT-2020_04_06.json
Taxon_v0.6-RELEASE.json
Taxon_v0.7-DRAFT.json




TaxonName
TaxonName_v0.1-DRAFT-TYPE.json
TaxonName_v0.1-DRAFT.json
Tool
TrainingMaterial
TrainingMaterial_v0.2-DRAFT-2018_03_07.json
TrainingMaterial_v0.4-DRAFT-2018_11_16.json
TrainingMaterial_v0.4-DRAFT-2019_02_08.json
TrainingMaterial_v0.5-DRAFT-2019_02_25.json
TrainingMaterial_v0.6-DRAFT-2019_06_06.json
TrainingMaterial_v0.8-DRAFT-2020_10_06.json
TrainingMaterial_v0.9-DRAFT-2020_12_08.json
