 ## A simple script to add 'sameAs'

This script adds the property 'sameAs' as a recommended property to every profile.

Note: This script does not carry over the properties defined in a profile (graph entries without a `$validation` block are dropped when the file is rewritten), which may cause some profiles to stop working. While it could be updated to keep the missing properties, it's not worth the effort, as this script will likely not be used again.

In [2]:
import json
import os
from biothings_schema import Schema

In [5]:

def add_sameAs(propdict):
    """Make sure a properties dict carries a 'sameAs' definition.

    If ``propdict`` already has a 'sameAs' key it is left untouched;
    otherwise a definition accepting either a single URI string or an
    array of URI strings is inserted in place.

    Args:
        propdict: mapping of property name -> property definition.

    Returns:
        The same ``propdict`` object that was passed in.
    """
    # Guard clause: never overwrite a definition the profile already has.
    if 'sameAs' in propdict:
        return propdict

    propdict['sameAs'] = {
        "oneOf": [
            {"type": "string", "format": "uri"},
            {
                "type": "array",
                "items": {"type": "string", "format": "uri"},
            },
        ],
        "description": "URL of a reference Web page that unambiguously indicates the item's identity. E.g. the URL of the item's Wikipedia page, Wikidata entry, or official website.",
        "owl:cardinality": "many",
    }
    return propdict

def update_file(tmpdir,filelist):
    """Add 'sameAs' as a recommended property to each JSON-LD file listed.

    For every file, each ``@graph`` entry that has a ``$validation`` block
    gets a 'sameAs' property definition (via ``add_sameAs``) and 'sameAs'
    appended to its ``recommended`` list. The file is then rewritten in
    place with only ``@context`` and the updated entries; graph entries
    without ``$validation`` are not carried over. Files with no
    ``$validation`` entry at all are left untouched.

    Args:
        tmpdir: directory containing the files.
        filelist: file names (relative to ``tmpdir``) to process.

    Raises:
        OSError: if a file cannot be read or written.
        ValueError: if a file is not valid JSON.
        KeyError: if ``@context``, ``@graph`` or a ``$validation`` block's
            ``properties`` is missing.
    """
    for eachfile in filelist:
        filepath = os.path.join(tmpdir, eachfile)
        print("file opened: ",eachfile)
        with open(filepath, 'r', encoding='utf-8') as injson:
            bioschema = json.load(injson)
        # Read up front so a malformed file fails before anything is changed.
        context = bioschema['@context']
        graphlist = []
        for eachitem in bioschema['@graph']:
            if "$validation" not in eachitem:
                continue
            validation = eachitem['$validation']
            validation['properties'] = add_sameAs(validation['properties'])
            # A missing (or non-list) 'recommended' starts from an empty list
            # so that the result is ['sameAs'] rather than a bogus
            # ['recommended'] placeholder.
            recommended = validation.get('recommended')
            if not isinstance(recommended, list):
                recommended = []
                validation['recommended'] = recommended
            if 'sameAs' not in recommended:
                recommended.append('sameAs')
            graphlist.append(eachitem)
        if not graphlist:
            # Nothing with a $validation block: leave the file as it is.
            continue
        newschema = {'@context': context, '@graph': graphlist}
        # Explicit encoding, matching the read above.
        with open(filepath, 'w', encoding='utf-8') as outjson:
            outjson.write(json.dumps(newschema, indent=4, sort_keys=False))
            
def update_specs(script_path):
    """Run ``update_file`` over the ``jsonld`` folder of every spec directory.

    Entries of ``script_path`` whose name contains '.txt' or '.git' are
    skipped. For every other entry, the files in ``<entry>/jsonld`` are
    passed to ``update_file``; if that fails for any reason (for example
    the entry has no ``jsonld`` folder) the entry name is printed and
    processing continues with the next one.

    Args:
        script_path: path of the folder holding one sub-folder per spec.
    """
    for specclass in os.listdir(script_path):
        # The original test `'.txt' or '.git' not in specclass` was always
        # true because the non-empty string '.txt' is truthy; test each
        # marker explicitly.
        if '.txt' in specclass or '.git' in specclass:
            continue
        tmpdir = os.path.join(script_path, specclass, 'jsonld')
        try:
            filelist = os.listdir(tmpdir)
            update_file(tmpdir, filelist)
        except Exception:
            # Best effort: report the spec that failed and move on, but let
            # KeyboardInterrupt/SystemExit through.
            print(specclass)

def check_validation(script_path):
    """Load every spec file into a ``Schema`` and report success or failure.

    For each entry of ``script_path`` not in the ignore list, every file in
    ``<entry>/jsonld`` is printed, parsed as JSON, wrapped in a
    ``biothings_schema.Schema`` and its ``validation`` attribute is read;
    '<file>  success!' is printed when that completes. Any error while
    handling an entry stops that entry and prints '<entry>  fail!'.

    Args:
        script_path: path of the folder holding one sub-folder per spec.
    """
    specs = os.listdir(script_path)
    # The two lists below are manual toggles: swap which `ignoreall` line is
    # active to skip specs that are already known to be fine.
    ignorelist = ['Gene'] ## things that were manually checked online but fail locally
    workinglist = ['ChemicalSubstance','ComputationalWorkflow','failures.txt','FormalParameter',
                   'Beacon','BioChemEntity','BioChemStructure','ComputationalTool',
                   'Course','CourseInstance','DataCatalog','DataRecord','Dataset',
                   'Event','Journal','LabProtocol','failures.txt','BioSample','DNA',
                   'Disease','MolecularEntity','Organization','Person','Phenotype',
                   'Protein','ProteinAnnotation','ProteinStructure','PublicationIssue',
                   'PublicationVolume','RNA','Sample','ScholarlyArticle','SequenceRange',
                   'SemanticTextAnnotation','SequenceAnnotation','SequenceMatchingModel',
                   'Study','Enzyme','Taxon','TaxonName','TrainingMaterials'] ##validating fine, ignore to save time
    #ignoreall = list(set(ignorelist).union(set(workinglist)))
    ignoreall = ['failures.txt']
    #ignoreall = workinglist
    base_schemas = ("schema.org","bioschemastypes","bioschemas",
                    "bioschemasdrafts","bioschemastypesdrafts",
                    "bioschemasdeprecated")
    for specclass in specs:
        if specclass in ignoreall:
            continue
        try:
            tmpdir = os.path.join(script_path,specclass,'jsonld')
            for eachfile in os.listdir(tmpdir):
                print(eachfile)
                # Explicit encoding so the result does not depend on the
                # platform default.
                with open(os.path.join(tmpdir,eachfile),'r', encoding='utf-8') as injson:
                    bioschema = json.load(injson)
                # A fresh list per call, as in the original.
                sc = Schema(bioschema, base_schema=list(base_schemas))
                # Attribute read kept from the original; presumably this is
                # what surfaces validation problems -- TODO confirm.
                _ = sc.validation
                print(eachfile," success!")
        except Exception:
            # Report and continue, but let Ctrl-C (KeyboardInterrupt) stop a
            # long run instead of being counted as a failure.
            print(specclass," fail!")

In [3]:
## Main
# Resolve the 'specifications' folder that sits beside the current working
# directory (i.e. <parent of cwd>/specifications).
script_path = os.getcwd()
parent_path = os.path.split(script_path)[0]
bioschemas_spec_path = os.path.join(parent_path, 'specifications')
print(script_path)
# Disabled on purpose: uncomment to rewrite every spec file in place.
#update_specs(bioschemas_spec_path)

C:\Users\Avatar\anaconda3\envs\outbreak\bioschemas\bioschemas_yml_to_dde_json


## Check validation for manual fixes

In [None]:
# NOTE(review): script_path is reset to '' here but is not read by the call
# below -- presumably a leftover; confirm before removing.
script_path = ''
# Try to load/validate every spec under bioschemas_spec_path; per-file and
# per-spec results are printed by check_validation.
check_validation(bioschemas_spec_path)

.git  fail!
.gitignore  fail!
Beacon_v0.2-DRAFT-2018_04_23-DEPRECATED.json




Beacon_v0.2-DRAFT-2018_04_23-DEPRECATED.json  success!
BioChemEntity_v0.7-RELEASE.json
BioChemEntity_v0.7-RELEASE.json  success!
BioChemEntity_v0.8-DRAFT.jsonld
BioChemEntity_v0.8-DRAFT.jsonld  success!
BioChemicalReaction  fail!
BioChemStructure_v0.1-DRAFT-2019_06_20.json
BioChemStructure_v0.1-DRAFT-2019_06_20.json  success!
BioSample_v0.1-RELEASE.json
BioSample_v0.1-RELEASE.json  success!
BioschemasRelease1_Notes  fail!
BioschemasRelease2_Notes  fail!
ChemicalSubstance_v0.1-DRAFT-2018_12_07.json
ChemicalSubstance_v0.1-DRAFT-2018_12_07.json  success!
ChemicalSubstance_v0.2-DRAFT-2019_06_11.json
ChemicalSubstance_v0.2-DRAFT-2019_06_11.json  success!
ChemicalSubstance_v0.2-RELEASE-2019_06_19.json
ChemicalSubstance_v0.2-RELEASE-2019_06_19.json  success!
ChemicalSubstance_v0.3-DRAFT-2019_11_11.json
ChemicalSubstance_v0.3-DRAFT-2019_11_11.json  success!
ChemicalSubstance_v0.3-RELEASE-2019_09_02.json
ChemicalSubstance_v0.3-RELEASE-2019_09_02.json  success!
ChemicalSubstance_v0.4-RELEASE.jso



ComputationalTool_v0.5-DRAFT.json  success!
ComputationalTool_v0.6-DRAFT.json
ComputationalTool_v0.6-DRAFT.json  success!
ComputationalTool_v1.0-RELEASE.json
ComputationalTool_v1.0-RELEASE.json  success!
ComputationalTool_v1.1-DRAFT.json
ComputationalTool_v1.1-DRAFT.json  success!
ComputationalWorkflow_v0.1-DRAFT-2019_02_08.json
ComputationalWorkflow_v0.1-DRAFT-2019_02_08.json  success!
ComputationalWorkflow_v0.2-DRAFT-2019_11_29.json
ComputationalWorkflow_v0.2-DRAFT-2019_11_29.json  success!
ComputationalWorkflow_v0.3-DRAFT-2020_03_03.json
ComputationalWorkflow_v0.3-DRAFT-2020_03_03.json  success!
ComputationalWorkflow_v0.4-DRAFT-2020_05_11.json
ComputationalWorkflow_v0.4-DRAFT-2020_05_11.json  success!
ComputationalWorkflow_v0.5-DRAFT-2020_07_21.json
ComputationalWorkflow_v0.5-DRAFT-2020_07_21.json  success!
ComputationalWorkflow_v1.0-RELEASE-Type.json
ComputationalWorkflow_v1.0-RELEASE-Type.json  success!
ComputationalWorkflow_v1.0-RELEASE.json
ComputationalWorkflow_v1.0-RELEASE.jso



DataCatalog_v0.1-DRAFT-2018_04_25.json  success!
DataCatalog_v0.2-DRAFT-2018_11_13.json
DataCatalog_v0.2-DRAFT-2018_11_13.json  success!
DataCatalog_v0.2-DRAFT-2019_01_15.json
DataCatalog_v0.2-DRAFT-2019_01_15.json  success!
DataCatalog_v0.2-DRAFT-2019_01_28.json
DataCatalog_v0.2-DRAFT-2019_01_28.json  success!
DataCatalog_v0.2-RELEASE-2019_06_14.json
DataCatalog_v0.2-RELEASE-2019_06_14.json  success!
DataCatalog_v0.3-DRAFT-2019_06_20.json
DataCatalog_v0.3-DRAFT-2019_06_20.json  success!
DataCatalog_v0.3-RELEASE.json
DataCatalog_v0.3-RELEASE.json  success!
DataCatalog_v0.4-DRAFT.json
DataCatalog_v0.4-DRAFT.json  success!
DataRecord_v0.1-DRAFT-2018_04_25-DEPRECATED.json
DataRecord_v0.1-DRAFT-2018_04_25-DEPRECATED.json  success!
DataRecord_v0.2-DRAFT-2019_06_14-DEPRECATED-profile.json
DataRecord_v0.2-DRAFT-2019_06_14-DEPRECATED-profile.json  success!
DataRecord_v0.2-DRAFT-2019_06_14-DEPRECATED.json
DataRecord_v0.2-DRAFT-2019_06_14-DEPRECATED.json  success!
DataRecord_v0.3-DRAFT-2019_06_2

ScholarlyArticle_v0.1-DRAFT-2019_03_15.json  success!
ScholarlyArticle_v0.2-DRAFT-2020_12_03.json
ScholarlyArticle_v0.2-DRAFT-2020_12_03.json  success!
ScholarlyArticle_v0.3-DRAFT.json
ScholarlyArticle_v0.3-DRAFT.json  success!
SemanticTextAnnotation_v0.1-DRAFT-2019_02_08.json
SemanticTextAnnotation_v0.1-DRAFT-2019_02_08.json  success!
SemanticTextAnnotation_v0.1-DRAFT-2019_11_19.json
SemanticTextAnnotation_v0.1-DRAFT-2019_11_19.json  success!
SemanticTextAnnotation_v0.2-DRAFT-2020_12_03.json
SemanticTextAnnotation_v0.2-DRAFT-2020_12_03.json  success!
SemanticTextAnnotation_v0.3-DRAFT.json
SemanticTextAnnotation_v0.3-DRAFT.json  success!
SequenceAnnotation_v0.1-DRAFT-2019_06_21.json
SequenceAnnotation_v0.1-DRAFT-2019_06_21.json  success!
SequenceAnnotation_v0.7-DRAFT.json
SequenceAnnotation_v0.7-DRAFT.json  success!
SequenceMatchingModel_v0.1-DRAFT-2019_06_21.json
SequenceMatchingModel_v0.1-DRAFT-2019_06_21.json  success!
SequenceRange_v0.1-DRAFT-2019_06_21.json
SequenceRange_v0.1-DRAF



Study_v0.2-DRAFT.json  success!
Study_v0.3-DRAFT-type.json
Study_v0.3-DRAFT-type.json  success!
Study_v0.3-DRAFT.json
Study_v0.3-DRAFT.json  success!
Taxon_v0.1-DRAFT-2018_06_27.json




Taxon_v0.1-DRAFT-2018_06_27.json  success!
Taxon_v0.1-DRAFT-2018_09_25.json
Taxon_v0.1-DRAFT-2018_09_25.json  success!
Taxon_v0.1-DRAFT-2018_09_26.json
Taxon_v0.1-DRAFT-2018_09_26.json  success!
Taxon_v0.2-DRAFT-2018_09_26.json
Taxon_v0.2-DRAFT-2018_09_26.json  success!
Taxon_v0.3-DRAFT-2018_11_09.json
Taxon_v0.3-DRAFT-2018_11_09.json  success!
Taxon_v0.3-DRAFT-2018_11_10.json
Taxon_v0.3-DRAFT-2018_11_10.json  success!
Taxon_v0.3-RELEASE-2018_11_10.json
Taxon_v0.3-RELEASE-2018_11_10.json  success!
Taxon_v0.3-RELEASE-2019_11_18.json
Taxon_v0.3-RELEASE-2019_11_18.json  success!
Taxon_v0.4-DRAFT-2019_06_19.json
Taxon_v0.4-DRAFT-2019_06_19.json  success!
Taxon_v0.4-DRAFT-2019_06_24.json
Taxon_v0.4-DRAFT-2019_06_24.json  success!
Taxon_v0.4-DRAFT.json
Taxon_v0.4-DRAFT.json  success!
Taxon_v0.5-DRAFT-2020_04_06.json
Taxon_v0.5-DRAFT-2020_04_06.json  success!
Taxon_v0.6-RELEASE.json
Taxon_v0.6-RELEASE.json  success!
Taxon_v0.7-DRAFT.json




Taxon_v0.7-DRAFT.json  success!
TaxonName_v0.1-DRAFT-TYPE.json
TaxonName_v0.1-DRAFT-TYPE.json  success!
TaxonName_v0.1-DRAFT.json
TaxonName_v0.1-DRAFT.json  success!
TaxonName_v0.2-DRAFT.json
TaxonName_v0.2-DRAFT.json  success!
Tool  fail!
TrainingMaterial_v0.10-DRAFT.json
