# Install libraries

In [35]:
%%capture
!pip install wikidataintegrator
!pip install graphviz

# Import libraries and source data

In [37]:
from wikidataintegrator import wdi_core, wdi_login
import json
import copy
from datetime import datetime
import requests
import graphviz
ontologies = json.loads(requests.get("https://obofoundry.org/registry/ontologies.jsonld").text)

# Set QID and Graphviz constants

In [48]:
timeStringNow = datetime.now().strftime("+%Y-%m-%dT00:00:00Z")
refRetrieved = wdi_core.WDTime(timeStringNow, prop_nr="P813", is_reference=True)
obo_reference = wdi_core.WDUrl(value="https://obofoundry.org/", prop_nr="P854", is_reference=True)
obo_data_link = wdi_core.WDUrl(value="https://obofoundry.org/registry/ontologies.jsonld", prop_nr="P854", is_reference=True)
obo_reference = [obo_reference, obo_data_link, refRetrieved]

# Graphviz
symbol = dict()
symbol["wditem"] = "oval"
symbol["datatype"] = "octagon"
symbol["string"] = "rectangle"
symbol["entityschema"]="diamond"
symbol["oneof"]='record'
symbol["bnode"]='point'

# Get licenses and their QIDS

In [28]:
licensesQids = dict()
for ontology in ontologies["ontologies"]:
  if "license" in ontology.keys():
   if ontology["license"]["label"] not in licensesQids.keys():
        try:
            licensesQids[ontology["license"]["label"]] = dict()
            licensesQids[ontology["license"]["label"]]["url"] = ontology["license"]["url"]
            query = f"""
                        SELECT * WHERE {{
                            ?item wdt:P2888 <{ontology["license"]["url"]}>  ;
                        }}"""
            results = wdi_core.WDFunctionsEngine.execute_sparql_query(query)
            print(query)
            licensesQids[ontology["license"]["label"]]["qid"] = results["results"]["bindings"][0]["item"]["value"]
        except:
            print("mismatch")
licensesQids


                        SELECT * WHERE {
                            ?item wdt:P2888 <https://creativecommons.org/licenses/by/4.0/>  ;
                        }

                        SELECT * WHERE {
                            ?item wdt:P2888 <http://creativecommons.org/licenses/by/3.0/>  ;
                        }

                        SELECT * WHERE {
                            ?item wdt:P2888 <https://creativecommons.org/publicdomain/zero/1.0/>  ;
                        }

                        SELECT * WHERE {
                            ?item wdt:P2888 <https://creativecommons.org/licenses/by/2.0/>  ;
                        }

                        SELECT * WHERE {
                            ?item wdt:P2888 <http://www.apache.org/licenses/LICENSE-2.0>  ;
                        }

                        SELECT * WHERE {
                            ?item wdt:P2888 <https://hpo.jax.org/app/license>  ;
                        }

                        SELECT * WHER

{'CC BY 4.0': {'url': 'https://creativecommons.org/licenses/by/4.0/',
  'qid': 'http://www.wikidata.org/entity/Q20007257'},
 'CC BY 3.0': {'url': 'http://creativecommons.org/licenses/by/3.0/',
  'qid': 'http://www.wikidata.org/entity/Q14947546'},
 'CC0 1.0': {'url': 'https://creativecommons.org/publicdomain/zero/1.0/',
  'qid': 'http://www.wikidata.org/entity/Q6938433'},
 'CC BY 2.0': {'url': 'https://creativecommons.org/licenses/by/2.0/',
  'qid': 'http://www.wikidata.org/entity/Q19125117'},
 'Apache 2.0 License': {'url': 'http://www.apache.org/licenses/LICENSE-2.0',
  'qid': 'http://www.wikidata.org/entity/Q13785927'},
 'hpo': {'url': 'https://hpo.jax.org/app/license',
  'qid': 'http://www.wikidata.org/entity/Q112817130'},
 'Artistic License 2.0': {'url': 'http://opensource.org/licenses/Artistic-2.0',
  'qid': 'http://www.wikidata.org/entity/Q14624826'},
 'GPL-3.0': {'url': 'https://www.gnu.org/licenses/gpl-3.0.en.html',
  'qid': 'http://www.wikidata.org/entity/Q10513445'}}

# Build Wikidata statements for a single OBO ontology

In [54]:
#    if command == "attr":
##        localviz.attr(arg1, shape=shape)
#        schema.attr(arg1, shape=shape)
#    if command == "node":
#        localviz.node(arg1,label=label)
#        schema.node(arg1,label=label)
#    if command == "edge":
#        localviz.edge(arg1, arg2, label=label)
#        schema.edge(arg1, arg2, label=label)

schema = graphviz.Digraph()
schema.attr('node', shape=symbol['entityschema'])
schema.node('oboontology',label="Ontology in OBO foundry")
schema.node('oboreference', label="reference")
schema.node('oborank', label="wikibase rank")
schema.attr('node', shape=symbol['wditem'])
schema.node('Q324254', label="wd:Q324254 (ontology)")
schema.node('Q4117183', label="wd:Q4117183 (Open Biomedical Ontologies)")
schema.attr('node', shape=symbol['bnode'])
schema.node('P31Q324254')
schema.node('P361Q4117183')
schema.node('P2888URI')
schema.node('P1813String')
schema.node('P275licenses')
schema.attr('node', shape=symbol['datatype'])
schema.node('P856URIvalue', label="URL")
schema.node('P2888URIvalue', label="URI")
schema.node('P1813Stringvalue', label="string")
schema.node('P5933twittervalue', label="string")
schema.attr('node', shape=symbol['oneof'])
schema.node('P275licensesList', label='{ wd:Q20007257 (CC BY 4.0) | wd:Q14947546 (CC BY 3.0) | wd:Q6938433 (CC0 1.0) |wd:Q19125117 (CC BY 2.0) |wd:Q13785927 (Apache 2.0 License) |wd:Q112817130 (hpo) |wd:Q14624826 (Artistic License 2.0) |wd:Q10513445 (GPL-3.0) }')

# P31
schema.edge('oboontology', 'P31Q324254', label="p:P31 (instance of)")
schema.edge('P31Q324254', 'Q324254', label="ps:P31 (instance of (value)")
schema.edge('P31Q324254', 'oboreference', label="prov:wasDerivedFrom")
schema.edge('P361Q4117183', 'oborank', label="wikibase:rank")

# P361
schema.edge('oboontology', 'P361Q4117183', label="p:P361 (part of)")
schema.edge('P361Q4117183', 'Q4117183', label="ps:P361 (part of value)")
schema.edge('P361Q4117183', 'oboreference', label="prov:wasDerivedFrom")
schema.edge('P361Q4117183', 'oborank', label="wikibase:rank")

# P856
schema.edge('oboontology', 'P856URI', label="p:P856 (homepage)")
schema.edge('P856URI', 'P856URIvalue', label="ps:P856 (homepage value)")
schema.edge('P856URI', 'oboreference', label="prov:wasDerivedFrom")
schema.edge('P856URI', 'oborank', label="wikibase:rank")

# P2888
schema.edge('oboontology', 'P2888URI', label="p:P2888 (exact match)")
schema.edge('P2888URI', 'P2888URIvalue', label="ps:P2888 (exact match)")
schema.edge('P2888URI', 'oboreference', label="prov:wasDerivedFrom")
schema.edge('P2888URI', 'oborank', label="wikibase:rank")

# P1813 (short name)
schema.edge('oboontology', 'P1813String', label="p:P1813 (short name)")
schema.edge('P1813String', 'P1813Stringvalue', label="ps:P1813 (short name value)")
schema.edge('P1813String', 'oboreference', label="prov:wasDerivedFrom")
schema.edge('P1813String', 'oborank', label="wikibase:rank")

# P275 (license)
schema.edge('oboontology', 'P275licenses', label="p:P275 (license)")
schema.edge('P275licenses', 'P275licensesList', label="ps:P1813 (short name value)")
schema.edge('P275licenses', 'oboreference', label="prov:wasDerivedFrom")
schema.edge('P275licenses', 'oborank', label="wikibase:rank")

# P5933 (twitter handle)
schema.edge('oboontology', 'P5933twitter', label="p:P5933 (twitter)")
schema.edge('P5933twitter', 'P5933twittervalue', label="p:P5933 (twitter value)")
schema.edge('P5933twitter', 'oboreference', label="prov:wasDerivedFrom")
schema.edge('P5933twitter', 'oborank', label="wikibase:rank")

for ontology in ontologies["ontologies"]:
    schema.node('oboontology',label="Ontology in OBO foundry")
    statements = []
    statements.append(wdi_core.WDItemID(value="Q324254", prop_nr="P31", references=[copy.deepcopy(obo_reference)]))
    statements.append(wdi_core.WDItemID(value="Q4117183", prop_nr="P361", references=[copy.deepcopy(obo_reference)]))

    if ontology["activity_status"] == "Active":
      # exact match
      if "homepage" in ontology.keys():
        if ontology["homepage"]:
          try:
            statements.append(wdi_core.WDUrl(value=ontology["homepage"], prop_nr="P856", references=[copy.deepcopy(obo_reference)]))
          except: 
            statements.append(wdi_core.WDUrl(value="https://"+ontology["homepage"], prop_nr="P856", references=[copy.deepcopy(obo_reference)]))
      if "ontology_purl" in ontology.keys():
        statements.append(wdi_core.WDUrl(value=ontology["ontology_purl"], prop_nr="P2888", references=[copy.deepcopy(obo_reference)]))
        #print(ontology["ontology_purl"])
      # short name
      statements.append(wdi_core.WDMonolingualText(value=ontology["id"], prop_nr="P1813", language="mul", references=[copy.deepcopy(obo_reference)]))
      # license 
      if "license" in ontology.keys():
        statements.append(wdi_core.WDItemID(value=licensesQids[ontology["license"]["label"]]["qid"].replace("http://www.wikidata.org/entity/", ""), prop_nr="P275", references=[copy.deepcopy(obo_reference)]))
      if "twitter" in ontology.keys():
        statements.append(wdi_core.WDString(value=ontology["twitter"], prop_nr="P5933", references=[copy.deepcopy(obo_reference)]))
schema.view()

'Digraph.gv.pdf'

In [57]:
for ontology in ontologies["ontologies"]:
  if "homepage" in ontology.keys():
    query = f" SELECT * WHERE {{?s ?p [?ps <{ontology['homepage']}> ;] . ?prop wikibase:claim ?p . }}"
    result = wdi_core.WDFunctionsEngine.execute_sparql_query(query)
    if len(result["results"]["bindings"]) ==0:
      print(ontology["id"], ontology["title"], len(result["results"]["bindings"]), ontology["activity_status"])
      #print(query)
      for resultje in result["results"]["bindings"]:
        print(ontology["id"], resultje["s"]["value"])


ado Alzheimer's Disease Ontology 0 active
apo Ascomycete phenotype ontology 0 active
bco Biological Collections Ontology 0 active
cdno Compositional Dietary Nutrition Ontology 0 active
clo Cell Line Ontology 0 active
cob Core Ontology for Biology and Biomedicine 0 active
dron The Drug Ontology 0 active
eupath VEuPathDB ontology 0 active
foodon Food Ontology 0 active
ontoneo Obstetric and Neonatal Ontology 0 active
ro Relation Ontology 0 active
so Sequence types and features ontology 0 active
vbo Vertebrate Breed Ontology 0 active
wbbt C. elegans Gross Anatomy Ontology 0 active
wbls C. elegans development ontology 0 active
eco Evidence ontology 0 active
hp Human Phenotype Ontology 0 active
kisao Kinetic Simulation Algorithm Ontology 0 active
sbo Systems Biology Ontology 0 active
scdo Sickle Cell Disease Ontology 0 active
sibo Social Insect Behavior Ontology 0 orphaned
vario Variation Ontology 0 orphaned
ceph Cephalopod Ontology 0 inactive
ehdaa2 Human developmental anatomy, abstract 0 i

In [None]:
for ontology in ontologies["ontologies"]:
  if "homepage" in ontology.keys():
    escape_quote_ontology = ontology['title'].replace("'", "\\'")
    
    query = f"""
             SELECT * WHERE {{
                   {{?s rdfs:label '{escape_quote_ontology}'@en  . }}
                   UNION 
                   {{?s skos:altLabel '{escape_quote_ontology}'@en .}}
                   }}"""
    result = wdi_core.WDFunctionsEngine.execute_sparql_query(query)

    if (len(result["results"]["bindings"])) == 0:
      print(escape_quote_ontology, ontology['activity_status'])

In [58]:
import sys
for ontology in ontologies["ontologies"]:
    if "homepage" in ontology.keys():
        escape_quote_ontology = ontology['title'].replace("'", "\\'")
    query = f"""
             SELECT * WHERE {{
                   {{?s rdfs:label '{escape_quote_ontology}'@en  . }}
                   UNION 
                   {{?s skos:altLabel '{escape_quote_ontology}'@en .}}
                   }}"""
    result = wdi_core.WDFunctionsEngine.execute_sparql_query(query)
    if (len(result["results"]["bindings"])) == 0:
        print(escape_quote_ontology, ontology['activity_status'])
        if ontology["activity_status"] == "active":
            print("ik kom hier")
            statements = []
            statements.append(wdi_core.WDItemID(value="Q324254", prop_nr="P31", references=[copy.deepcopy(obo_reference)]))
            statements.append(wdi_core.WDItemID(value="Q4117183", prop_nr="P361", references=[copy.deepcopy(obo_reference)]))
        
            # exact match
            if "homepage" in ontology.keys():
                if ontology["homepage"]:
                    try:
                        statements.append(wdi_core.WDUrl(value=ontology["homepage"], prop_nr="P856", references=[copy.deepcopy(obo_reference)]))
                    except: 
                        statements.append(wdi_core.WDUrl(value="https://"+ontology["homepage"], prop_nr="P856", references=[copy.deepcopy(obo_reference)]))
      
            if "ontology_purl" in ontology.keys():
                statements.append(wdi_core.WDUrl(value=ontology["ontology_purl"], prop_nr="P2888", references=[copy.deepcopy(obo_reference)]))
      
            # short name
            statements.append(wdi_core.WDMonolingualText(value=ontology["id"], prop_nr="P1813", language="mul", references=[copy.deepcopy(obo_reference)]))
            # license 
            if "license" in ontology.keys():
                statements.append(wdi_core.WDItemID(value=licensesQids[ontology["license"]["label"]]["qid"].replace("http://www.wikidata.org/entity/", ""), prop_nr="P275", references=[copy.deepcopy(obo_reference)]))
            if "twitter" in ontology.keys():
                statements.append(wdi_core.WDString(value=ontology["twitter"], prop_nr="P5933", references=[copy.deepcopy(obo_reference)]))
            
            # publications
            if "publication" in ontology.keys():
                pmidquery = f"""SELECT * WHERE {{
                               ?item wdt:P698 "{ontology["publication"]["id"].replace("https://www.ncbi.nlm.nih.gov/pubmed/", "")}" .
                            }}"""
                pmidresult = wdi_core.WDFunctionsEngine.execute_sparql_query(pmidquery)
                for pmidqid in pmidresult["results"]["bindings"]:
                    statements.append(wdi_core.WDItemId(value=pmidqid["item"]["value"].replace("http://www.wikidata.org/entity", ""), prop_nr="P1343", references=[copy.deepcopy(obo_reference)]))

            item = wdi_core.WDItemEngine(new_item=True, data=statements)
            escape_quote_ontology = ontology['title'].replace("'", "\\'")
            item.set_label(escape_quote_ontology, lang="en")
            item.set_description("ontology part of OBOFoundry", lang="en")
            item.set_aliases(aliases=[ontology["id"]], lang="en")
            print(item.write(login))
            sys.exit("first write")

Mouse pathology ontology active
ik kom hier
Q114295705


SystemExit: first write

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [59]:
WDUSER = ""
WDPASS = ""
login = wdi_login.WDLogin(WDUSER, WDPASS)

login failed: The supplied credentials could not be authenticated.


ValueError: login FAILED!!

In [None]:
for ontology in ontologies["ontologies"]:
  if "dependencies" in ontology.keys():
    for dependency in ontology["dependencies"]:
      print(ontology["id"], dependency)

In [39]:
licensesQids

{'CC BY 4.0': {'url': 'https://creativecommons.org/licenses/by/4.0/',
  'qid': 'http://www.wikidata.org/entity/Q20007257'},
 'CC BY 3.0': {'url': 'http://creativecommons.org/licenses/by/3.0/',
  'qid': 'http://www.wikidata.org/entity/Q14947546'},
 'CC0 1.0': {'url': 'https://creativecommons.org/publicdomain/zero/1.0/',
  'qid': 'http://www.wikidata.org/entity/Q6938433'},
 'CC BY 2.0': {'url': 'https://creativecommons.org/licenses/by/2.0/',
  'qid': 'http://www.wikidata.org/entity/Q19125117'},
 'Apache 2.0 License': {'url': 'http://www.apache.org/licenses/LICENSE-2.0',
  'qid': 'http://www.wikidata.org/entity/Q13785927'},
 'hpo': {'url': 'https://hpo.jax.org/app/license',
  'qid': 'http://www.wikidata.org/entity/Q112817130'},
 'Artistic License 2.0': {'url': 'http://opensource.org/licenses/Artistic-2.0',
  'qid': 'http://www.wikidata.org/entity/Q14624826'},
 'GPL-3.0': {'url': 'https://www.gnu.org/licenses/gpl-3.0.en.html',
  'qid': 'http://www.wikidata.org/entity/Q10513445'}}

In [52]:
print("{ ", end="")
for license in licensesQids.keys():
    print(licensesQids[license]['qid'].replace("http://www.wikidata.org/entity/", "wd:")+" ("+license+") | ", end="")
print("}", end="")

{ wd:Q20007257 (CC BY 4.0) | wd:Q14947546 (CC BY 3.0) | wd:Q6938433 (CC0 1.0) | wd:Q19125117 (CC BY 2.0) | wd:Q13785927 (Apache 2.0 License) | wd:Q112817130 (hpo) | wd:Q14624826 (Artistic License 2.0) | wd:Q10513445 (GPL-3.0) | }

In [56]:
for license in licensesQids.keys():
    print(licensesQids[license]['qid'].replace("http://www.wikidata.org/entity/", "wd:")+" # "+license)

wd:Q20007257 # CC BY 4.0
wd:Q14947546 # CC BY 3.0
wd:Q6938433 # CC0 1.0
wd:Q19125117 # CC BY 2.0
wd:Q13785927 # Apache 2.0 License
wd:Q112817130 # hpo
wd:Q14624826 # Artistic License 2.0
wd:Q10513445 # GPL-3.0
