In [1]:
import requests
from mira.dkg.client import Neo4jClient

## We define an instance of the Neo4jClient to query for the nodes and edges added to the DKG

In [2]:
client = Neo4jClient()

# We define sample nodes to be added to the DKG

In [3]:
node_list = []

node1 = {
    "id": "ido:0000511",
    "name": "infected population",
    "type": "class",
    "obsolete": False,
    "description": "An organism population whose members have an infection.",
    "synonyms": [],
    "alts": [],
    "xrefs": [],
    "labels": ["ido"],
    "properties": {},
    "link": "string",
    "physical_min": 0,
    "physical_max": 0,
    "suggested_data_type": "string",
    "suggested_unit": "string",
    "typical_min": 0,
    "typical_max": 0,
}

node2 = {
    "id": "ido:0000514",
    "name": "susceptible population",
    "type": "class",
    "obsolete": False,
    "description": "An organism population whose members are not infected with an infectious agent and who lack immunity to the infectious agent.",
    "synonyms": [],
    "alts": [],
    "xrefs": [],
    "labels": ["ido"],
    "properties": {},
    "link": "string",
}


# Define a fully instantiated node
node3 = {
    "id": "ido:0000511",
    "name": "infected population",
    "type": "class",
    "obsolete": False,
    "description": "An organism population whose members have an infection.",
    "synonyms": [{"value": "infected pop", "type": "skos:exactMatch"}],
    "alts": ["ido:0000511-alt1", "ido:0000511-alt2"],
    "xrefs": [
        {"id": "xref:0001", "type": "skos:exactMatch"},
        {"id": "xref:0002", "type": "skos:exactMatch"},
    ],
    "labels": ["ido", "population"],
    "properties": {"property1": ["value1"], "property2": ["value3"]},
}


node_list.append(node1)
node_list.append(node2)

## Test the add_nodes endpoint 

We first test the ```add_nodes``` endpoint that takes in a list of ```Entity``` and ```AskemEntity``` objects and adds them as nodes to the DKG. Only the ```id```, ```obsolete```, and ```type``` properties are mandatory.

In [4]:
response = requests.post("http://localhost:8771/api/add_nodes", json=node_list)
response.status_code

200

## We query for the added nodes

In [5]:
query = f"MATCH (N) WHERE N.id = 'ido:0000511' RETURN N"
print(client.query_tx(query)[0][0]._properties)

{'name': 'infected population', 'obsolete': False, 'description': 'An organism population whose members have an infection.', 'id': 'ido:0000511', 'type': 'class', 'labels': ['ido']}


In [6]:
query = f"MATCH (N) WHERE N.id = 'ido:0000514' RETURN N"
print(client.query_tx(query)[0][0]._properties)

{'name': 'susceptible population', 'obsolete': False, 'description': 'An organism population whose members are not infected with an infectious agent and who lack immunity to the infectious agent.', 'id': 'ido:0000514', 'type': 'class', 'labels': ['ido']}


## Add a fully instantiated node to the DKG
We then add a node with all of its properties supplied. Duplicate nodes (all properties must be matching for a node to be considered duplicate) are not added to the DKG.

In [7]:
node_list.append(node3)
node_list.append(node1)
response = requests.post("http://localhost:8771/api/add_nodes", json=node_list)
response.status_code

200

In [8]:
# We have two node objects returned from the query both with id ido:0000511 even though 
# we used the add_nodes endpoint to add a node with ```id=ido:0000511``` three times

query = f"MATCH (N) WHERE N.id = 'ido:0000511' RETURN N"
for n in client.query_tx(query):
    print(n[0]._properties)
    print()

{'name': 'infected population', 'obsolete': False, 'description': 'An organism population whose members have an infection.', 'id': 'ido:0000511', 'type': 'class', 'labels': ['ido']}

{'xref_types': ['skos:exactMatch', 'skos:exactMatch'], 'synonyms': ['infected pop'], 'alts': ['ido:0000511-alt1', 'ido:0000511-alt2'], 'xrefs': ['xref:0001', 'xref:0002'], 'obsolete': False, 'description': 'An organism population whose members have an infection.', 'type': 'class', 'labels': ['ido', 'population'], 'synonym_types': ['skos:exactMatch'], 'property_values': ['value1', 'value3'], 'property_predicates': ['property1', 'property2'], 'name': 'infected population', 'id': 'ido:0000511'}



# Test the add_relations  endpoint
The ```add_relations``` endpoint takes in a list of ```Relation``` objects and adds the relation to the DKG. All properties of the relation are required. Duplicate relations are not added.

In [9]:
relation_list = [
    {
        "source_curie": "probonto:k0000000",
        "target_curie": "probonto:k0000007",
        "type": "has_parameter",
        "pred": "probonto:c0000062",
        "source": "probonto",
        "graph": "https://raw.githubusercontent.com/probonto/ontology/master/probonto4ols.owl",
        "version": "2.5",
    },
    {
        "source_curie": "geonames:12042053",
        "target_curie": "geonames:292969",
        "type": "part_of",
        "pred": "bfo:0000050",
        "source": "geonames",
        "graph": "geonames",
        "version": "",
    },
]

In [10]:
response = requests.post(
    "http://localhost:8771/api/add_relations", json=relation_list
)
response.status_code

200

# We query for the added relations

In [11]:
source_curie = "probonto:k0000000"
target_curie = "probonto:k0000007"
rel_type = "has_parameter"

relation_query = f"MATCH (source_node {{id: '{source_curie}'}}), (target_node {{id: '{target_curie}'}}) MATCH (source_node)-[rel:{rel_type}]->(target_node) RETURN source_node, rel, target_node"

result = client.query_tx(relation_query)

print(f"Source Node : {result[0][0]._properties} \n")
print(f"Relation : {result[0][1]._properties} \n")
print(f"Target Node : {result[0][2]._properties} \n")

Source Node : {'property_values': ['probability'], 'xref_types': ['askemo:0000016'], 'property_predicates': ['has_parameter'], 'name': 'Bernoulli1', 'obsolete': False, 'xrefs': ['probonto:k0000028'], 'id': 'probonto:k0000000', 'source': 'probonto', 'type': 'class', 'version': '2.5'} 

Relation : {'pred': 'probonto:c0000062', 'source': 'probonto', 'version': '2.5', 'graph': 'https://raw.githubusercontent.com/probonto/ontology/master/probonto4ols.owl'} 

Target Node : {'synonym_types': ['referenced_by_latex', 'oboInOwl:hasExactSynonym'], 'synonyms': ['p', 'probability of success'], 'name': 'probability', 'obsolete': False, 'id': 'probonto:k0000007', 'type': 'class', 'version': '2.5'} 



In [12]:
source_curie = "geonames:12042053"
target_curie = "geonames:292969"
rel_type = "part_of"

relation_query = f"MATCH (source_node {{id: '{source_curie}'}}), (target_node {{id: '{target_curie}'}}) MATCH (source_node)-[rel:{rel_type}]->(target_node) RETURN source_node, rel, target_node"

result = client.query_tx(relation_query)

print(f"Source Node : {result[0][0]}._properties \n")
print(f"Relation : {result[0][1]._properties} \n")
print(f"Target Node : {result[0][2]._properties} \n")

Source Node : <Node element_id='5849' labels=frozenset() properties={'synonym_types': ['oboinowl:SynonymType', 'oboinowl:SynonymType', 'oboinowl:SynonymType', 'oboinowl:SynonymType', 'oboinowl:SynonymType'], 'synonyms': ['Musaffa', 'Musaffah City', 'msfh', 'Мусаффа', 'مصفح'], 'name': 'Musaffah', 'obsolete': False, 'id': 'geonames:12042053', 'type': 'individual'}>._properties 

Relation : {'pred': 'bfo:0000050', 'source': 'geonames', 'version': '', 'graph': 'geonames'} 

Target Node : {'property_values': ['AE.01'], 'property_predicates': ['code'], 'name': 'Abu Dhabi', 'obsolete': False, 'id': 'geonames:292969', 'type': 'individual'} 



# Test the add_resources endpoint
The ```add_resources``` endpoint accepts a list of strings that represent resource prefixes. Nodes and edges are extracted from each resource and then added to the DKG. The resources that can be added are ```eiffel, cso, wikidata, probonto, ncit, ncbitaxon, geonames```. The names are not case-sensitive and invalid resource prefixes are ignored. 

In [13]:
resource_list = [
    "probonto",
    "wikidata",
    "eiffel",
    "geonames",
    "ncit",
    "nbcbitaxon",
    "cso",
]

response = requests.post(
    "http://localhost:8771/api/add_resources", json=resource_list
)
response.status_code

200

# We then query for some of the added nodes from the resources processed

In [14]:
query = f"MATCH (N) WHERE N.id = 'wikidata:Q112300321' RETURN N"
client.query_tx(query)[0][0]._properties

{'synonym_types': ['debio:0000031'],
 'property_values': ['0.01438776877'],
 'xref_types': ['oboinowl:hasDbXref'],
 'synonyms': ['c_{2}'],
 'property_predicates': ['debio:0000042'],
 'xrefs': ['nist.codata:c22ndrc'],
 'name': 'second radiation constant',
 'obsolete': False,
 'description': "constant in Wien's radiation law",
 'id': 'wikidata:Q112300321',
 'type': 'class'}

In [15]:
query = f"MATCH (N) WHERE N.id = 'geonames:12042053' RETURN N"
client.query_tx(query)[0][0]._properties

{'synonym_types': ['oboinowl:SynonymType',
  'oboinowl:SynonymType',
  'oboinowl:SynonymType',
  'oboinowl:SynonymType',
  'oboinowl:SynonymType'],
 'synonyms': ['Musaffa', 'Musaffah City', 'msfh', 'Мусаффа', 'مصفح'],
 'name': 'Musaffah',
 'obsolete': False,
 'id': 'geonames:12042053',
 'type': 'individual'}

In [16]:
query = f"MATCH (N) WHERE N.id = 'ncit:C123547' RETURN N"
client.query_tx(query)[0][0]._properties

{'synonym_types': ['oboinowl:SynonymType', 'oboinowl:SynonymType'],
 'property_values': ['C123547',
  'Eukaryote',
  'Plasmodium falciparum',
  'C0032150',
  'CDISC',
  'Any unicellular, eukaryotic organism that can be assigned to the species Plasmodium falciparum.',
  '5833'],
 'synonyms': ['PLASMODIUM FALCIPARUM', 'Plasmodium falciparum'],
 'property_predicates': ['NCIT:NHC0',
  'NCIT:P106',
  'NCIT:P108',
  'NCIT:P207',
  'NCIT:P322',
  'NCIT:P325',
  'NCIT:P331'],
 'name': 'Plasmodium falciparum',
 'obsolete': False,
 'description': 'A protozoan parasite in the family Plasmodiidae. P. falciparum is transmitted by the female Anopheles mosquito and is a causative agent of malaria in humans. The malaria caused by this species is the most dangerous form of malaria.',
 'id': 'ncit:C123547',
 'type': 'class'}