In [None]:
!pip install rdflib-neo4j openai==0.28 retry

In [None]:
import os
from urllib.parse import quote_plus

from rdflib import Namespace, Graph, URIRef, RDF, SKOS, Literal
from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY

## Configuring AURA DB Credentials

In [None]:
# Connection settings for the Neo4j AuraDB instance.
# A value exported in the environment (AURA_DB_URI / AURA_DB_USERNAME / AURA_DB_PWD) takes
# precedence, so secrets do not have to be saved in the notebook; when the variable is not
# set, the fallback literal below is used (edit it to configure the notebook by hand).
AURA_DB_URI = os.environ.get("AURA_DB_URI", "")
AURA_DB_USERNAME = os.environ.get("AURA_DB_USERNAME", "neo4j")
AURA_DB_PWD = os.environ.get("AURA_DB_PWD", "")

## Configuring Neo4j-Store Credentials

In [None]:
# Namespaces handed to the store configuration, keyed by prefix name
prefixes = dict(
    neo=Namespace('http://neo4j.org/vocab/sw#'),
    oboInOwl=Namespace('http://www.geneontology.org/formats/oboInOwl#'),
)

# Connection details built from the AURA_DB_* settings
auth_data = dict(
    uri=AURA_DB_URI,
    database="neo4j",
    user=AURA_DB_USERNAME,
    pwd=AURA_DB_PWD,
)

# Store configuration: MAP vocabulary-URI strategy, batched writes of 2000
config = Neo4jStoreConfig(
    auth_data=auth_data,
    custom_prefixes=prefixes,
    handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.MAP,
    batching=True,
    batch_size=2000,
)

# Each entry is passed as (prefix, local name, replacement name) to set_custom_mapping;
# presumably the replacement is the name used in Neo4j for that vocabulary term — confirm
# against the rdflib-neo4j documentation.
for prefix_name, local_name, mapped_name in (
    ("rdfs", "subClassOf", "HAS_PARENT"),
    ("owl", "Class", "DO_Disease"),
    ("oboInOwl", "hasDbXref", "SAME_AS"),
):
    config.set_custom_mapping(prefix_name, local_name, mapped_name)

# Import by reference, passing a URL

In [None]:
# Open an rdflib graph backed by the Neo4j store built from `config`
graph_store = Graph(store=Neo4jStore(config=config))

# SPARQL CONSTRUCT query for the Wikidata endpoint used below. For every matched item ?dis it
# emits a neo:WD_Disease typing, the English label, and (when the optional patterns match)
# neo:HAS_PARENT and neo:SAME_AS links to MeSH / Disease Ontology URIs built with concat().
# NOTE(review): the wd:, wdt: and rdfs: prefixes are not declared in the query — presumably the
# endpoint predefines them; confirm.
q = """PREFIX neo: <neo://voc#>
construct {
  ?dis a neo:WD_Disease ;
     neo:label ?disName ;
     neo:HAS_PARENT ?parentDisease ;
     neo:SAME_AS ?meshUri ;
     neo:SAME_AS ?diseaseOntoUri .
}
where {
  ?dis wdt:P31/wdt:P279* wd:Q18123741 ;
       rdfs:label ?disName . filter(lang(?disName) = "en")

  optional { ?dis wdt:P279 ?parentDisease .
             ?parentDisease wdt:P31/wdt:P279* wd:Q18123741 }
  optional { ?dis wdt:P486 ?meshCode . bind(URI(concat("http://id.nlm.nih.gov/mesh/",?meshCode))  as ?meshUri) }
  optional { ?dis wdt:P699 ?diseaseOntoId .  bind(URI(concat("http://purl.obolibrary.org/obo/",REPLACE(?diseaseOntoId, ":", "_")))  as ?diseaseOntoUri) }
}"""
# URL-encode the query text into the endpoint's `query` parameter
file_path_wiki = f"https://query.wikidata.org/sparql?query={quote_plus(q)}"

# Fetch the URL and parse the response as Turtle into the store-backed graph
graph_store.parse(file_path_wiki,format="ttl")

# NOTE(review): the positional True is presumably the commit_pending_transaction flag — confirm
graph_store.close(True)

In [None]:
# Open a fresh rdflib graph backed by the Neo4j store built from `config`
graph_store = Graph(store=Neo4jStore(config=config))

# SPARQL CONSTRUCT query for the MeSH endpoint used below. It selects every ?s that reaches
# mesh:D007239 through zero or more meshv:broaderDescriptor steps and emits a neo:Mesh_Disease
# typing, its rdfs:label as neo:label, and (optionally) its direct broader descriptor as
# neo:HAS_PARENT.
q2 = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
PREFIX neo: <neo://voc#>

CONSTRUCT {
?s a neo:Mesh_Disease;
     neo:label ?name ;
     neo:HAS_PARENT ?parentDescriptor .
}
FROM <http://id.nlm.nih.gov/mesh>
WHERE {
  {
    ?s meshv:broaderDescriptor* mesh:D007239
  }

  ?s rdfs:label ?name .

  optional {
    ?s meshv:broaderDescriptor ?parentDescriptor .
  }

}"""
# URL-encode the query; the endpoint is asked for Turtle through the `format` parameter
file_path_nlm = f"https://id.nlm.nih.gov/mesh/sparql?format=TURTLE&query={quote_plus(q2)}"

# Fetch the URL and parse the response as Turtle into the store-backed graph
graph_store.parse(file_path_nlm,format="ttl")

# NOTE(review): the positional True is presumably the commit_pending_transaction flag — confirm
graph_store.close(True)

# Deleting shortcuts from the graph

In [None]:
# Re-open the store-backed graph; it is also used by the cells that follow
graph_store = Graph(store=Neo4jStore(config=config))

# For each disease label, delete a direct HAS_PARENT relationship from a child to a node
# that the same child also reaches through a HAS_PARENT path of length two or more.
for disease_label in ("WD_Disease", "Mesh_Disease"):
    graph_store.store.driver.execute_query(
        f"""MATCH (v:{disease_label})<-[co:HAS_PARENT*2..]-(child)-[shortcut:HAS_PARENT]->(v) DELETE shortcut"""
    )

In [None]:
# Parse the doid.owl file (as RDF/XML) from its URL into `graph_store`
graph_store.parse("http://purl.obolibrary.org/obo/doid.owl",format="xml")

In [None]:
# Read doid.owl into an in-memory graph so that its "MESH:<id>" cross-reference literals can
# be rewritten as MeSH URIs and added to the store-backed graph as hasDbXref links.
g = Graph().parse("http://purl.obolibrary.org/obo/doid.owl",format="xml")

HAS_DB_XREF = URIRef("http://www.geneontology.org/formats/oboInOwl#hasDbXref")
MESH_XREF_PREFIX = "MESH:"
MESH_URI_BASE = "http://id.nlm.nih.gov/mesh/"

# Let the graph filter on the predicate instead of testing every triple in Python.
for (subject, predicate, obj) in g.triples((None, HAS_DB_XREF, None)):
    xref = str(obj)
    if not xref.startswith(MESH_XREF_PREFIX):
        continue
    # Keep the whole identifier that follows the prefix. Taking the last six characters and
    # prepending "D" produced wrong URIs for identifiers that are not "D" + six digits
    # (e.g. "MESH:C535306" became ".../D535306").
    mesh_id = xref[len(MESH_XREF_PREFIX):].strip()
    if mesh_id:
        graph_store.add((subject, predicate, URIRef(MESH_URI_BASE + mesh_id)))
graph_store.close(commit_pending_transaction=True)

# OpenAI Part

In [None]:
import os
import openai

## Setting up the OpenAI API key

In [None]:
# Use an OPENAI_API_KEY that is already exported in the environment; the placeholder is only
# installed when the variable is missing, so a real key is never overwritten by it.
os.environ.setdefault("OPENAI_API_KEY", 'sk-...')
openai.api_key = os.getenv('OPENAI_API_KEY')

# Re-open the store-backed graph for the cells below
graph_store = Graph(store=Neo4jStore(config=config))

Uniqueness constraint on :Resource(uri) found. 
                
                


## Getting equality check from LLM

In [None]:
from retry import retry

@retry(tries=2, delay=5)
def process_gpt(system, prompt):
    """Send one system/user message pair to the chat completion API and return the reply text.

    Args:
        system: Content of the ``system`` message.
        prompt: Content of the ``user`` message.

    Returns:
        The ``content`` of the message in the first choice of the completion.

    The function is wrapped by ``retry`` configured with ``tries=2`` and ``delay=5``.
    """
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-4",
        max_tokens=2500,
        # temperature 0: aim for output that is as deterministic as possible
        temperature=0,
        messages=conversation,
    )
    return response.choices[0].message.content

def get_incomplete_relationship(first_disease, second_disease):
    """Ask the chat model whether two disease names are the same.

    Args:
        first_disease: First disease name, interpolated into the user prompt.
        second_disease: Second disease name, interpolated into the user prompt.

    Returns:
        Whatever ``process_gpt`` returns for the system and user prompts built here.
    """
    system_message = """Given two different disease names, tell me if they are the same."""
    user_message = f"""The first name is {first_disease},  and the second name is: {second_disease}"""
    return process_gpt(system_message, user_message)

In [None]:
# Run a Cypher query through the store's driver and keep element 0 of the returned result
# (presumably the list of records — verify against the driver's execute_query documentation).
# The pattern matches WD_Disease - DO_Disease - Mesh_Disease chains connected by SAME_AS where
# the WD_Disease and Mesh_Disease nodes have no direct SAME_AS link, the WD_Disease and
# Mesh_Disease nodes each have exactly one SAME_AS relationship, and the DO_Disease has two.
# Each returned row carries the uri and label of both end nodes.
incomplete = list(graph_store.store.driver.execute_query(
"""MATCH incomplete = (wdid:WD_Disease)-[:SAME_AS]-(do:DO_Disease)-[:SAME_AS]-(md:Mesh_Disease)
WHERE NOT EXISTS {WITH md, wdid MATCH (md)-[:SAME_AS]-(wdid)} AND
      count{WITH wdid match (wdid)-[r:SAME_AS]-() RETURN r} = count{WITH md match (md)-[r:SAME_AS]-() RETURN r} = 1 AND count{WITH do match (do)-[r:SAME_AS]-() RETURN r}  = 2
RETURN wdid.uri as wdid_uri, wdid.label as wdid_label, md.uri as md_uri,md.label as md_label"""))[0]

# Display the result as the cell output
incomplete

In [None]:
# Ask the LLM, one record at a time, whether the two labels name the same disease;
# answers are stored in the same order as `incomplete`.
chat_gpt_similarity = []
for record in incomplete:
    verdict = get_incomplete_relationship(record["wdid_label"], record["md_label"])
    chat_gpt_similarity.append(verdict)

chat_gpt_similarity

In [None]:
# Cypher statement that merges a SIMILAR_TO_GPT relationship between the two Resource nodes
# identified by uri and stores the LLM answer on it.
link_query = """
      MATCH (wdid:Resource{uri:$wdid_uri}), (md:Resource{uri:$md_uri})
      MERGE (wdid)-[r:SIMILAR_TO_GPT]->(md)
      SET r.text = $text
      """

# Walk the records together with their LLM answers; only answers containing "Yes," create a link.
for record, gpt_answer in zip(incomplete, chat_gpt_similarity):
    if "Yes," not in gpt_answer:
        continue
    print(f"""Creating the relationship between WD_Disease: {record["wdid_label"]} and MD_Disease: {record["md_label"]}""")
    graph_store.store.driver.execute_query(
        link_query,
        wdid_uri=record["wdid_uri"],
        md_uri=record["md_uri"],
        text=gpt_answer,
    )

graph_store.close()