In [1]:
import os
import openai
import networkx as nx
import xml.etree.ElementTree as ET
import requests
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph
from neo4j import GraphDatabase

In [3]:
PARENT_PATH = Path.cwd().parent
if 'publishingchatgptpocweb' not in str(PARENT_PATH):
    PARENT_PATH = PARENT_PATH / 'publishingchatgptpocweb'

CORE_DIRECTORY = PARENT_PATH / 'core'
DATA_DIRECTORY = PARENT_PATH / 'data'

SPAQRQL_QUERY_FILE_PATH = CORE_DIRECTORY / 'sparql_query_template.sparql'
JATS_DATA_DIRECTORY_PATH = DATA_DIRECTORY / 'raw'
BASIC_GRAPH_SAVED_FILE_PATH = DATA_DIRECTORY / 'processed' / 'basic_graph.gml'
OPEN_AI_GRAPH_SAVED_FILE_PATH = DATA_DIRECTORY / 'processed' / 'open_ai_graph.gml'
USE_OPEN_AI=True

In [4]:
OPEN_AI_API_SECRET = os.getenv('Open__AI__API__Secret')
openai.api_key = OPEN_AI_API_SECRET

In [5]:
graph = Neo4jGraph(
    url="bolt://localhost:7687", username="neo4j", password="TGPPubCABIFirstAttempt"
)

In [None]:
# graph.query(
#     """
# MERGE (a:Article {pan: "20183000178", title: "The ebola epidemic in West Africa: proceedings of a workshop.", abstract: "This proceedings summarizes the presentations and discussions of the workshop convened by the Forum of Microbial Threats in March 2015, and consists of the information presented, questions raised, and improvements recommended by individual workshop participants. Chapter 2 outlines the trajectory and response to the most recent Ebola outbreak that occurred in West Africa. It includes lessons learned from previous outbreaks that could have been applied to this one, the distinction between this and previous outbreaks, and the challenges in Ebola treatment and control. Chapter 3 discusses case examples of emergency response and preparedness around the world. Finally, Chapter 4 examines current and future research opportunities to study the transmission, diagnosis, proper containment of the ill, and vaccine development. It also discusses the importance of developing a global health risk framework in case of another Ebola-like disease outbreak."})

# MERGE (auth1:Author {name: "A.Mack"})
# MERGE (auth2:Author {name: "M. R. Snair"})
# MERGE (auth3:Author {name: "C.Mundaca-Shah"})

# MERGE (a)-[:HAS_AUTHOR]->(auth1)
# MERGE (a)-[:HAS_AUTHOR]->(auth2)
# MERGE (a)-[:HAS_AUTHOR]->(auth3)

# MERGE (c1:Concept {name: "diagnosis", uri: "https://id.cabi.org/cabt/38337"})
# MERGE (c2:Concept {name: "Ebolavirus", uri: "https://id.cabi.org/cabt/41510"})
# MERGE (c3:Concept {name: "West Africa", uri: "https://id.cabi.org/cabt/124364"})
# MERGE (c4:Concept {name: "Filoviridae", uri: "https://id.cabi.org/cabt/48196"})
# MERGE (c5:Concept {name: "Bundibugyo ebolaviru", uri: "https://id.cabi.org/cabt/275494"})
# MERGE (c6:Concept {name: "Africa South of Sahara", uri: "https://id.cabi.org/cabt/6730"})
# MERGE (c7:Concept {name: "Sahel", uri: "https://id.cabi.org/cabt/18171"})
# MERGE (c8:Concept {name: "Benin", uri: "https://id.cabi.org/cabt/18171"})

# MERGE (a)-[:HAS_PREFERRED_TERM]->(c1)
# MERGE (a)-[:HAS_ORGANISM_TERM]->(c2)
# MERGE (a)-[:HAS_GEOGRAPHIC_TERM]->(c3)

# MERGE (c2)-[:BROADER_TERM]->(c4)
# MERGE (c2)-[:NARROWER_TERM]->(c5)

# MERGE (c3)-[:BROADER_TERM]->(c6)
# MERGE (c3)-[:NARROWER_TERM]->(c8)
# MERGE (c3)-[:RELATED_TERM]->(c7)

# """
# )

graph.refresh_schema()

In [6]:
print(graph.get_schema)


        Node properties are the following:
        [{'properties': [{'property': 'abstract', 'type': 'STRING'}, {'property': 'pan', 'type': 'STRING'}, {'property': 'title', 'type': 'STRING'}], 'labels': 'Article'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Author'}, {'properties': [{'property': 'uri', 'type': 'STRING'}, {'property': 'name', 'type': 'STRING'}], 'labels': 'Concept'}]
        Relationship properties are the following:
        []
        The relationships are the following:
        ['(:Article)-[:HAS_AUTHOR]->(:Author)', '(:Article)-[:HAS_PREFERRED_TERM]->(:Concept)', '(:Article)-[:HAS_ORGANISM_TERM]->(:Concept)', '(:Article)-[:HAS_GEOGRAPHIC_TERM]->(:Concept)', '(:Author)-[:HAS_WRITTEN]->(:Article)', '(:Concept)-[:NARROWER_TERM]->(:Concept)', '(:Concept)-[:BROADER_TERM]->(:Concept)', '(:Concept)-[:RELATED_TERM]->(:Concept)']
        


In [7]:
from langchain.prompts.prompt import PromptTemplate


CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Ensure to handle the results of the Cypher query correctly in the subsequent stages of your application to convey the correct results to the user.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:
# What organisms are mentioned in article about ebola epidemic in West Africa?
MATCH (a:Article)
WHERE a.title =~ '.*ebola epidemic in West Africa.*'
MATCH (a)-[:HAS_ORGANISM_TERM]->(concept:Concept)
RETURN concept.name

# Which articles are written by M. R. Snair?
MATCH (a:Author)
WHERE a.name =~ '.*M\\. R\\. Snair.*'
MATCH (a)-[:HAS_AUTHOR]-(article:Article)
RETURN article.title

# Who are the authors for article ebola epidemic in West Africa?
MATCH (a:Article)
WHERE a.title =~ '.*ebola epidemic in West Africa.*'
MATCH (a)-[:HAS_AUTHOR]->(author:Author)
RETURN author.name

The question is:
{question}"""


CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=OPEN_AI_API_SECRET, temperature=0), graph=graph, verbose=True, cypher_prompt=CYPHER_GENERATION_PROMPT
)

# chain = GraphCypherQAChain.from_llm(
#      graph=graph,
#      cypher_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo", openai_api_key=OPEN_AI_API_SECRET),
#      qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo", openai_api_key=OPEN_AI_API_SECRET),
#      cypher_prompt=CYPHER_GENERATION_PROMPT,
#      verbose=True,
# )

In [8]:
chain.run("What organisms are mentioned in article about ebola epidemic in West Africa?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Article)
WHERE a.title =~ '.*ebola epidemic in West Africa.*'
MATCH (a)-[:HAS_ORGANISM_TERM]->(concept:Concept)
RETURN concept.name[0m
Full Context:
[32;1m[1;3m[{'concept.name': 'Ebolavirus'}][0m

[1m> Finished chain.[0m


'The article about the Ebola epidemic in West Africa mentions the Ebolavirus.'

In [9]:
chain.run("Can you tell me a more about the ebola epidemic in West Africa?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Article)
WHERE a.title =~ '.*ebola epidemic in West Africa.*'
RETURN a.abstract[0m
Full Context:
[32;1m[1;3m[{'a.abstract': 'This proceedings summarizes the presentations and discussions of the workshop convened by the Forum of Microbial Threats in March 2015, and consists of the information presented, questions raised, and improvements recommended by individual workshop participants. Chapter 2 outlines the trajectory and response to the most recent Ebola outbreak that occurred in West Africa. It includes lessons learned from previous outbreaks that could have been applied to this one, the distinction between this and previous outbreaks, and the challenges in Ebola treatment and control. Chapter 3 discusses case examples of emergency response and preparedness around the world. Finally, Chapter 4 examines current and future research opportunities to study the transmission, diagnosis, proper c

'The workshop proceedings provide a summary of the presentations and discussions on the Ebola epidemic in West Africa. Chapter 2 specifically outlines the trajectory and response to the most recent outbreak, highlighting lessons learned from previous outbreaks that could have been applied. It also discusses the challenges in Ebola treatment and control. If you would like more detailed information, I can provide you with the specific chapters from the workshop proceedings.'

In [10]:
chain.run("Who are the authors for article ebola epidemic in West Africa?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Article)
WHERE a.title =~ '.*ebola epidemic in West Africa.*'
MATCH (a)-[:HAS_AUTHOR]->(author:Author)
RETURN author.name[0m
Full Context:
[32;1m[1;3m[{'author.name': 'C.Mundaca-Shah'}, {'author.name': 'M. R. Snair'}, {'author.name': 'A.Mack'}][0m

[1m> Finished chain.[0m


'The authors for the article on the Ebola epidemic in West Africa are C. Mundaca-Shah, M. R. Snair, and A. Mack.'

In [11]:
chain.run("Which article is written by M. R. Snair?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Author)
WHERE a.name = 'M. R. Snair'
MATCH (a)-[:HAS_WRITTEN]->(article:Article)
RETURN article.title[0m
Full Context:
[32;1m[1;3m[{'article.title': 'The ebola epidemic in West Africa: proceedings of a workshop.'}][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have the information about which article is written by M. R. Snair."

In [12]:
chain.run("What is importance of developing a global health risk framework?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Article)
WHERE a.title =~ '.*developing a global health risk framework.*'
RETURN a.abstract[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


'Developing a global health risk framework is of utmost importance as it provides a structured approach to understanding and addressing health risks on a global scale. This framework allows for a comprehensive assessment of potential health threats, enabling countries and organizations to better prepare and respond to emerging diseases and pandemics. By establishing a standardized framework, it promotes collaboration and information sharing among nations, facilitating a coordinated global response to health crises. Additionally, a global health risk framework helps to identify and prioritize areas for intervention, ensuring that resources are allocated efficiently and effectively to protect public health worldwide.'