In [None]:
! pip install --upgrade --quiet langchain langchain-community langchain-openai langchain-experimental wikipedia neo4j tiktoken yfiles_jupyter

In [None]:
# Standard library
import os
from typing import Tuple, List, Optional

# Colab secrets store (the module is `google.colab`, not `google.colan`)
from google.colab import userdata

# LangChain / Neo4j stack
from langchain.document_loaders import WikipediaLoader
from langchain.schema.messages import HumanMessage, AIMessage
from langchain.text_splitter import TokenTextSplitter
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch, RunnableParallel
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from neo4j import GraphDatabase

# Enable the custom widget manager when it is available; only a missing
# module is tolerated so that other failures are not silently hidden.
try:
    import google.colab
    from google.colab import output
    output.enable_custom_widget_manager()
except ImportError:
    pass

from yfiles_jupyter_graphs import GraphWidget


In [None]:
# Neo4j connection settings. Each value may be overridden by an environment
# variable of the same name; the literals are the fallbacks used otherwise.
# NOTE: the username/password fallbacks are placeholders, not real credentials.
NEO4J_URL = os.environ.get("NEO4J_URL", "neo4j+s://0ee1bc88.database.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "NEO4J_USERNAME")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "NEO4J_PASSWORD")

In [None]:
# Look the OpenAI key up via `userdata.get` so it is not hardcoded in the notebook.
OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")

In [None]:
# Export the credentials for the LangChain integrations that read them from
# the environment.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# Neo4jGraph looks up NEO4J_URI; NEO4J_URL is kept as well for code that still
# reads the older name.
os.environ["NEO4J_URI"] = NEO4J_URL
os.environ["NEO4J_URL"] = NEO4J_URL
os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD

In [None]:
# Pass the connection settings explicitly so the connection does not depend on
# the environment variables being named the way Neo4jGraph expects.
graph = Neo4jGraph(
    url=NEO4J_URL,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

In [None]:
# Load the Wikipedia documents returned for the query "Elizabeth I".
raw_documents = WikipediaLoader(query="Elizabeth I").load()

In [None]:
# Chunk the source pages by tokens (512 per chunk, 24 overlapping).
# Only the first three loaded documents are split.
text_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
documents = text_splitter.split_documents(raw_documents[:3])

In [None]:
# Chat model (temperature 0) reused by the graph transformer, the entity
# extraction chain and the final answering chain.
model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125")

In [None]:
# Graph transformer backed by the chat model defined above.
llm_transformer = LLMGraphTransformer(llm=model)

In [None]:
# Run the transformer over the chunked documents to obtain graph documents.
graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [None]:
# Write the extracted graph documents through the Neo4jGraph connection.
# NOTE(review): baseEntityLabel / include_source presumably add a shared entity
# label and keep links to the source chunks — confirm against the LangChain docs.
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [None]:
# Directly show the graph resulting from the given Cypher query.
# `!MENTIONS` matches every relationship type except MENTIONS.
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"

In [None]:
def showgraph(cypher: str=default_cypher):
    """Run a Cypher query and render its result as an interactive graph widget.

    Args:
        cypher: Query whose result is turned into the graph to display.

    Returns:
        The ``GraphWidget`` that was displayed.
    """
    # Open the driver and session as context managers so the connection is
    # released even if the query fails; the widget is built while the session
    # is still open.
    with GraphDatabase.driver(
        uri=NEO4J_URL,
        auth=(NEO4J_USERNAME, NEO4J_PASSWORD),
    ) as driver, driver.session() as session:
        widget = GraphWidget(graph=session.run(cypher).graph())

    # Label every node with its `id` property
    widget.node_label_mapping = "id"
    display(widget)

    return widget

In [None]:
# Render the graph returned by the default Cypher query.
showgraph()

In [None]:
# Build a vector index on top of the nodes already stored in the graph:
# nodes labelled `Document`, their `text` property as the text to embed,
# `embedding` as the embedding property, OpenAI embeddings, hybrid search.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label = "Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

In [None]:
# Extract entities from text
class Entities(BaseModel):
    """ Identifying Information about entities """

    # The field must be called `names`: the cells consuming the structured
    # output read `.names`.
    names: List[str] = Field(
        ...,
        description="All people, organization or business entities that appear in the text",
    )

In [None]:
# Prompt for the entity extraction chain; `{question}` is filled in at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and people entities from the text",
        ),
        (
            "human",
            # Adjacent string literals are concatenated verbatim, so the
            # trailing space is needed to keep "following input" two words.
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

In [None]:
# Pipe the extraction prompt into the model, with output structured as `Entities`.
entity_chain = prompt | model.with_structured_output(Entities)

In [None]:
# Quick check: show the entity names extracted from a sample question.
entity_chain.invoke({"question": "Where was Amelia Earhart born?"}).names

In [None]:
def generate_full_text_query(input:str) -> str:
    """Build a fuzzy full-text query string from free text.

    The input is cleaned with ``remove_lucene_chars`` and split on whitespace.
    Every word gets a ``~2`` suffix and the words are combined with ``AND``,
    e.g. ``"Amelia Earhart"`` -> ``"Amelia~2 AND Earhart~2"``.

    Args:
        input: Text to search for.

    Returns:
        The query string, or an empty string when the input has no words.
    """
    words = remove_lucene_chars(input).split()
    # Joining handles zero, one or many words uniformly and puts " AND "
    # (with surrounding spaces) only between terms.
    return " AND ".join(f"{word}~2" for word in words)

In [3]:
# Fulltext index query
# The retriever looks entities up through a full-text index named "entity".
# Create it up front (the statement is a no-op when it already exists) so the
# lookup does not fail on a fresh database.
graph.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]"
)


def structured_retriever(question: str) -> str:
    """Collect the graph neighbourhood of the entities mentioned in a question.

    Entities are extracted with ``entity_chain``; each one is looked up in the
    "entity" full-text index and the non-MENTIONS relationships of the matched
    nodes are rendered as ``source - TYPE -> target`` lines.

    Args:
        question: The user question.

    Returns:
        The relationship lines joined by newlines, or an empty string when
        nothing was found.
    """
    entities = entity_chain.invoke({"question": question})

    lines = []
    for entity in entities.names:
        query = generate_full_text_query(entity)
        if not query:
            # Nothing searchable is left in this entity name
            continue

        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node, score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": query},
        )
        lines.extend(el["output"] for el in response)

    # Join once at the end so lines from different entities stay separated
    return "\n".join(lines)

In [None]:
# Quick check of the graph-based retriever on a sample question.
print(structured_retriever("Who is Elizabeth I"))

In [None]:
def retriever(question: str):
    """Assemble the context block for a question from both retrievers.

    Prints the search query, then returns a text block holding the output of
    ``structured_retriever`` followed by the page contents of the
    ``vector_index`` similarity search, joined with ``#Document``.
    """
    print(f"search query: {question}")

    graph_context = structured_retriever(question)

    # Keep only the text of each similarity-search hit
    passages = []
    for hit in vector_index.similarity_search(question):
        passages.append(hit.page_content)
    joined_passages = "#Document".join(passages)

    return f"""

            Structured data:
            {graph_context}
            Unstructured_data:
            {joined_passages}
        """

In [None]:
# Prompt text for rewriting a follow-up question as a standalone question.
# Placeholders: {chat_history} and {question}.
template = """ 

        Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
        in its original language.
        Chat History:
        {chat_history}
        Follow Up Input: {question}
        Standalone question:
"""

In [None]:
# Prompt object built from the question-condensing template text.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(template)

In [None]:
def fromat_chat_history(chat_history: List[tuple[str, str]]) -> List:
    """Flatten (human, ai) text pairs into an alternating list of messages.

    Each pair contributes a ``HumanMessage`` followed by an ``AIMessage``, in
    the order the pairs appear in ``chat_history``.
    """
    messages = []
    for human_text, ai_text in chat_history:
        # One exchange becomes two consecutive messages
        messages.extend(
            (HumanMessage(content=human_text), AIMessage(content=ai_text))
        )
    return messages

In [None]:
# Produces the search query for retrieval: a condensed standalone question
# when chat history is supplied, otherwise the question as given.
_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),
        # Condense follow-up question and chat into a standalone question.
        # The steps are piped into ONE runnable: a branch has to be a
        # (condition, runnable) pair, so they must not be separate elements.
        RunnablePassthrough.assign(
            chat_history=lambda x: fromat_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x: x["question"]),
)


In [None]:
# Prompt text for the final answer. Placeholders: {context} and {question}.
template_1 = """

        Answer the question based only on the following context:
        {context}

        Question: {question}
        Use natural language and be concise.
        Answer:
"""

In [None]:
# Chat prompt built from the answer template text.
prompt_1 = ChatPromptTemplate.from_template(template_1)

In [None]:
# End-to-end chain: build the context and the question in parallel, fill the
# answer prompt, call the model and parse the reply to a string.
chain = (
    RunnableParallel(
        {
            # Search query (condensed when chat history exists) fed to the retriever
            "context": _search_query | retriever,
            # NOTE(review): this forwards the chain input unchanged; with a dict
            # input the prompt's {question} presumably receives the whole dict
            # rather than only its "question" entry — confirm this is intended.
            "question": RunnablePassthrough(),
        }
    )

    | prompt_1
    | model
    | StrOutputParser()
)

In [None]:
# Ask a sample question through the full chain (no chat history supplied).
chain.invoke({"question": "which house did Elizabeth I belong to?"})