In [None]:
!pip install --upgrade --quiet  langchain langchain-community langchain-ollama langchain-experimental neo4j tiktoken yfiles_jupyter_graphs python-dotenv json-repair langchain-openai langchain_core


In [20]:
import streamlit as st
import os
from langchain_groq import ChatGroq
# from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv

load_dotenv()


# Graph related imports:

from langchain_experimental.graph_transformers import LLMGraphTransformer
from yfiles_jupyter_graphs import GraphWidget
from langchain.vectorstores import Neo4jVector
from langchain_community.graphs import Neo4jGraph
from neo4j import GraphDatabase
from pydantic import BaseModel, Field
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import  RunnablePassthrough

In [3]:
graph = Neo4jGraph()
loader = TextLoader(file_path="Elon.txt", encoding="utf8")
docs = loader.load()
docs

[Document(metadata={'source': 'Elon.txt'}, page_content="Elon Reeve Musk FRS (/ˈiːlɒn/; born June 28, 1971) is a businessman and investor known for his key roles in the space company SpaceX and the automotive company Tesla, Inc. Other involvements include ownership of X Corp., the company that operates the social media platform X (formerly Twitter), and his role in the founding of the Boring Company, xAI, Neuralink, and OpenAI. He is the wealthiest individual in the world; as of November 2024 Forbes estimates his net worth to be US$304 billion.[3]\n\nMusk was born in Pretoria, South Africa, to Maye (née Haldeman), a model, and Errol Musk, a businessman and engineer. Musk briefly attended the University of Pretoria before immigrating to Canada at the age of 18, acquiring citizenship through his Canadian-born mother. Two years later he matriculated at Queen's University at Kingston in Canada. Musk later transferred to the University of Pennsylvania and received bachelor's degrees in econ

In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap = 20)
documents = text_splitter.split_documents(documents=docs)
documents 

[Document(metadata={'source': 'Elon.txt'}, page_content='Elon Reeve Musk FRS (/ˈiːlɒn/; born June 28, 1971) is a businessman and investor known for his key roles in the space company SpaceX and the automotive company Tesla, Inc. Other involvements include'),
 Document(metadata={'source': 'Elon.txt'}, page_content='include ownership of X Corp., the company that operates the social media platform X (formerly Twitter), and his role in the founding of the Boring Company, xAI, Neuralink, and OpenAI. He is the'),
 Document(metadata={'source': 'Elon.txt'}, page_content='OpenAI. He is the wealthiest individual in the world; as of November 2024 Forbes estimates his net worth to be US$304 billion.[3]'),
 Document(metadata={'source': 'Elon.txt'}, page_content='Musk was born in Pretoria, South Africa, to Maye (née Haldeman), a model, and Errol Musk, a businessman and engineer. Musk briefly attended the University of Pretoria before immigrating to Canada at'),
 Document(metadata={'source': 'Elon.tx

In [5]:
# Configuring the LLM (Llama3) from GROQ:
groq_api_key = os.getenv("GROQ_API_KEY")


llm = ChatGroq(groq_api_key=groq_api_key,
               model="Llama3-8b-8192")

In [6]:
llm_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_transformer.convert_to_graph_documents(documents)


In [7]:
graph_documents

[GraphDocument(nodes=[Node(id='Elon Reeve Musk', type='Person')], relationships=[Relationship(source=Node(id='Elon Reeve Musk', type='Person'), target=Node(id='Spacex', type='Company'), type='WORKS_FOR'), Relationship(source=Node(id='Elon Reeve Musk', type='Person'), target=Node(id='Tesla, Inc.', type='Company'), type='WORKS_FOR')], source=Document(metadata={'source': 'Elon.txt'}, page_content='Elon Reeve Musk FRS (/ˈiːlɒn/; born June 28, 1971) is a businessman and investor known for his key roles in the space company SpaceX and the automotive company Tesla, Inc. Other involvements include')),
 GraphDocument(nodes=[Node(id='Elon_Musk', type='Person')], relationships=[Relationship(source=Node(id='Elon_Musk', type='Person'), target=Node(id='X_Corp', type='Organization'), type='OWNS')], source=Document(metadata={'source': 'Elon.txt'}, page_content='include ownership of X Corp., the company that operates the social media platform X (formerly Twitter), and his role in the founding of the Bo

In [8]:
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel = True,
    include_source = True
)

In [9]:
def show_graph():
    driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"],
                os.environ["NEO4J_PASSWORD"]))
    session = driver.session()
    widget = GraphWidget(graph = session.run("Match (s)-[r:!MENTIONS]->(t) Return s,r,t").graph())
    widget.node_label_mapping = "id"
    return widget

show_graph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [10]:
# Embedding the graph DB into a vector form
vectorized_graph_db = Neo4jVector.from_existing_graph(
    OllamaEmbeddings(),
    search_type = "hybrid",
    node_label = "document",
    text_node_properties = ["text"],
    embedding_node_property = "embedding"
)


In [11]:
vector_retirever = vectorized_graph_db.as_retriever()

In [12]:
driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"],
                os.environ["NEO4J_PASSWORD"]))


def create_fulltext_index(tx):
    query = '''
    CREATE FULLTEXT INDEX `fulltext_entity_id` 
    FOR (n:__Entity__) 
    ON EACH [n.id];
    '''
    tx.run(query)

# Function to execute the query
def create_index():
    with driver.session() as session:
        session.execute_write(create_fulltext_index)
        print("Fulltext index created successfully.")

# Call the function to create the index
try:
    create_index()
except:
    pass

# Close the driver connection
driver.close()

In [13]:
from typing import List

class Entities(BaseModel):
    "Identifying information about entities."

    names: List[str] = Field(
        ...,
        description="All the person, organization, or business entities that" 
        "appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)


entity_chain = llm.with_structured_output(Entities)

In [14]:
entity_chain.invoke("Who is Elon Musk and what is the relationship between him and SpaceX?")

Entities(names=['Elon Musk', 'SpaceX', 'CEO', 'Founder'])

In [15]:
def generate_full_text_query(input: str) -> str:
    words = [el for el in remove_lucene_chars(input).split() if el]
    if not words:
        return ""
    full_text_query = " AND ".join([f"{word}~2" for word in words])
    print(f"Generated Query: {full_text_query}")
    return full_text_query.strip()


# Fulltext index query
def graph_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = entity_chain.invoke(question)
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('fulltext_entity_id', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": entity},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [16]:
print(graph_retriever("Elon Musk founded Neuralink"))



Elon Musk - PROFESSOR -> Openai
Elon Musk - PROFESSOR -> Tesla Energy
Elon Musk - PROFESSOR -> Hyperloop
Elon Musk - PROFESSOR -> Tesla
Elon Musk - PROFESSOR -> Twitter
Elon Musk - OWNS -> X Corp.
Elon Musk - FOUNDED -> Xai
Elon Musk - FOUNDED -> Neuralink
Elon Musk - FOUNDED -> Openai
Elon Musk - FOUNDED -> Boring Company
Elon Musk - BROKER -> Kimbal Musk
Elon Musk - CO_FOUNDER -> Zip2
Elon Musk - CO_FOUNDER -> X.Com
Elon Musk - FOUNDER -> Zip2
Elon Musk - FOUNDER -> X.Com
Elon Musk - INVESTOR -> Tesla Motors, Inc.
Elon Musk - CHAIRMAN_OF -> Tesla
Elon Musk - FUNDING_PROVIDER -> Tesla
Elon Musk - FUNDING_PROVIDER -> X Corp
Elon Musk - ACQUIRER -> Twitter
Elon Musk - ENDORSES -> Right-Wing Conspiracy Theories
Elon Musk - ENDORSES -> Antisemitic Trope
Elon Musk - OWNED -> Twitter
Elon Musk - OWNER -> Twitter
Elon Musk - OPERATES -> X
Elon Musk - BROTHER -> Kimbal Musk
Elon Musk - COFOUNDER -> Zip2
Elon Musk - COFOUNDER -> X.Com
Right-Wing Conspiracy Theories - ENDORSE -> Elon Musk
Elon 

In [22]:
def full_retriever(question: str):
    graph_data = graph_retriever(question)
    vector_data = [el.page_content for el in vector_retirever.invoke(question)]
    final_data = f"""Graph data:
{graph_data}
vector data:
{"#Document ". join(vector_data)}
    """
    return final_data

In [23]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
        {
            "context": full_retriever,
            "question": RunnablePassthrough(),
        }
    | prompt
    | llm
    | StrOutputParser()
)

In [25]:
chain.invoke(input="Who is Elon Musk and what is the relationship between him and Paypal?")



'Elon Musk is a professor, founder, and CEO of several companies, including Neuralink, Openai, Boring Company, and Tesla. He is also a co-founder of Zip2 and X.Com, and has been involved with SpaceX. According to the graph data, Elon Musk is not directly related to the founding of PayPal, as John Doe is credited with founding the company. However, Elon Musk worked for PayPal before it was acquired by eBay.'