# Install libraries

In [None]:
# Use the %pip magic instead of !pip: %pip installs into the environment of the
# running kernel, whereas !pip runs whichever `pip` is first on the shell PATH.
%pip install langchain langchain-community langchain-groq transformers
%pip install llama-index
%pip install langchain-experimental
%pip install python-dotenv

# Step 1: Load and preprocess text data

In [13]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document

# Sample corpus: four short paragraphs about two employees and their company.
text = """Sarah is an employee at prismaticAI, a leading technology company based in Westside Valley. She has been working there for the past three years as a software engineer.
Michael is also an employee at prismaticAI, where he works as a data scientist. He joined the company two years ago after completing his graduate studies.
prismaticAI is a well-known technology company that specializes in developing cutting-edge software solutions and artificial intelligence applications. The company has a diverse workforce of talented individuals from various backgrounds.
Both Sarah and Michael are highly skilled professionals who contribute significantly to prismaticAI's success. They work closely with their respective teams to develop innovative products and services that meet the evolving needs of the company's clients."""

# Chunking parameters (in characters) handed to the splitter below.
CHUNK_SIZE = 200
CHUNK_OVERLAP = 20

# NOTE(review): no `separator` is passed, so the splitter's default applies
# (a blank line, per the LangChain docs - confirm). The sample text only has
# single newlines, so CHUNK_SIZE may not actually take effect here.
text_splitter = CharacterTextSplitter(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)

# Wrap the raw string in a Document, then split it into the chunks (`texts`)
# that the graph-extraction step consumes.
documents = [Document(page_content=text)]
texts = text_splitter.split_documents(documents)

# Step 2: Initialize language model and extract knowledge graph

In [27]:
from langchain_groq import ChatGroq
from langchain_experimental.graph_transformers import LLMGraphTransformer
import getpass
import os

# Prompt for the Groq API key only when it is not already set. This keeps
# "Restart & Run All" from blocking on input and avoids overwriting a key that
# was exported before the notebook was started.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

# Chat model shared by the graph-extraction step and the QA chains below.
llm = ChatGroq(
    temperature=0,
    model_name="meta-llama/llama-4-scout-17b-16e-instruct",
)

# Transformer that uses the LLM to turn text chunks into graph documents.
llm_transformer = LLMGraphTransformer(llm=llm)

# `texts` is the list of chunks produced by the text splitter in Step 1.
graph_documents = llm_transformer.convert_to_graph_documents(texts)

# Step 3: Store knowledge graph in a database

In [None]:
# %pip (rather than !pip) installs into the running kernel's environment.
%pip install neo4j langchain_neo4j

In [88]:
from langchain_neo4j import Neo4jGraph
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment.
load_dotenv()

# Neo4j connection settings; each is None when its variable is not set.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = (
    os.getenv(key) for key in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)

# Open the connection through LangChain's Neo4j wrapper.
graph = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD)

# Write the GraphDocument objects extracted in Step 2 into the database.
graph.add_graph_documents(graph_documents)

# Step 4: Retrieve knowledge for RAG

In [62]:
from langchain_neo4j import GraphCypherQAChain


# Question-answering chain over the graph: for each question the LLM writes a
# Cypher query, the query is run against `graph`, and the returned rows are
# turned into a natural-language answer.
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    # Print the generated Cypher and the retrieved context on every call.
    verbose=True,
    # Explicit opt-in: the chain executes LLM-generated Cypher on the database.
    # NOTE(review): confirm the Neo4j user has narrowly scoped permissions.
    allow_dangerous_requests=True,
)

# Step 5: Query the knowledge graph and generate a response

In [73]:
def query_graph(query):
    """Answer `query` using the Cypher QA chain.

    Returns the chain's "result" text. Any exception raised while invoking the
    chain is caught and reported as an "Error: ..." string rather than raised.
    """
    try:
        answer = chain.invoke({"query": query})["result"]
    except Exception as exc:
        return "Error: " + str(exc)
    return answer

query_graph("Does Michael work for the same company as Sarah?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (m:Person {id: "Michael"})-[:EMPLOYEE]->(o:Organization)
MATCH (s:Person {id: "Sarah"})-[:EMPLOYEE]->(o2:Organization)
RETURN o.id = o2.id AS result
[0m
Full Context:
[32;1m[1;3m[{'result': True}][0m

[1m> Finished chain.[0m


'Yes, Michael works for the same company as Sarah.'

# Inference with Gradio

In [None]:
# %pip (rather than !pip) installs into the running kernel's environment.
%pip install gradio

In [86]:
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader, TextLoader
import os

# Cypher QA chain backing the Gradio "Ask" button. It uses the same LLM and
# Neo4j graph objects that were created in the earlier steps.
qa_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    # Log the generated Cypher and the retrieved context for each question.
    verbose=True,
    # Explicit opt-in: the chain runs LLM-generated Cypher against the database.
    allow_dangerous_requests=True,
)

# Handle file upload and process it into Neo4j
def upload_document(file):
    """Ingest an uploaded PDF or TXT file into the Neo4j knowledge graph.

    Args:
        file: Value delivered by the Gradio file component: a path string, an
            object exposing the path as ``.name``, or ``None`` when the button
            is clicked before anything was uploaded.

    Returns:
        A status message for the "Upload Status" textbox. Problems (no file,
        unsupported extension, or a failure while loading, extracting or
        writing to Neo4j) are reported as messages instead of being raised,
        matching how `query_graph` reports errors.
    """
    if file is None:
        return "❌ No file uploaded. Please upload a PDF or TXT file."

    # Accept both a plain path string and a file-like object with `.name`.
    path = file if isinstance(file, str) else file.name
    ext = os.path.splitext(path)[1].lower()

    if ext not in (".pdf", ".txt"):
        return "Unsupported file type. Please upload a PDF or TXT file."

    try:
        loader = PyPDFLoader(path) if ext == ".pdf" else TextLoader(path)
        documents = loader.load()

        # Same chunking parameters as the sample text in Step 1.
        text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)
        chunks = text_splitter.split_documents(documents)

        # Extract entities/relations with the LLM and persist them to Neo4j.
        graph_documents = llm_transformer.convert_to_graph_documents(chunks)
        graph.add_graph_documents(graph_documents)
    except Exception as e:
        return f"❌ Error: {e}"

    return "✅ Document processed and added to the knowledge graph!"

# Ask a question
# NOTE: this redefines the `query_graph` helper from Step 5; from here on the
# name refers to this version, which uses `qa_chain`.
def query_graph(query):
    """Answer a question typed into the Gradio UI using `qa_chain`.

    Args:
        query: The question text from the "Ask a Question" textbox.

    Returns:
        The chain's "result" text, a prompt to enter a question when the input
        is empty or whitespace, or an "❌ Error: ..." message if the chain
        raises.
    """
    # Do not spend an LLM call and a database round trip on a blank question.
    if query is None or not str(query).strip():
        return "❌ Please enter a question."
    try:
        response = qa_chain.invoke({"query": query})
        return response["result"]
    except Exception as e:
        return f"❌ Error: {e}"

# Gradio UI: one row for uploading a document, one row for asking questions.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Upload a Document & Ask Questions")

    # Upload row: file picker, trigger button, status message.
    with gr.Row():
        file_upload = gr.File(file_types=[".pdf", ".txt"], label="Upload PDF or TXT")
        upload_btn = gr.Button("Process and Extract")
        upload_output = gr.Textbox(label="Upload Status")

    # Question row: input box, answer box, trigger button.
    with gr.Row():
        query_input = gr.Textbox(label="Ask a Question")
        query_output = gr.Textbox(label="Answer")
        ask_btn = gr.Button("Ask")

    # Wire each button to its handler.
    upload_btn.click(
        fn=upload_document,
        inputs=[file_upload],
        outputs=[upload_output],
    )
    ask_btn.click(
        fn=query_graph,
        inputs=[query_input],
        outputs=[query_output],
    )

# share=True asks Gradio for a public share link in addition to the local URL.
# NOTE(review): a shared link would let anyone with the URL upload files into
# Neo4j and trigger LLM-generated Cypher - confirm this exposure is intended.
demo.launch(debug=True, share=True)

* Running on local URL:  http://127.0.0.1:7864

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


Created a chunk of size 213, which is longer than the specified 200
Created a chunk of size 318, which is longer than the specified 200




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (o:Occupation {id: "AI Agent"}) RETURN o[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (m:Person {id: "Michael"})-[:EMPLOYEE]->(o:Organization)
MATCH (s:Person {id: "Sarah"})-[:EMPLOYEE]->(o2:Organization)
RETURN o.id = o2.id AS result
[0m
Full Context:
[32;1m[1;3m[{'result': True}][0m

[1m> Finished chain.[0m
Keyboard interruption in main thread... closing server.


