In [1]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_core.output_parsers import StrOutputParser
from neo4j import GraphDatabase

# Load variables from a local .env file into the process environment so the
# NEO4J_* settings read with os.getenv below are available.
load_dotenv()

True

In [2]:
def init_neo4j_connection():
    """Create the LangChain graph wrapper and a raw Neo4j driver.

    Connection settings come from the ``NEO4J_URI``, ``NEO4J_USERNAME`` and
    ``NEO4J_PASSWORD`` environment variables. They are read once and the same
    values are used for both objects.

    Returns:
        tuple: ``(graph, driver)`` on success, where ``graph`` is a
        ``Neo4jGraph`` and ``driver`` is the object returned by
        ``GraphDatabase.driver``. ``(None, None)`` if ``NEO4J_URI`` is not set
        or if creating either object raises. Failures are printed, never
        raised.
    """
    uri = os.getenv("NEO4J_URI", "")
    username = os.getenv("NEO4J_USERNAME", "")
    password = os.getenv("NEO4J_PASSWORD", "")

    # Fail fast with an actionable message instead of passing an empty URI on
    # and surfacing a generic library error. Username and password are not
    # validated here because empty values may be intentional.
    if not uri:
        print("Failed to initialize Neo4j connection: NEO4J_URI is not set")
        return None, None

    try:
        graph = Neo4jGraph(url=uri, username=username, password=password)
        driver = GraphDatabase.driver(uri, auth=(username, password))
        return graph, driver
    except Exception as e:
        print(f"Failed to initialize Neo4j connection: {str(e)}")
        return None, None

In [10]:
# Both names are None when the connection could not be initialised (the error
# is printed by init_neo4j_connection rather than raised).
graph, driver = init_neo4j_connection()

In [14]:
def get_schema(graph):
    """Return the schema held by ``graph``, or an empty string.

    Args:
        graph: Object exposing a ``get_schema`` attribute; may be ``None``.

    Returns:
        The value of ``graph.get_schema`` when ``graph`` is truthy. ``""`` when
        ``graph`` is falsy or when reading the attribute raises; in the latter
        case the error is printed.
    """
    try:
        if not graph:
            return ""
        return graph.get_schema
    except Exception as err:
        print(f"APOC error retrieving schema: {str(err)}")
        return ""

In [15]:
# Schema text for the graph; get_schema returns "" if `graph` is None or the
# lookup failed.
schema = get_schema(graph)

In [16]:
# Display the schema string to check node labels and relationship types.
schema

'Node properties:\nresume {name: STRING}\nFile {name: STRING}\nEntity {name: STRING}\nRelationship properties:\n\nThe relationships:\n(:File)-[:BELONGS_TO]->(:resume)\n(:Entity)-[:HAS_SKILLS]->(:Entity)\n(:Entity)-[:LIVES_IN]->(:Entity)\n(:Entity)-[:HAS_FILE]->(:File)\n(:Entity)-[:HAS_EXPERIENCE]->(:Entity)\n(:Entity)-[:LOCATED_IN]->(:Entity)\n(:Entity)-[:HAS_CERTIFICATION]->(:Entity)\n(:Entity)-[:HAS_PROJECTS]->(:Entity)\n(:Entity)-[:HAS_EDUCATION]->(:Entity)\n(:Entity)-[:HAS_CONFERENCE]->(:Entity)'

In [41]:
def init_qa_chain(graph):
    """Build a ``GraphCypherQAChain`` that uses a custom Cypher-generation prompt.

    Args:
        graph: Graph object handed to ``GraphCypherQAChain.from_llm``.

    Returns:
        The chain created by ``GraphCypherQAChain.from_llm``, or ``None`` if
        any step raises (the error is printed).
    """
    try:
        cypher_template = """
        Task: Generate a Cypher statement to query the graph database.
        You will be given a rephrased query.
        Generate a cypher statement to answer the rephrased query.

        Instructions:
        1. Search for nodes and their relationships
        2. Return relevant information about the queried entities
        3. Don't restrict to specific labels, search across all nodes
        4. Use CONTAINS or other fuzzy matching when appropriate

        schema:
        {schema}

        Note: Focus on finding relevant information rather than exact matches.

        Rephrased Query: {query}

        Cypher Statement:
        """

        # The prompt expects exactly these two placeholders to be filled.
        cypher_prompt = PromptTemplate(
            template=cypher_template,
            input_variables=["schema", "query"],
        )
        chat_model = ChatOpenAI(model="gpt-4o", temperature=0)

        chain_options = {
            "llm": chat_model,
            "graph": graph,
            "cypher_prompt": cypher_prompt,
            # Print the generated Cypher and retrieved context while running.
            "verbose": True,
            # NOTE(review): opts in to executing LLM-generated Cypher against
            # the database; confirm the credentials in use are suitably scoped.
            "allow_dangerous_requests": True,
            "return_intermediate_steps": True,
        }
        return GraphCypherQAChain.from_llm(**chain_options)
    except Exception as err:
        print(f"Error initializing QA chain: {str(err)}")
        return None

In [20]:
# `qa` is None if chain construction failed (init_qa_chain prints the error);
# check it before calling qa.invoke.
qa = init_qa_chain(graph)

In [33]:
def relationship_to_string(relationship):
    """Render a relationship as a Cypher-like pattern string.

    Example result:
        (:Entity {name: "Hasnain Ali Poonja"})-[:HAS_EDUCATION]->(:Entity {name: "NUST SMME"})

    Args:
        relationship: Object with ``nodes`` (a pair of nodes) and ``type``.
            Each node must expose ``labels`` and a dict-style ``get``.

    Returns:
        str: The formatted pattern. For each node the first label is used
        (empty if it has none) and the name falls back to ``"Unknown"``.
        Returns ``"Unknown relationship"`` if anything raises; the error is
        printed.
    """
    try:
        start_node, end_node = relationship.nodes
        endpoints = (start_node, end_node)

        start_label, end_label = [
            list(node.labels)[0] if node.labels else ""
            for node in endpoints
        ]
        start_name, end_name = [
            node.get("name", "Unknown") for node in endpoints
        ]

        pattern = '(:{} {{name: "{}"}})-[:{}]->(:{} {{name: "{}"}})'
        return pattern.format(
            start_label, start_name, relationship.type, end_label, end_name
        )
    except Exception as err:
        print(f"Error in relationship_to_string: {str(err)}")
        return "Unknown relationship"

def get_all_nodes_and_relationships(driver, file_names_list=None):
    """Collect the names of all ``Entity`` nodes and the relationships between them.

    Args:
        driver: Neo4j driver used to open a session. A falsy value
            short-circuits to empty results.
        file_names_list: Accepted for interface compatibility but not used;
            the query is not filtered by file.

    Returns:
        tuple[list, list]: ``(node_names, relationship_strings)``. Both lists
        are de-duplicated and sorted, so the result is stable across runs
        (set iteration order alone is not). ``([], [])`` if the driver is
        missing or any error occurs; the error is printed.
    """
    if not driver:
        return [], []

    node_names = set()
    relationship_strings = set()

    try:
        with driver.session() as session:
            # The relationship pattern is undirected, so a relationship between
            # two Entity nodes is returned once from each end; the sets below
            # collapse those duplicates.
            base_query = """
                MATCH (n:Entity)
                OPTIONAL MATCH (n)-[r]-(m:Entity)
                RETURN DISTINCT n, r, m
            """

            for record in session.run(base_query):
                for key in ("n", "m"):
                    node = record.get(key)
                    # Skip absent nodes and nodes with no (or an empty) name.
                    if node and node.get("name"):
                        node_names.add(node["name"])

                relationship = record.get("r")
                if relationship is not None:
                    relationship_strings.add(relationship_to_string(relationship))

        # key=str keeps sorting safe if a name property is not a string.
        return sorted(node_names, key=str), sorted(relationship_strings)
    except Exception as e:
        print(f"Error in get_all_nodes_and_relationships: {str(e)}")
        return [], []
    
# Snapshot of entity names and relationship strings, used below as context
# when rephrasing the user's question. Both are [] if `driver` is None or the
# query failed.
nodes_list, relationships_list = get_all_nodes_and_relationships(driver)

In [34]:
# Inspect the collected relationship strings.
# NOTE(review): this prints the whole list; consider relationships_list[:10].
relationships_list

['(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "Agile"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "Problem Solving Skills"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "Asana"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "GIT Version Control"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "Wordpress"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_PROJECTS]->(:Entity {name: "Video Streaming Web App & Mobile APIs"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "RESTful APIs"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_PROJECTS]->(:Entity {name: "ERP for Sharmeen Group of Companies"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "API Integration"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "JavaScript"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "Scrum"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SK

In [35]:
def rephrase_query_chain(query, nodes, relationships):
    """Have the LLM restate ``query`` using the known graph nodes and relationships.

    Args:
        query: The user's question. A falsy value returns ``None`` immediately.
        nodes: Node names inserted into the prompt.
        relationships: Relationship strings inserted into the prompt.

    Returns:
        Whatever the ``prompt | llm`` chain returns from ``invoke`` (its
        ``content`` attribute is read later in this notebook), or ``None`` if
        ``query`` is falsy or an error occurs (the error is printed).
    """
    if not query:
        return None

    try:
        chat_model = ChatOpenAI(model="gpt-4o", temperature=0)
        rephrase_prompt = PromptTemplate(
            template=""" You are a highly skilled assistant that specializes in rephrasing user queries using the list of nodes and the 
            list of relationships in the graph database. 

            The rephrased query should include the exact node and the relevant relationship of the node from the list of nodes and the list of relationships.

            List of nodes: {nodes}
            List of relationships: {relationships}
            Query: {query}

            Rephrased Query:
            """
        )

        prompt_inputs = {
            "nodes": nodes,
            "relationships": relationships,
            "query": query,
        }
        pipeline = rephrase_prompt | chat_model
        return pipeline.invoke(prompt_inputs)
    except Exception as err:
        print(f"Error in rephrase_query_chain: {str(err)}")
        return None

In [48]:
# Example question; the node and relationship lists are passed as context so
# the rephrased query can refer to entities that exist in the graph.
query = "Where they live?"
# `rephrased` is None if the rephrasing call failed.
rephrased = rephrase_query_chain(query, nodes_list, relationships_list)

In [40]:
# Inspect the rephrased query message.
# NOTE(review): re-run this cell after changing `query`; a saved output from
# an earlier execution can be stale.
rephrased

AIMessage(content='What skills are shared between the entities "Khawer Ali" and "Ayaan" based on their respective [:HAS_SKILLS] relationships?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 2120, 'total_tokens': 2149, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 2048}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_46bff0e0c8', 'id': 'chatcmpl-CACe0RfAoxvwocbgk0eTwZxhBf4J0', 'finish_reason': 'stop', 'logprobs': None}, id='run--bef0216e-0ff0-494e-b13d-8fec74d20ed4-0', usage_metadata={'input_tokens': 2120, 'output_tokens': 29, 'total_tokens': 2149, 'input_token_details': {'audio': 0, 'cache_read': 2048}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [49]:
# Run the Cypher QA chain on the rephrased question.
# `rephrased` and `qa` are None when their setup steps failed, in which case
# this cell raises AttributeError.
result = qa.invoke({
    "query": rephrased.content,
    "schema": schema
})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (e1:Entity {name: 'Khawer Ali'})-[:LIVES_IN]->(e2:Entity)
WHERE e2.name CONTAINS 'Karachi'
RETURN e2.name AS Location
[0m
Full Context:
[32;1m[1;3m[{'Location': 'DHA, Karachi, Pakistan'}, {'Location': 'Karachi'}][0m

[1m> Finished chain.[0m


In [50]:
# Prompt that turns the QA chain output into a final natural-language answer.
# NOTE(review): the text promises a user query and a schema, but the only
# placeholder is {result}; confirm the value passed as `result` carries
# everything the model needs, or add explicit placeholders.
template = """
You will be provided with user query, schema of knowledge graph and result from cypher query chain
your task is to draft the final answer based on the result and the user query.

Result: {result}
Final Answer:
"""

prompt = PromptTemplate.from_template(template)

# temperature=0, as for the other model instances in this notebook.
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# Pipe the formatted prompt into the chat model.
chain = prompt | llm



In [51]:
# The whole return value of qa.invoke is interpolated into the prompt, not
# just an answer field.
final_answer = chain.invoke({
    "result": result
})

# Show only the text of the model's reply.
final_answer.content

'Khawer Ali resides in DHA, Karachi, Pakistan.'