In [36]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_core.output_parsers import StrOutputParser
from neo4j import GraphDatabase

load_dotenv()

True

In [37]:
# Neo4j connection settings, read from the process environment after the
# load_dotenv() call above. Each value is None when its variable is unset.
URI, USERNAME, PASSWORD = (
    os.getenv(var_name)
    for var_name in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)

def init_neo4j_connection():
    """Open both Neo4j handles used by this notebook.

    Reads the module-level ``URI``, ``USERNAME`` and ``PASSWORD`` settings.

    Returns:
        A ``(graph, driver)`` tuple: a LangChain ``Neo4jGraph`` wrapper and a
        raw ``neo4j`` driver created with the same credentials. Nothing in
        this function closes the driver; that is left to the caller.
    """
    langchain_graph = Neo4jGraph(url=URI, username=USERNAME, password=PASSWORD)
    credentials = (USERNAME, PASSWORD)
    return langchain_graph, GraphDatabase.driver(URI, auth=credentials)

In [38]:
# Create the LangChain graph wrapper and the raw driver once for the whole notebook.
graph, driver = init_neo4j_connection()

In [39]:
def get_schema(graph):
    """Return ``graph.get_schema``, or an empty string when ``graph`` is falsy.

    ``get_schema`` is read as an attribute (not called); the value is handed
    back unchanged.
    """
    if not graph:
        return ""
    return graph.get_schema

In [40]:
# Capture the schema text once; it is passed to the QA chain and the answer prompt below.
schema = get_schema(graph)

In [41]:
# Display the schema string for inspection.
schema

'Node properties:\nresume {name: STRING}\nFile {name: STRING}\nEntity {name: STRING}\nSCIENCE_ARTICLE {name: STRING}\nRelationship properties:\n\nThe relationships:\n(:File)-[:BELONGS_TO]->(:resume)\n(:Entity)-[:HAS_SKILLS]->(:Entity)\n(:Entity)-[:LIVES_IN]->(:Entity)\n(:Entity)-[:HAS_FILE]->(:File)\n(:Entity)-[:HAS_EXPERIENCE]->(:Entity)\n(:Entity)-[:AS]->(:Entity)\n(:Entity)-[:LOCATED_IN]->(:Entity)\n(:Entity)-[:HAS_CERTIFICATION]->(:Entity)\n(:Entity)-[:HAS_PROJECTS]->(:Entity)\n(:Entity)-[:HAS_EDUCATION]->(:Entity)\n(:Entity)-[:HAS_CONFERENCE]->(:Entity)\n(:Entity)-[:FROM]->(:Entity)\n(:Entity)-[:HAS_MEMBERSHIP]->(:Entity)\n(:Entity)-[:HAS_CERTIFICATIONS]->(:Entity)\n(:Entity)-[:HAS_COURSES]->(:Entity)'

In [42]:
def init_qa_chain(graph):
    """Build a ``GraphCypherQAChain`` over ``graph`` with a custom Cypher prompt.

    The Cypher-generation prompt expects two variables, ``schema`` and
    ``query``. The chain is created with verbose logging, intermediate steps
    returned, and ``allow_dangerous_requests=True``.

    Args:
        graph: Graph object handed to ``GraphCypherQAChain.from_llm``.

    Returns:
        The configured chain.
    """
    cypher_template = """
    Task: Generate a Cypher statement to query the graph database.
    You will be given a rephrased query.
    Generate a cypher statement to answer the rephrased query.

    Instructions:
    1. Search for nodes and their relationships
    2. Return relevant information about the queried entities
    3. Don't restrict to specific labels, search across all nodes
    4. Use CONTAINS or other fuzzy matching when appropriate

    schema:
    {schema}

    Note: Focus on finding relevant information rather than exact matches.

    Rephrased Query: {query}

    Cypher Statement:
    """

    cypher_prompt = PromptTemplate.from_template(cypher_template)
    chat_model = ChatOpenAI(model="gpt-4o")

    # Fixed chain options, kept together so the from_llm call stays short.
    chain_options = {
        "verbose": True,
        "allow_dangerous_requests": True,
        "return_intermediate_steps": True,
    }
    return GraphCypherQAChain.from_llm(
        llm=chat_model,
        graph=graph,
        cypher_prompt=cypher_prompt,
        **chain_options,
    )

In [43]:
# Build the Cypher QA chain against the connected graph.
qa = init_qa_chain(graph)

In [44]:
def relationship_to_string(relationship):
    """Render a relationship as a Cypher-like pattern string.

    Args:
        relationship: Object with a two-element ``nodes`` attribute and a
            ``type`` attribute. Each node needs ``labels`` and a dict-style
            ``get``.

    Returns:
        ``(:Label {name: "A"})-[:TYPE]->(:Label {name: "B"})``, where each
        label is the first one in the node's label iteration order (empty
        when the node has none) and a missing ``name`` becomes ``"Unknown"``.
    """
    first_node, second_node = relationship.nodes

    # Resolve labels first, then names, mirroring the order of attribute access.
    first_label = next(iter(first_node.labels)) if first_node.labels else ""
    second_label = next(iter(second_node.labels)) if second_node.labels else ""

    first_name = first_node.get("name", "Unknown")
    second_name = second_node.get("name", "Unknown")

    left = f'(:{first_label} {{name: "{first_name}"}})'
    right = f'(:{second_label} {{name: "{second_name}"}})'
    return f'{left}-[:{relationship.type}]->{right}'

def get_all_nodes_and_relationships(driver, file_names_list=None):
    """Collect distinct ``Entity`` node names and relationship strings.

    Runs one query that matches every ``Entity`` node together with any
    relationship (in either direction) to another ``Entity`` node.

    Args:
        driver: Object whose ``session()`` returns a context manager with a
            ``run(query)`` method yielding records keyed by ``n``, ``r`` and
            ``m``. A falsy driver short-circuits to two empty lists.
        file_names_list: Accepted for interface compatibility; it is not
            used by the current implementation.

    Returns:
        A ``(node_names, relationship_strings)`` tuple of lists. Both lists
        are de-duplicated and keep first-seen order, so the same query
        result always produces the same lists (unlike ``list(set(...))``,
        whose order changes with the string hash seed).
    """
    if not driver:
        return [], []

    # Dicts are used as insertion-ordered sets: keys are the unique values.
    seen_names = {}
    seen_relationships = {}

    with driver.session() as session:
        base_query = """
        MATCH (n:Entity)
        OPTIONAL MATCH (n)-[r]-(m:Entity)
        RETURN DISTINCT n, r, m
        """

        for record in session.run(base_query):
            # Both endpoints contribute a name; skip missing nodes and
            # nodes without a (truthy) "name" property.
            for key in ("n", "m"):
                node = record.get(key)
                name = node.get("name") if node else None
                if name:
                    seen_names.setdefault(name, None)

            relationship = record.get("r")
            if relationship is not None:
                # The undirected match returns each relationship from both
                # ends; the string form is identical, so it is kept once.
                seen_relationships.setdefault(
                    relationship_to_string(relationship), None
                )

    return list(seen_names), list(seen_relationships)

# Collect node names and relationship strings once; they feed the rephrase prompt below.
nodes_list, relationships_list = get_all_nodes_and_relationships(driver)     

In [45]:
# Display the collected relationship strings for inspection.
relationships_list

['(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "RESTful APIs"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "API Integration"})',
 '(:Entity {name: "Yasir Khan"})-[:HAS_CERTIFICATION]->(:Entity {name: "Python Programmer Bootcamp"})',
 '(:Entity {name: "Aun Ali"})-[:HAS_SKILLS]->(:Entity {name: "Finance Operations"})',
 '(:Entity {name: "Yasir Khan"})-[:HAS_SKILLS]->(:Entity {name: "OpenCV"})',
 '(:Entity {name: "Hasnain Ali Poonja"})-[:HAS_EDUCATION]->(:Entity {name: "MS Robotics and Artificial Intelligence"})',
 '(:Entity {name: "Yasir Khan"})-[:HAS_PROJECTS]->(:Entity {name: "E-Sports"})',
 '(:Entity {name: "Aun Ali"})-[:HAS_MEMBERSHIP]->(:Entity {name: "ACCA, UK"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "Laravel MVC Framework"})',
 '(:Entity {name: "Hasnain Ali Poonja"})-[:HAS_EXPERIENCE]->(:Entity {name: "CrossWing"})',
 '(:Entity {name: "Khawer Ali"})-[:HAS_SKILLS]->(:Entity {name: "Websockets"})',
 '(:Entity {name:

In [46]:
def rephrase_query_chain(query, nodes, relationships):
    """Ask the chat model to restate ``query`` using known graph nodes and relationships.

    Args:
        query: The user's question. A falsy value returns ``None`` without
            creating or calling the model.
        nodes: Value substituted into the prompt's ``{nodes}`` slot.
        relationships: Value substituted into the ``{relationships}`` slot.

    Returns:
        Whatever ``(prompt | llm).invoke(...)`` returns, or ``None`` for a
        falsy ``query``.
    """
    rephrase_template=""" You are a highly skilled assistant that specializes in rephrasing user queries using the list of nodes and the list of relationships in the graph database. 

    The rephrased query should include the exact node and the relevant relationship of the node from the list of nodes and the list of relationships.

    List of nodes: {nodes}
    List of relationships: {relationships}
    Query: {query}

    Rephrased Query:
    """

    # Nothing to rephrase: bail out before any model object is created.
    if not query:
        return None

    chat_model = ChatOpenAI(model="gpt-4o")
    rephrase_prompt = PromptTemplate.from_template(rephrase_template)

    prompt_inputs = {
        "nodes": nodes,
        "relationships": relationships,
        "query": query,
    }
    return (rephrase_prompt | chat_model).invoke(prompt_inputs)

In [47]:
# Example user question, rephrased against the node and relationship lists collected above.
query = "which candidate has git skills"
result = rephrase_query_chain(query, nodes_list, relationships_list)

In [48]:
# Display the rephrased query text.
result.content

'Which candidate has the skill of "Git" as specified in the relationship (Node: "Khawer Ali", Relationship: HAS_SKILLS, Node: "Git")?'

In [49]:
# Run the Cypher QA chain on the rephrased query; the schema string fills the prompt's {schema} slot.
final_result = qa.invoke({"query": result.content, "schema": schema})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (candidate:Entity)-[:HAS_SKILLS]->(skill:Entity)
WHERE skill.name CONTAINS "Git"
RETURN candidate.name
[0m
Full Context:
[32;1m[1;3m[{'candidate.name': 'Khawer Ali'}, {'candidate.name': 'Yasir Khan'}][0m

[1m> Finished chain.[0m


In [52]:
# Display the full chain output, including the intermediate steps.
final_result

{'query': 'Which candidate has the skill of "Git" as specified in the relationship (Node: "Khawer Ali", Relationship: HAS_SKILLS, Node: "Git")?',
 'schema': 'Node properties:\nresume {name: STRING}\nFile {name: STRING}\nEntity {name: STRING}\nSCIENCE_ARTICLE {name: STRING}\nRelationship properties:\n\nThe relationships:\n(:File)-[:BELONGS_TO]->(:resume)\n(:Entity)-[:HAS_SKILLS]->(:Entity)\n(:Entity)-[:LIVES_IN]->(:Entity)\n(:Entity)-[:HAS_FILE]->(:File)\n(:Entity)-[:HAS_EXPERIENCE]->(:Entity)\n(:Entity)-[:AS]->(:Entity)\n(:Entity)-[:LOCATED_IN]->(:Entity)\n(:Entity)-[:HAS_CERTIFICATION]->(:Entity)\n(:Entity)-[:HAS_PROJECTS]->(:Entity)\n(:Entity)-[:HAS_EDUCATION]->(:Entity)\n(:Entity)-[:HAS_CONFERENCE]->(:Entity)\n(:Entity)-[:FROM]->(:Entity)\n(:Entity)-[:HAS_MEMBERSHIP]->(:Entity)\n(:Entity)-[:HAS_CERTIFICATIONS]->(:Entity)\n(:Entity)-[:HAS_COURSES]->(:Entity)',
 'result': 'Khawer Ali has the skill of "Git".',
 'intermediate_steps': [{'query': 'cypher\nMATCH (candidate:Entity)-[:HAS_SK

In [53]:
# Final answer step: combine the user's original question, the graph schema and
# the QA chain output into one natural-language answer.
#
# The prompt states that a user query is provided, so the template exposes a
# {query} slot and the original question is passed in. Without it the model
# only sees the rephrased query embedded in `final_result`.
combined_answer_template = """
You will be provided with a user query, schema of knowledge graph and result from
cypher query chain.

Your task is to draft a final answer based on the result and the user query. Before drafting 
the final response use context provided by the result.
User Query: {query}
Schema: {schema}
Result: {final_result}
Combined Answer:
"""

prompt = PromptTemplate.from_template(combined_answer_template)
llm = ChatOpenAI(model="gpt-4o")

chain = prompt | llm

# `query` is the original user question defined in the rephrasing cell above.
combined_result = chain.invoke(
    {"query": query, "final_result": final_result, "schema": schema}
)

In [54]:
# Display the text of the drafted final answer.
combined_result.content

'The query focused on identifying candidates who possess the skill "Git." Based on the result from the cypher query, it was found that Khawer Ali has the skill of "Git." Additionally, Yasir Khan was also identified to possess the skill in the intermediate steps of the query process. However, the final result highlights Khawer Ali with the specified skill.'