In [3]:
from langchain_community.llms import HuggingFaceHub
from langchain_community.graphs import Neo4jGraph
from langchain_core.prompts.prompt import PromptTemplate

In [4]:
def setup_neo4j_graph():
    """Create the LangChain ``Neo4jGraph`` wrapper used to read the graph schema.

    Connection settings come from environment variables so that no credentials
    are stored in the notebook:

    * ``NEO4J_URI`` - optional; defaults to the instance URL this notebook has
      always used.
    * ``NEO4J_USERNAME`` and ``NEO4J_PASSWORD`` - required.

    Returns:
        Neo4jGraph: graph wrapper constructed with ``refresh_schema=True``
        (presumably so ``.schema`` is populated on construction - confirm
        against the Neo4jGraph documentation).

    Raises:
        ValueError: if the username or password variable is unset or empty.
    """
    import os  # local import so this cell runs without touching the import cell

    username = os.environ.get("NEO4J_USERNAME")
    password = os.environ.get("NEO4J_PASSWORD")

    # Fail early with an actionable message instead of a driver-level auth error.
    missing = [
        name
        for name, value in (("NEO4J_USERNAME", username), ("NEO4J_PASSWORD", password))
        if not value
    ]
    if missing:
        raise ValueError(
            "Missing Neo4j credentials; set environment variable(s): "
            + ", ".join(missing)
        )

    return Neo4jGraph(
        url=os.environ.get("NEO4J_URI", "neo4j+s://46a80122.databases.neo4j.io:7687"),
        username=username,
        password=password,
        refresh_schema=True,
    )

In [5]:
def setup_llm(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", max_new_tokens=100, temperature=0.7):
    """Build the Hugging Face Hub text-generation LLM used to write Cypher.

    The API token is read from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable rather than being embedded in the notebook.

    Args:
        repo_id: Hub repository of the model to call.
        max_new_tokens: Value forwarded as ``model_kwargs["max_new_tokens"]``.
        temperature: Value forwarded as ``model_kwargs["temperature"]``.

    Returns:
        HuggingFaceHub: client configured for the ``text-generation`` task with
        the sampling settings below passed through ``model_kwargs``.

    Raises:
        ValueError: if ``HUGGINGFACEHUB_API_TOKEN`` is unset or empty.
    """
    # NOTE(review): the notebook output shows a deprecation warning when this
    # pipeline runs - presumably for HuggingFaceHub; confirm against the
    # installed langchain-community version before migrating.
    import os  # local import so this cell runs without touching the import cell

    api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not api_token:
        raise ValueError(
            "Missing Hugging Face token; set the HUGGINGFACEHUB_API_TOKEN "
            "environment variable."
        )

    return HuggingFaceHub(
        repo_id=repo_id,
        task="text-generation",
        huggingfacehub_api_token=api_token,
        model_kwargs={
            "max_new_tokens": max_new_tokens,
            "top_k": 10,
            "top_p": 0.95,
            "typical_p": 0.95,
            "temperature": temperature,
        },
    )

In [6]:
def create_cypher_generation_prompt():
    """Return the ``PromptTemplate`` that asks the model for a Cypher statement.

    The template exposes two placeholders, ``{schema}`` and ``{question}``, and
    embeds one worked example (an article count query).
    """
    # Every line after the first carries a four-space indent. That indent is
    # part of the prompt text sent to the model, so it is spelled out here.
    template_lines = (
        "Task: Generate Cypher statement to query a graph database.",
        "    Instructions:",
        "    Use only the provided relationship types and properties in the schema.",
        "    Do not use any other relationship types or properties that are not provided.",
        "    Schema:",
        "    {schema}",
        "    Note: Do not include any explanations or apologies in your responses.",
        "    Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.",
        "    Do not include any text except the generated Cypher statement.",
        "    Examples: Here are a few examples of generated Cypher statements for particular questions:",
        "    # How many published paper are there?",
        "    MATCH (a:Article)",
        "    RETURN COUNT(a) AS numberOfArticles",
        "",
        "    The question is:",
        "    {question}",
    )

    return PromptTemplate(
        template="\n".join(template_lines),
        input_variables=["schema", "question"],
    )

In [7]:
def extract_cypher_query(output):
    """Return the tail of ``output`` starting at its last ``MATCH`` line.

    The text is stripped and split on newlines. The last line whose stripped
    form begins with ``MATCH`` (case-sensitive) marks the start of the result,
    which runs to the end of the text. Returns ``None`` when no line qualifies.
    """
    rows = output.strip().split('\n')
    match_positions = [
        pos for pos, row in enumerate(rows) if row.strip().startswith('MATCH')
    ]
    if not match_positions:
        return None
    # Only the final occurrence matters: everything before it is discarded.
    return '\n'.join(rows[match_positions[-1]:])


In [8]:
def natural_language_to_cypher(question):
    """Translate a natural-language question into a Cypher query string.

    Connects to the graph, builds the LLM, fills the generation prompt with the
    graph schema and ``question``, prints the filled prompt, and returns
    whatever ``extract_cypher_query`` pulls out of the model response
    (``None`` when it finds nothing).
    """
    neo4j_graph = setup_neo4j_graph()
    model = setup_llm()

    filled_prompt = create_cypher_generation_prompt().format(
        schema=neo4j_graph.schema, question=question
    )
    print(filled_prompt)

    return extract_cypher_query(model.invoke(filled_prompt))

In [9]:
# Try the pipeline on a sample question; as the last expression in the cell,
# the returned value is what the notebook displays below.
natural_language_to_cypher("How many published papers are there?")

  warn_deprecated(


Task: Generate Cypher statement to query a graph database.
    Instructions:
    Use only the provided relationship types and properties in the schema.
    Do not use any other relationship types or properties that are not provided.
    Schema:
    Node properties are the following:
Article {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Code {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Dataset {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},SoftwareApplication {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING}
Relationship properties are the following:

The relationships are the following:
(:Article)-[:uses]->(:Dataset),(:Article)-[:provides]->(:Code),(:Article)-[:provides]->(:SoftwareApplication),(:Article)-[:cites]->(:Article)
    Note: Do not include any explanations or apologies in your responses.
    Do not respond to 

'    MATCH (a:Article)-[r:uses]->(c:Code)\n    WHERE a.id = 123\n    RETURN c.name AS codeName\n\n    The question is:\n    What is the name of the code used by the article with id 123?\n\n    # What is the name of the'

In [12]:
from langchain_community.llms import HuggingFaceHub
from langchain_community.graphs import Neo4jGraph
from langchain_core.prompts.prompt import PromptTemplate

def setup_neo4j_graph():
    """Connect to Neo4j and return the LangChain ``Neo4jGraph`` wrapper.

    Credentials are taken from the environment instead of being written into
    the notebook:

    * ``NEO4J_USERNAME`` and ``NEO4J_PASSWORD`` - required.
    * ``NEO4J_URI`` - optional; when absent the instance URL this notebook has
      always used is kept.

    Returns:
        Neo4jGraph: graph wrapper constructed with ``refresh_schema=True``
        (presumably so ``.schema`` is populated on construction - confirm
        against the Neo4jGraph documentation).

    Raises:
        ValueError: if the username or password variable is unset or empty.
    """
    import os  # local import: the cell's import block does not provide ``os``

    settings = {
        "NEO4J_USERNAME": os.environ.get("NEO4J_USERNAME"),
        "NEO4J_PASSWORD": os.environ.get("NEO4J_PASSWORD"),
    }

    # Report every missing variable at once rather than failing on the first.
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise ValueError(
            "Missing Neo4j credentials; set environment variable(s): "
            + ", ".join(missing)
        )

    return Neo4jGraph(
        url=os.environ.get("NEO4J_URI", "neo4j+s://46a80122.databases.neo4j.io:7687"),
        username=settings["NEO4J_USERNAME"],
        password=settings["NEO4J_PASSWORD"],
        refresh_schema=True,
    )

def setup_llm(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", max_new_tokens=200, temperature=0.5):
    """Build the Hugging Face Hub text-generation LLM used to write Cypher.

    The API token is read from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable rather than being embedded in the notebook.

    Args:
        repo_id: Hub repository of the model to call.
        max_new_tokens: Value forwarded as ``model_kwargs["max_new_tokens"]``.
            Defaults to 200, the raised limit this cell introduced.
        temperature: Value forwarded as ``model_kwargs["temperature"]``.
            Defaults to 0.5, lowered in this cell for more focused outputs.

    Returns:
        HuggingFaceHub: client configured for the ``text-generation`` task with
        the sampling settings below passed through ``model_kwargs``.

    Raises:
        ValueError: if ``HUGGINGFACEHUB_API_TOKEN`` is unset or empty.
    """
    import os  # local import: the cell's import block does not provide ``os``

    api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not api_token:
        raise ValueError(
            "Missing Hugging Face token; set the HUGGINGFACEHUB_API_TOKEN "
            "environment variable."
        )

    return HuggingFaceHub(
        repo_id=repo_id,
        task="text-generation",
        huggingfacehub_api_token=api_token,
        model_kwargs={
            "max_new_tokens": max_new_tokens,
            "top_k": 10,
            "top_p": 0.95,
            "typical_p": 0.95,
            "temperature": temperature,
        },
    )

def create_cypher_generation_prompt():
    """Return the ``PromptTemplate`` that asks the model for a Cypher query.

    The template exposes two placeholders, ``{schema}`` and ``{question}``, and
    ends with a ``Cypher Query:`` cue for the model to complete.
    """
    # Every line after the first carries a four-space indent, and the separator
    # lines consist of those four spaces only. Both are part of the prompt text,
    # so they are spelled out here.
    template_lines = (
        "Task: Generate a Cypher statement to query a graph database based on the given question.",
        "    Instructions:",
        "    - Use only the provided relationship types and properties in the schema.",
        "    - Do not use any other relationship types or properties that are not provided.",
        "    - Respond ONLY with the Cypher query, nothing else.",
        "    ",
        "    Schema:",
        "    {schema}",
        "    ",
        "    Question: {question}",
        "    ",
        "    Cypher Query:",
    )

    return PromptTemplate(
        template="\n".join(template_lines),
        input_variables=["schema", "question"],
    )

def extract_cypher_query(output):
    """Pull the Cypher statement out of raw LLM text.

    Scans ``output`` line by line for the first line whose leading keyword is
    ``MATCH``, ``OPTIONAL MATCH``, ``RETURN`` or ``WITH`` (case-insensitive,
    whole word) and returns that line together with every line after it.

    Args:
        output: Text returned by the LLM. Anything that is not a string
            (for example ``None``) is treated as containing no query.

    Returns:
        str | None: the extracted lines joined with newlines, or ``None`` when
        no line starts with one of the recognised keywords.
    """
    import re  # local import: the cell's import block does not provide ``re``

    if not isinstance(output, str):
        return None

    # ``\b`` makes the keyword a whole word, so prose such as "Returns the
    # count" or "Without filters" is not mistaken for the start of a query.
    # OPTIONAL MATCH is listed so a query opening with it keeps its first
    # clause. Prose beginning with the bare word "With" still matches.
    clause_start = re.compile(
        r"\s*(?:OPTIONAL\s+MATCH|MATCH|RETURN|WITH)\b", re.IGNORECASE
    )

    lines = output.strip().split('\n')
    for index, line in enumerate(lines):
        if clause_start.match(line):
            # Everything from the first clause onward is kept verbatim.
            return '\n'.join(lines[index:])
    return None

def natural_language_to_cypher(question):
    """Translate a natural-language question into a Cypher query string.

    Connects to the graph, builds the LLM, fills the generation prompt with the
    graph schema and ``question``, and extracts the query from the response.

    Returns:
        str: the extracted Cypher text on success. Failures are also reported
        as strings rather than raised: a fixed "Error: Unable to generate..."
        message when no query is found, or "Error: An exception occurred - ..."
        carrying the text of any exception raised along the way.
    """
    try:
        neo4j_graph = setup_neo4j_graph()
        model = setup_llm()
        filled_prompt = create_cypher_generation_prompt().format(
            schema=neo4j_graph.schema, question=question
        )
        extracted = extract_cypher_query(model.invoke(filled_prompt))
    except Exception as err:
        return f"Error: An exception occurred - {str(err)}"

    if extracted is None:
        return "Error: Unable to generate a valid Cypher query."
    return extracted


# Run the pipeline on a sample question and print the result. On failure
# natural_language_to_cypher returns an "Error: ..." string instead of
# raising, so that message is what gets printed.
question = "How many published papers are there?"
cypher_query = natural_language_to_cypher(question)
print(cypher_query)

    MATCH (a:Article)
    WHERE a.creativeWorkType = 'ScholarlyArticle'
    RETURN COUNT(a)
