In [137]:
from langchain_community.llms import HuggingFaceHub

repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
READER_MODEL_NAME = "mistral-8x7-B"

llm_new = HuggingFaceHub(
    repo_id=repo_id,
    task="text-generation",
    huggingfacehub_api_token = "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",
    model_kwargs={
        "max_new_tokens": 100,
        "top_k": 10,
        "top_p": 0.95,
        "typical_p": 0.95,
        "temperature": 0.01,
        "repetition_penalty": 1.03,
    },
)

In [138]:
llm_new

HuggingFaceHub(client=<InferenceClient(model='mistralai/Mixtral-8x7B-Instruct-v0.1', timeout=None)>, repo_id='mistralai/Mixtral-8x7B-Instruct-v0.1', task='text-generation', model_kwargs={'max_new_tokens': 100, 'top_k': 10, 'top_p': 0.95, 'typical_p': 0.95, 'temperature': 0.01, 'repetition_penalty': 1.03}, huggingfacehub_api_token='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')

In [139]:
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph
from langchain.chains import LLMChain

In [140]:
graph = Neo4jGraph(url="neo4j+s://46a80122.databases.neo4j.io:7687", username="neo4j", password="abcde777")

In [141]:
graph.refresh_schema()

In [142]:
print(graph.schema)

Node properties are the following:
Article {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Code {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Dataset {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Software Application {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING}
Relationship properties are the following:

The relationships are the following:
(:Article)-[:uses]->(:Dataset),(:Article)-[:provides]->(:Code),(:Article)-[:provides]->(:Software Application),(:Article)-[:cites]->(:Article)


In [143]:
chain = GraphCypherQAChain.from_llm(
    llm_new, graph=graph, verbose=True
)

In [144]:
#chain.invoke("How many published papers are there?")

In [145]:
#chain.invoke("How many articles are there?")

In [146]:
from langchain_core.prompts.prompt import PromptTemplate

CYPHER_GENERATION_TEMPLATE = """Task: Generate a Cypher statement to query a graph database.

Instructions:
Use only the provided node types, relationship types, and properties in the schema.
Do not use any other node types, relationship types, or properties that are not provided.

Schema:
{schema}

Note: Only output the Cypher statement. Do not include any explanations, instructions, or other text.

Examples of Cypher statements:

# How many datasets are used by articles?
MATCH (:Article)-[:uses]->(d:Dataset)
RETURN COUNT(DISTINCT d) AS numberOfDatasets

# Find articles that provide both code and software applications
MATCH (a:Article)-[:provides]->(c:Code)
MATCH (a)-[:provides]->(s:Software Application)
RETURN a.name AS articleName, c.name AS codeName, s.name AS softwareName

Question: {question}
Cypher statement:
"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)



chain = GraphCypherQAChain.from_llm(
    llm_new,
    graph=graph,
    verbose=True,
    return_intermediate_steps=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    output_parser=cypher_output_parser
)

In [147]:
from langchain.output_parsers import RegexParser

cypher_output_parser = RegexParser(
    regex=r"(?s).*?(MATCH|RETURN|CREATE|MERGE|DELETE|SET|REMOVE|FOREACH|WITH|UNWIND|CALL).*",
    output_keys=["cypher_query"],
)

In [148]:
def wrap_text_preserve_newlines(text, width=700):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text


def process_llm_response(llm_response):
    ans = wrap_text_preserve_newlines(llm_response['Schema:'].split("Answer:")[1])
    
    sources_used = ' \n'.join(
        [
            source.metadata['source']
            for source in llm_response['source_documents']
        ]
    )
    
    ans = ans + '\n\nSources: \n' + sources_used
    return ans

In [149]:
def llm_ans(query):
    #start = time.time()
    llm_response = chain.invoke(query)
    ans = process_llm_response(llm_response)
    #end = time.time()

    #time_elapsed = int(round(end - start, 0))
    #time_elapsed_str = f'\n\nTime elapsed: {time_elapsed} s'
    return ans 

In [150]:
llm_ans("How many articles are there?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mTask: Generate a Cypher statement to query a graph database.

Instructions:
Use only the provided node types, relationship types, and properties in the schema.
Do not use any other node types, relationship types, or properties that are not provided.

Schema:
Node properties are the following:
Article {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Code {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Dataset {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Software Application {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING}
Relationship properties are the following:

The relationships are the following:
(:Article)-[:uses]->(:Dataset),(:Article)-[:provides]->(:Code),(:Article)-[:provides]->(:Software Application),(:Article)-[:cites]->(:Article)


ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'Task': expected 'FOREACH', 'ALTER', 'CALL', 'USING PERIODIC COMMIT', 'CREATE', 'LOAD CSV', 'START DATABASE', 'STOP DATABASE', 'DEALLOCATE', 'DELETE', 'DENY', 'DETACH', 'DROP', 'DRYRUN', 'FINISH', 'GRANT', 'INSERT', 'MATCH', 'MERGE', 'NODETACH', 'OPTIONAL', 'REALLOCATE', 'REMOVE', 'RENAME', 'RETURN', 'REVOKE', 'ENABLE SERVER', 'SET', 'SHOW', 'TERMINATE', 'UNWIND', 'USE' or 'WITH' (line 1, column 1 (offset: 0))
"Task: Generate a Cypher statement to query a graph database."
 ^}

In [151]:
from langchain_community.llms import HuggingFaceHub
from langchain.chains import GraphCypherQAChain

repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
READER_MODEL_NAME = "mistral-8x7-B"

llm_new = HuggingFaceHub(
    repo_id=repo_id,
    task="text-generation",
    huggingfacehub_api_token="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",
    model_kwargs={
        "max_new_tokens": 100,
        "top_k": 10,
        "top_p": 0.95,
        "typical_p": 0.95,
        "temperature": 0.01,
        "repetition_penalty": 1.03,
    },
)

chain = GraphCypherQAChain.from_llm(
    llm_new,
    graph=graph,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
)

raw_result = chain.invoke({"query": "How many people played in Top Gun?"})

# Post-process the result to extract the Cypher statement
cypher_statement = raw_result.split("Cypher statement:")[1].strip()
print(cypher_statement)

# You can now use the cypher_statement directly in your Neo4j query execution




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mTask: Generate a Cypher statement to query a graph database.

Instructions:
Use only the provided node types, relationship types, and properties in the schema.
Do not use any other node types, relationship types, or properties that are not provided.

Schema:
Node properties are the following:
Article {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Code {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Dataset {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING},Software Application {id: INTEGER, name: STRING, url: STRING, creativeWorkType: STRING, metadata_url: STRING}
Relationship properties are the following:

The relationships are the following:
(:Article)-[:uses]->(:Dataset),(:Article)-[:provides]->(:Code),(:Article)-[:provides]->(:Software Application),(:Article)-[:cites]->(:Article)


ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'Task': expected 'FOREACH', 'ALTER', 'CALL', 'USING PERIODIC COMMIT', 'CREATE', 'LOAD CSV', 'START DATABASE', 'STOP DATABASE', 'DEALLOCATE', 'DELETE', 'DENY', 'DETACH', 'DROP', 'DRYRUN', 'FINISH', 'GRANT', 'INSERT', 'MATCH', 'MERGE', 'NODETACH', 'OPTIONAL', 'REALLOCATE', 'REMOVE', 'RENAME', 'RETURN', 'REVOKE', 'ENABLE SERVER', 'SET', 'SHOW', 'TERMINATE', 'UNWIND', 'USE' or 'WITH' (line 1, column 1 (offset: 0))
"Task: Generate a Cypher statement to query a graph database."
 ^}