In [1]:
#Imports
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
import os

In [2]:
from langchain.chains import GraphCypherQAChain
from langchain_groq import ChatGroq
from langchain_community.graphs import Neo4jGraph

# Graph handle created without explicit connection arguments.
# NOTE(review): presumably the connection settings come from the NEO4J_*
# environment variables — confirm; `graph` is rebuilt with explicit
# credentials in the next cell.
graph = Neo4jGraph()

# Groq-hosted chat model; temperature 0 and a 2000-token cap on completions.
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, max_tokens=2000)

# Baseline question-answering chain using the library's default prompts.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
)

In [3]:
# Read the Neo4j connection settings and the Groq key from the environment
# in one pass; any variable that is not set comes back as None.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, GROQ_API_KEY = (
    os.getenv(env_name)
    for env_name in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD", "GROQ_API_KEY")
)

# Rebuild the graph handle with the explicit connection settings.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

In [4]:
# Refresh the graph's schema and print it. `schema` is kept as a module-level
# name because it is passed as the `schema` variable when the Cypher prompt is
# formatted further down.
graph.refresh_schema()
schema = graph.schema
print(schema)

Node properties:
Year {year: INTEGER}
Month {name: STRING, Rain: FLOAT, month: INTEGER, AF: FLOAT, Tmax: FLOAT, Tmin: FLOAT, Tmean: FLOAT, Sun: FLOAT, Date: DATE}
Relationship properties:

The relationships:
(:Year)-[:HAS_MONTH]->(:Month)


In [26]:
# Few-shot examples pairing a natural-language question with the Cypher that
# answers it on the (:Year)-[:HAS_MONTH]->(:Month) weather graph.
# Literal Cypher braces are doubled ({{ }}) because FewShotPromptTemplate
# formats the assembled template again after the examples have been inserted.
examples = [
    {
        "question": "Get all weather data for a specific year",
        "query": """MATCH (y:Year {{year:1963}})-[:HAS_MONTH]->(m:Month) 
        RETURN m.name AS Month, m.Tmax AS MaxTemperature, m.Tmin AS MinTemperature,
        m.Rain AS Rainfall, m.Sun AS Sunlight, m.Tmean AS MeanTemperature
        ORDER BY m.month""",
    },
    {
        # Fixed: the question asks for temperature, so aggregate Tmean
        # (the original example averaged m.Rain and labelled it rainfall).
        "question": "what was average temperature in between 1980 and 1990",
        "query": """MATCH (y:Year)-[:HAS_MONTH]->(m:Month)
                    WHERE y.year >= 1980 AND y.year <= 1990
                    RETURN AVG(m.Tmean) AS average_temperature""",
    },
    {
        "question": "what was the most rainy year?",
        "query": """MATCH (y:Year)-[:HAS_MONTH]->(m:Month)
        WITH y.year AS year, AVG(m.Rain) AS avg_rain
        RETURN year, avg_rain
        ORDER BY avg_rain DESC
        LIMIT 1""",
    },
    {
        "question": "In which year we had maximum temperature in may?",
        "query": """MATCH (y:Year)-[:HAS_MONTH]->(m:Month)
        WHERE m.month = 5
        WITH y.year AS year, MAX(m.Tmax) AS max_temp
        RETURN year, max_temp
        ORDER BY max_temp DESC
        LIMIT 1""",
    },
]

# Template used to render each example as a "User input / query" pair.
example_prompt = PromptTemplate.from_template(
    "User input: {question}\nquery: {query}"
)

# Cypher-generation prompt: instructions + schema, then the examples, then the
# user's question. Expects `question` and `schema` at format time.
prompt = FewShotPromptTemplate(
    # All examples are used (the former `[:5]` slice was a no-op on 4 items).
    examples=examples,
    example_prompt=example_prompt,
    # Fixed: "\nn{schema}" printed a stray "n" before the schema, and the last
    # line of the prefix carried a leftover `",` delimiter into the prompt.
    prefix=
    """
    You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.Here is the schema information \n{schema}.
  
    
    Task:
        You are provided with a natural language query. Your goal is to:
        Understand the user's intent: Identify what the user is asking for, such as retrieving data, finding averages, or comparing values, average between to given dates or months or years
        Generate an appropriate Cypher query: Based on the graph structure and properties provided above, translate the user's natural language query into a Cypher query.
        Return the Cypher query: Ensure the query is correctly formatted and accounts for all relevant properties.

        Use logical operators like >= and <= for filtering node for between specific years, months or dtaes.DO NOT use range() for this
        WHERE clauses should be used for property value comparisons

        
        Given a natural language query, generate a Cypher query that accurately reflects the user's request,
        ensuring that any specific year mentioned in the query is directly translated into the Cypher query. For instance, 
        if the user asks for data from 1991 or in 1991, the Cypher query should explicitly reference the year 1991 in the `MATCH` statement.
        if the user asks for data from january or in january or any other month, the Cypher query should explicitly reference the month anuary or month 1 in the `MATCH` statement.
        ALways give answer obtained from data and never give any suggestion or interpretation  

        Below are a number of examples of questions and their corresponding Cypher queries.
    """,
    suffix="User input: {question}\nquery: ",
    input_variables=["question", "schema"],
)

In [27]:
# Render the few-shot prompt with a sample question and the current schema to
# inspect the exact text that will be sent for Cypher generation.
print(prompt.format(question="what is the average temperature in 1999", schema= schema))


    You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.Here is the schema information 
nNode properties:
Year {year: INTEGER}
Month {name: STRING, Rain: FLOAT, month: INTEGER, AF: FLOAT, Tmax: FLOAT, Tmin: FLOAT, Tmean: FLOAT, Sun: FLOAT, Date: DATE}
Relationship properties:

The relationships:
(:Year)-[:HAS_MONTH]->(:Month).
  
    
    Task:
        You are provided with a natural language query. Your goal is to:
        Understand the user's intent: Identify what the user is asking for, such as retrieving data, finding averages, or comparing values, average between to given dates or months or years
        Generate an appropriate Cypher query: Based on the graph structure and properties provided above, translate the user's natural language query into a Cypher query.
        Return the Cypher query: Ensure the query is correctly formatted and accounts for all relevant properties.

        Use logical operators like >= and <= for filte

In [28]:
# Rebuild the QA chain so Cypher generation uses the custom few-shot prompt
# instead of the default one; this replaces the earlier `chain`.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    cypher_prompt=prompt,
    verbose=True,
)

In [32]:
# Run one question end to end. The chain takes the question under the "query"
# key; the returned dict echoes it and carries the answer under "result"
# (see the printed output below).
result = chain.invoke({"query":"which was raniest year between 2000 and 2010?"})
print(result)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (y:Year)-[:HAS_MONTH]->(m:Month)
        WHERE y.year >= 2000 AND y.year <= 2010
        WITH y.year AS year, AVG(m.Rain) AS avg_rain
        RETURN year, avg_rain
        ORDER BY avg_rain DESC
        LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'year': 2002, 'avg_rain': 82.5}][0m

[1m> Finished chain.[0m
{'query': 'which was raniest year between 2000 and 2010?', 'result': 'The rainiest year between 2000 and 2010 was 2002.'}


from langchain_core.prompts import ChatPromptTemplate
# Standalone prompt text: describes the Year/Month graph structure by hand,
# gives the Cypher-generation instructions, lists example question shapes, and
# ends with a single `{query}` placeholder for the user's question.
system_message = """

You are interacting with a Neo4j graph database containing weather data structured as follows:

Graph Structure:
        Year Node (Year)
        Properties:
        year (Integer): 

        Month Node (Month)
        Properties:
        month (Integer): The numeric representation of the month (1 = January, 2 = February, ..., 12 = December).
        name (String): The name of the month (e.g., "January", "February").
        Tmax (Float): Maximum temperature recorded in the month.
        Tmin (Float): Minimum temperature recorded in the month.
        AF (Float): Number of air frosts in the month.
        Rain (Float): Rainfall recorded in millimeters.
        Sun (Float): Sunlight recorded in hours.
        Tmean (Float): Mean temperature in the month.
        Date (Date): Date representation (e.g., "1963-01-01").

        Relationships:
        HAS_MONTH: Connects Year nodes to their respective Month nodes.

Task:
        You are provided with a natural language query. Your goal is to:
        Understand the user's intent: Identify what the user is asking for, such as retrieving data, finding averages, or comparing values, average between to given dates or months or years
        Generate an appropriate Cypher query: Based on the graph structure and properties provided above, translate the user's natural language query into a Cypher query.
        Return the Cypher query: Ensure the query is correctly formatted and accounts for all relevant properties.

        Use logical operators like >= and <= for filtering node for between specific years, months or dtaes.DO NOT use range() for this
        WHERE clauses should be used for property value comparisons 
        


Examples of Natural Language Queries:
        "What was the average temperature in a given year on month?"
        "Show me the month with the most rainfall in given year."
        "Get all the weather data for January across all years."
        "Which year had the highest average temperature in July?"
        "what was the average temperature between two years or months or dates"
        "what was the maximum temperature between two years or months or dates"

Given a natural language query, generate a Cypher query that accurately reflects the user's request,
ensuring that any specific year mentioned in the query is directly translated into the Cypher query. For instance, 
if the user asks for data from 1991 or in 1991, the Cypher query should explicitly reference the year 1991 in the `MATCH` statement.
if the user asks for data from january or in january or any other month, the Cypher query should explicitly reference the month anuary or month 1 in the `MATCH` statement.
ALways give answer obtained from data and never give any suggestion or interpretation

User question: {query}


"""

# NOTE(review): this rebinds `prompt`, the name used earlier for the
# FewShotPromptTemplate passed to the chain as `cypher_prompt`. This template
# exposes only `{query}` (no `{question}` / `{schema}`), so confirm it is not
# meant to be picked up by the chain-building cell on a re-run.
prompt = ChatPromptTemplate.from_template(system_message)