# Enhancing RAG with Neo4j Knowledge Graph


Import modules and instantiate connections and models

In [1]:
import snowflake.connector
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate


import config
from llm import Cortex
from ner import EntityFinder
from graph import NewsGraphClient


snowflake_connection = snowflake.connector.connect(**config.SNOWFLAKE_CONNECTION_PARAMS)
model = Cortex(connection=snowflake_connection, model=config.CHAT_MODEL)
entity_finder = EntityFinder(config.RELEVANT_LABELS)
db = NewsGraphClient()

# Define helper functions
def map_candidates_to_context(candidates: list[dict[str, str]]) -> str:
    context_str = ', '.join(
        f"(:{c['label']} {{ name: '{c['name']}' }}"
        for c in candidates
    )
    return context_str


def map_records_to_context(db_records: list[dict]) -> str:
    context_str = ('='*5).join(
        '/n'.join(f"{k}: {v}" for k, v in record.items())
        for record in db_records
    )
    return context_str


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
CYPHER_GENERATION_TEMPLATE = """Based on the graph schema below, write a Cypher query that answers the user's question. 
Use only the node labels, relationships and properties provided in the schema:
{schema}
Entities in the question map to the following database values:
{entities_list}

Here are some examples: 
Example 1: For the question "List 10 titles of articles mentioning Ursula von der Leyen" and the entity list "(:Person {{ name: 'Ursula von der Leyen' }}, (:Person {{ name: 'Ursula v. d. Leyn' }}" the generated Cypher query should be 
"MATCH (a:Article)-[:CONTAINS]->(c:Chunk)-[:MENTIONS]->(o:Person) WHERE o.name IN ['Ursula von der Leyen', 'Ursula v. d. Leyn'] RETURN DISTINCT a.title LIMIT 10"

Example 2: For the question "How many sources mention the EU commission?" and the entity list "(:Organization {{ name: 'EU-Kommission' }}" the generated Cypher query should be 
"MATCH (s:Source)-[:PUBLISHED]->(a:Article)-[:CONTAINS]->(c:Chunk)-[:MENTIONS]->(o:Organization) WHERE o.name IN ['EU-Kommission'] WITH DISTINCT s RETURN count(s)"

Example 3: For the question "News about France and Macron?" and the entity list "(:Location {{ name: 'France' }}, (:Person {{ name: 'Emmanuel Macron' }}" the generated Cypher query should be 
"MATCH (c:Chunk)-[:MENTIONS]->(o:Location) WHERE o.name = 'France' UNION MATCH (c:Chunk)-[:MENTIONS]->(o:Person) WHERE o.name = 'Emmanuel Macron' RETURN c.text LIMIT 10"

Question: {question}
Cypher query:"""

## Generate a Cypher query

In [3]:
question = 'What news are there about Volt?'
# Get entities from text
mentioned_entities = entity_finder.find(question)
# Perform fulltext search
candidates = db.lookup_mentioned_entities(mentioned_entities)
candidate_context = map_candidates_to_context(candidates)
print(candidate_context)

(:Organization { name: 'VOLT' }, (:Organization { name: 'Volt' }, (:Organization { name: 'Europapartei Volt' }, (:Organization { name: 'Volt-Partei' }, (:Organization { name: 'Volt-Fraktionschef' }, (:Organization { name: 'Volt-Bündnis' }, (:Organization { name: 'Volt-Mitgründer' }, (:Organization { name: 'Partei Volt' }, (:Organization { name: 'Euopapartei Volt' }, (:Organization { name: 'Volt-Spitzenkandidat' }


In [4]:
# Define prompt
cypher_prompt = ChatPromptTemplate.from_messages([
    ("system", "Given an input question, convert it to a Cypher query. No pre-amble.",),
    ("human", CYPHER_GENERATION_TEMPLATE),
])
# Define chain
cypher_chain = cypher_prompt | model | StrOutputParser()
# Generate Cypher query with found entities
generated_query = cypher_chain.invoke({
    'question': question,
    'entities_list': candidate_context,
    'schema': db.graph.schema
})
print(generated_query)

Complete() is experimental since 1.0.12. Do not use it in production. 


 MATCH (c:Chunk)-[:MENTIONS]->(o:Organization) WHERE o.name IN ['VOLT', 'Volt', 'Europapartei Volt', 'Volt-Partei', 'Volt-Fraktionschef', 'Volt-Bündnis', 'Volt-Mitgründer', 'Partei Volt', 'Euopapartei Volt', 'Volt-Spitzenkandidat'] RETURN c.text LIMIT 10


## Perform query and generate an answer

In [5]:

# Perform query
response = db.query(generated_query)
context = map_records_to_context(response)
print(context)

c.text: Deutschlandweit haben CDU und CSU laut der aktuellen Hochrechnung von Infratest dimap um 23:46 Uhr mit 30,2 Prozent die meisten Stimmen geholt. Dahinter folgt die AfD mit 15,9 Prozent vor der SPD mit 13,9 und Grünen mit 11,9 Prozent. Das Bündnis Sahra Wagenknecht (BSW) holte demnach aus dem Stand 6,0 Prozent, die FDP liegt bei 5,1 Prozent. Die Linke kam auf 2,8 Prozent, die Freien Wähler auf 2,6 Prozent, die Europapartei Volt auf 2,5 Prozent. Die anderen Parteien bewegen sich im einprozentigen Bereich oder darunter.=====c.text: Laut einer Hochrechnung des ZDF können einen ganze Reihe von Kleinparteien mit Sitzen im neuen EU-Parlament rechnen. Demnach bekommen die Freien Wähler und die Europapartei Volt je 3 Sitze, die Satirepartei Partei 2 Sitze und die Tierschutzpartei, die ÖDP und die Familienpartei je einen Sitz.=====c.text: Die Kleinparteien haben bei der Wahl am Sonntag klar zugelegt. Insgesamt stimmten mehr als 20 Prozent aller Wäh­le­r:in­nen für Parteien, die auf jeweil

In [6]:
# Define prompt and chain
ANSWER_PROMPT_TEMPLATE = (
    "Answer the question below in appropriate detail, given the following context. "
    # "Think step by step before providing a detailed answer. "
    "The context was retrieved from the database by the following query:\n\n"
    "Query: {query}\n\n"
    "Context:\n{context}\n\n"
    "Question: {question}\n\n"
    "Answer: "
)
answer_prompt = ChatPromptTemplate.from_template(ANSWER_PROMPT_TEMPLATE)
answer_chain = answer_prompt | model | StrOutputParser()

In [7]:
# Populate context and generate answer
answer = answer_chain.invoke(
    {'question': question, 'context': context, 'query': generated_query}
)
print(answer)

 In the context provided, there are several pieces of information about Volt, a political party mentioned multiple times. Here is a summary of the news about Volt:

1. Volt participated in the European Parliament election and received 2.5% of the votes in Germany.
2. According to a ZDF projection, Volt and the Freien Wähler each won 3 seats in the European Parliament.
3. Volt improved its performance by 1.8% compared to the previous European Parliament election.
4. In the state of Hessen, Volt received 3.3% of the votes, placing it ahead of the Linke (2.5%) and the Freien Wähler (2.0%).
5. In the 2019 European Parliament election, Volt received only 0.7% of the votes.
6. Volt is mentioned as one of the small parties that contributed to the 14.3% share of small parties in the election.

These pieces of information highlight Volt's performance and representation in the European Parliament election and how it compares to previous elections and other political parties.
