<!-- @format -->

# Prompt 戦略による Graph-RAG

- プロンプトを工夫することで Graph-RAG を実現


In [None]:
import os

from dotenv import load_dotenv
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from phoenix.trace.langchain import LangChainInstrumentor

In [None]:
# Load settings from the .env file located one directory above this notebook.
load_dotenv("../.env")

In [None]:
# Set the Phoenix project name first, then enable the LangChain instrumentor.
os.environ.update({"PHOENIX_PROJECT_NAME": "2.2 prompt strategies"})
LangChainInstrumentor().instrument()

In [None]:
# Graph handle shared by every chain below. No arguments are passed, so the
# connection settings presumably come from environment variables -- confirm.
graph = Neo4jGraph()

<!-- @format -->

## GraphCypherQAChain による RAG

- `GraphCypherQAChain`は、graph スキーマとプロンプトから cypher クエリを生成（text2cypher）し、回答まで取得


In [None]:
# Azure OpenAI chat model settings: fixed deployment name and temperature 0.0,
# with the endpoint and API version read from the environment.
azure_chat_settings = {
    "azure_deployment": "gpt-35-turbo",
    "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
    "api_version": os.environ["OPENAI_API_VERSION"],
    "temperature": 0.0,
}
llm = AzureChatOpenAI(**azure_chat_settings)

# Baseline chain built with the library's default prompts (no custom prompt).
chain = GraphCypherQAChain.from_llm(llm=llm, graph=graph, verbose=True)

<!-- @format -->

### システムプロンプト


<!-- @format -->

- user query -> cypher query(neo4j へのクエリ)を生成するプロンプト


In [None]:
# Show the default prompt template that turns a user question into a Cypher query.
print(chain.cypher_generation_chain.prompt.template)

<!-- @format -->

- グラフのスキーマ情報を提示するプロンプト


In [None]:
# Show the graph schema text held by the chain.
print(chain.graph_schema)

<!-- @format -->

- クエリ結果から回答を生成するためのプロンプト


In [None]:
# Show the prompt template used to turn the query result into the final answer.
print(chain.qa_chain.prompt.template)

<!-- @format -->

### 回答結果


In [None]:
# Baseline chain, English question: count the artists.
chain.invoke({"query": "how many artists are there?"})

In [None]:
# Baseline chain, Japanese question: "What is the release date of Toy Story?"
chain.invoke({"query": "Toy Storyの公開日は？"})

In [None]:
# Baseline chain, Japanese question: "Name five highly rated movies released in 1996."
chain.invoke({"query": "1996年に公開された映画で評価の高いものを5つ教えて"})

<!-- @format -->

---

## Few-shot examples

- ユーザー質問と Cypher クエリ文の例をあらかじめ用意して、Few-shot プロンプトで実現


In [None]:
# Question -> Cypher pairs used as few-shot examples.
#
# Literal Cypher braces are doubled ("{{" / "}}") because the rendered examples
# are embedded in a prompt template that is formatted again, at which point the
# doubled braces collapse to single ones.
#
# Actors are matched as `Person` nodes with an `ACTED_IN` relationship in every
# example, so the model is shown a single, consistent labelling.
examples = [
    {
        "question": "How many artists are there?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)",
    },
    {
        "question": "Which actors played in the movie Casino?",
        "query": "MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name",
    },
    {
        "question": "How many movies has Tom Hanks acted in?",
        "query": "MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count(m)",
    },
    {
        "question": "List all the genres of the movie Schindler's List",
        # The backslash escapes the apostrophe inside the Cypher string literal.
        "query": "MATCH (m:Movie {{title: 'Schindler\\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name",
    },
    {
        "question": "Which actors have worked in movies from both the comedy and action genres?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name",
    },
    {
        "question": "Which directors have made movies with at least three different actors named 'John'?",
        "query": "MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name",
    },
    {
        "question": "Identify movies where directors also played a role in the film.",
        "query": "MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m) RETURN m.title, p.name",
    },
    {
        "question": "Find the actor with the highest number of movies in the database.",
        # Uses `Person` (not `Actor`) to agree with the other examples.
        "query": "MATCH (a:Person)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1",
    },
    {
        "question": "How many movies were released in October 1995?",
        "query": "MATCH (m:Movie) WHERE m.released.year = 1995 and m.released.month = 10 RETURN COUNT(m)",
    },
]

In [None]:
# Renders one few-shot example as a question/query pair. The line break is
# written explicitly so that "Cypher query:" starts at column 0; a triple-quoted
# literal indented inside the call would leak its indentation into every
# rendered example and make the examples differ from `suffix_prompt`.
example_prompt = PromptTemplate.from_template(
    template="User input: {question}\nCypher query: {query}",
)

# Text placed before the examples: the task instruction and the graph schema.
prefix_prompt = """You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
{schema}.

Below are a number of examples of questions and their corresponding Cypher queries."""

# Text placed after the examples: the actual question, with the query left
# open for the model to complete.
suffix_prompt = """User input: {question}
Cypher query: """

# Static few-shot prompt: always embeds the first five entries of `examples`.
prompt = FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix=prefix_prompt,
    suffix=suffix_prompt,
    input_variables=["question", "schema"],
)

In [None]:
# Render the static few-shot prompt with a sample question and the current
# graph schema to inspect the exact text sent to the model.
print(
    prompt.format(question="How many artists are there?", schema=graph.schema)
)

In [None]:
# Same graph and model as the baseline chain, but Cypher generation uses the
# static few-shot prompt instead of the default one.
chain_fewshot = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    cypher_prompt=prompt,
    verbose=True,
)

In [None]:
# Static few-shot chain, English question: count the artists.
chain_fewshot.invoke({"query": "how many artists are there?"})

In [None]:
# Static few-shot chain, Japanese question: "Which actors appeared in Toy Story?"
chain_fewshot.invoke({"query": "Toy Storyに出演した俳優は？"})

In [None]:
# Static few-shot chain, Japanese question: "Name five highly rated movies released in 1996."
chain_fewshot.invoke(
    {"query": "1996年に公開された映画で評価の高いものを5つ教えて"}
)

<!-- @format -->

---

## Dynamic few-shot examples

- Few-shot プロンプトの例が十分にある場合、トークン消費とモデルの混乱を避けるため、ユーザークエリと関連性の高い例のみをプロンプトに含めたい
- これを実現するために、Embedding モデルでユーザークエリと関連性の高い例を選択する `SemanticSimilarityExampleSelector` を利用


In [None]:
# Embedding model; only the endpoint is given explicitly, read from the environment.
embeddings = AzureOpenAIEmbeddings(azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"])

# Selector that indexes every entry of `examples` by its "question" field in a
# Neo4jVector store and returns the 5 most similar examples per query.
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples=examples,
    embeddings=embeddings,
    vectorstore_cls=Neo4jVector,
    k=5,
    input_keys=["question"],
)

In [None]:
# Inspect which examples the selector returns for a sample question.
example_selector.select_examples({"question": "how many artists are there?"})

In [None]:
# Dynamic few-shot prompt: same prefix, suffix and example layout as the static
# prompt, but the examples come from `example_selector` instead of a fixed list.
prompt_dynamic = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix=prefix_prompt,
    suffix=suffix_prompt,
    input_variables=["question", "schema"],
)

In [None]:
# Same graph and model again, with Cypher generation driven by the dynamic
# few-shot prompt.
chain_dynamic = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    cypher_prompt=prompt_dynamic,
    verbose=True,
)

In [None]:
# Dynamic few-shot chain, English question: count the artists.
chain_dynamic.invoke({"query": "how many artists are there?"})

In [None]:
# Dynamic few-shot chain, Japanese question: "Which actors appeared in Toy Story?"
chain_dynamic.invoke({"query": "Toy Storyに出演した俳優は？"})

In [None]:
# Dynamic few-shot chain, Japanese question: "Name five highly rated movies released in 1996."
chain_dynamic.invoke(
    {"query": "1996年に公開された映画で評価の高いものを5つ教えて"}
)