In [2]:
import importlib
import movies
import data_describe
import os

# Re-execute the already-imported `movies` module so that edits made to it
# on disk are picked up without restarting the kernel.
importlib.reload(movies)

# Short aliases for the two classes; MoviesKG is read after the reload above.
MoviesKG, DataDescribe = movies.MoviesKG, data_describe.DataDescribe

In [3]:
# # Create an instance and save the data to a JSON file
# movies_kg = MoviesKG(download=True)
# movies_kg.save_to_json()

# Instantiate with default arguments; the download / rebuild steps are left
# commented out around this line.
movies_kg = MoviesKG()
# movies_kg.clear_database()
# movies_kg.filter_data()
# movies_kg.build_graph()
# Last expression of the cell, so its return value is rendered as the cell output.
movies_kg.get_node_counts_by_label()

[{'label': 'Movie', 'node_count': 1967},
 {'label': 'Genre', 'node_count': 19},
 {'label': 'ProductionCompany', 'node_count': 2611},
 {'label': 'Country', 'node_count': 54},
 {'label': 'Person', 'node_count': 17540},
 {'label': 'User', 'node_count': 100}]

In [43]:
from langchain_core.prompts.prompt import PromptTemplate
import string


class FormatDict(dict):
    """Mapping for partial string formatting.

    Looking up a key that is not present does not raise; it yields the
    original ``{key}`` placeholder text so it survives the formatting pass.
    """

    def __missing__(self, key):
        # Invoked by dict lookup on a miss; rebuild the placeholder verbatim.
        return "".join(("{", key, "}"))


### default template
# CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
# Instructions:
# Use only the provided relationship types and properties in the schema.
# Do not use any other relationship types or properties that are not provided.
# Schema:
# {schema}
# Note: Do not include any explanations or apologies in your responses.
# Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
# Do not include any text except the generated Cypher statement.

# The question is:
# {question}"""

###
# Minimum apoc.text.levenshteinSimilarity score written into the prompt text
# and into every fuzzy-match example below.
SIMILARITY_THRESHOLD = 0.9

# Few-shot prompt for Cypher generation with fuzzy string matching.
# The examples must only use labels and relationship types that exist in the
# graph schema: directors are (:Person) nodes connected by [:DIRECTOR]; there
# is no :Director label, so the examples match on (d:Person).
CYPHER_GENERATION_TEMPLATE = """
Task: Generate Cypher statement to query a graph database with fuzzy search.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Use a similarity threshold of {similarity_threshold} for the levenshteinSimilarity function when matching string fields.
Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

Examples:
question: Who starred in 'The Dark Knight'?
response: MATCH (p:Person)-[:ACTED_IN]->(m:Movie) WHERE apoc.text.levenshteinSimilarity(m.title, 'The Dark Knight') >= {similarity_threshold} RETURN p.name;

question: Which movies did Christopher Nolan direct?
response: MATCH (d:Person)-[:DIRECTOR]->(m:Movie) WHERE apoc.text.levenshteinSimilarity(d.name, 'Christopher Nolan') >= {similarity_threshold} RETURN m.title;

question: What other films has the director of 'The Dark Knight' directed?
response: MATCH (m:Movie) WHERE apoc.text.levenshteinSimilarity(m.title, 'The Dark Knight') >= {similarity_threshold} MATCH (m)<-[:DIRECTOR]-(p:Person)-[:DIRECTOR]->(otherMovies:Movie) RETURN otherMovies.title

question: Which movies did user born in year 2001 like?
response: MATCH (u:User)-[:LIKED]->(m:Movie) WHERE u.birthYear = 2001 RETURN m.title;

question: Where do user who like the director Christopher Nolan live?
response: MATCH (u:User)-[:LIKED]->(:Movie)<-[:DIRECTOR]-(d:Person) WHERE apoc.text.levenshteinSimilarity(d.name, 'Christopher Nolan') >= {similarity_threshold} RETURN u.location;

The question is:
{question}
"""
formatter = string.Formatter()

# Partial formatting: only {similarity_threshold} is substituted here.
# FormatDict returns "{key}" for any missing key, so {schema} and {question}
# come out unchanged and remain placeholders for the PromptTemplate below.
CYPHER_GENERATION_TEMPLATE = formatter.vformat(
    CYPHER_GENERATION_TEMPLATE,
    (),
    FormatDict(similarity_threshold=SIMILARITY_THRESHOLD),
)


CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

### default template


# CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers.
# The information part contains the provided information that you must use to construct an answer.
# The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
# Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
# Here is an example:

# Question: Which managers own Neo4j stocks?
# Context:[manager:CTL LLC, manager:JANE STREET GROUP LLC]
# Helpful Answer: CTL LLC, JANE STREET GROUP LLC owns Neo4j stocks.

# Follow this example when generating answers.
# If the provided information is empty, say that you don't know the answer.
# Information:
# {context}

# Question: {question}
# Helpful Answer:"""

# Prompt for the answer-writing step: the model receives the query results as
# {context} plus the original {question}. The doubled braces in the second
# example are escapes, so the literal dict braces are not read as placeholders.
CYPHER_QA_TEMPLATE = """
You are an assistant that helps to form nice and human understandable answers.
The information part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. Do not mention that you based the result on the given information.

Examples:
Question: Which managers own Neo4j stocks?
Context:[manager:CTL LLC, manager:JANE STREET GROUP LLC]
Helpful Answer: CTL LLC, JANE STREET GROUP LLC owns Neo4j stocks.

Question: Which movies did user born in year 2001 like?
Context:[{{'m.title': 'Avengers 5'}}, {{'m.title': 'The Twilight Saga: New Moon'}}, {{'m.title': 'Rocky'}}]
Helpful Answer: The movies that a user born in 2001 liked include 'Avengers 5', 'The Twilight Saga: New Moon' and 'Rocky'.

Follow this example when generating answers.
If the provided information is empty, say that you don't know the answer.
Information:
{context}

Question: {question}
Helpful Answer:
"""


CYPHER_QA_PROMPT = PromptTemplate(
    template=CYPHER_QA_TEMPLATE,
    input_variables=["context", "question"],
)

In [44]:
from langchain_community.chat_models import ChatZhipuAI
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph

# Chat model; the API key is read from the environment, not hard-coded.
llm = ChatZhipuAI(
    model_name="GLM-4-0520",
    # model_name="GLM-4-Flash",
    zhipuai_api_key=os.getenv("ZHIPUAI_API_KEY"),
    temperature=0,
)

# Connection to the local Neo4j instance; the password also comes from the environment.
graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password=os.getenv("NEO4J_PASSWORD"),
)

# QA chain wired with the two custom prompts defined earlier in the notebook.
# verbose=True makes the chain print the generated Cypher and the query
# results, as seen in the captured cell outputs.
chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    qa_prompt=CYPHER_QA_PROMPT,
    verbose=True,
)


def get_answer(question, choice=""):
    """Run the Cypher QA chain and return its answer text.

    Args:
        question: Query text sent to the chain when ``choice`` is not given.
        choice: Optional override. When it contains non-whitespace text it is
            sent to the chain instead of ``question``.

    Returns:
        The value stored under the ``"result"`` key of the chain's response.
    """
    # A blank or whitespace-only override (e.g. an untouched free-text box)
    # must not shadow the real question, so normalise it before testing.
    if isinstance(choice, str):
        choice = choice.strip()
    query = choice if choice else question
    return chain.invoke(query)["result"]

In [45]:
# movies_kg.add_user_preferences()

In [46]:
# Show the schema text the graph wrapper exposes (node properties,
# relationship properties and relationship patterns).
print(graph.get_schema)

Node properties:
Movie {budget: INTEGER, id: INTEGER, title: STRING, tagline: STRING, belongs_to_collection: STRING, runtime: INTEGER, overview: STRING, revenue: INTEGER, release_date: STRING}
Genre {name: STRING}
ProductionCompany {origin_country: STRING, name: STRING, id: INTEGER}
Country {name: STRING}
Person {name: STRING, id: INTEGER, gender: INTEGER}
User {birthYear: INTEGER, location: STRING, name: STRING, id: INTEGER, gender: STRING}
Relationship properties:
ACTED_IN {character: STRING}
The relationships:
(:Movie)-[:BELONGS_TO_GENRE]->(:Genre)
(:Movie)-[:PRODUCED_BY]->(:ProductionCompany)
(:Movie)-[:ORIGIN_COUNTRY]->(:Country)
(:Person)-[:PRODUCER]->(:Movie)
(:Person)-[:EXECUTIVE_PRODUCER]->(:Movie)
(:Person)-[:DIRECTOR]->(:Movie)
(:Person)-[:ACTED_IN]->(:Movie)
(:User)-[:HATED]->(:Movie)
(:User)-[:DISLIKED]->(:Movie)
(:User)-[:LOVED]->(:Movie)
(:User)-[:IGNORED]->(:Movie)
(:User)-[:LIKED]->(:Movie)


In [57]:
from enum import Enum


class QUESTION(str, Enum):
    """Sample movie-graph questions; each member's value is the question text."""

    # Simple single-hop questions
    # (Q14 differs from Q13 only by a misspelled name -- apparently on purpose,
    # to exercise fuzzy matching.)
    Q11 = "Who starred in 'The Dark Knight'?"
    Q12 = "Who directed 'The Dark Knight'?"
    Q13 = "Which movies did Christopher Nolan direct?"
    Q14 = "Which movies did Christopher Noran direct?"

    # Questions with an added filter condition
    Q21 = "Besides 'The Dark Knight', which other movies has Christopher Nolan directed?"
    Q22 = "In which movies have Michael Caine and Maggie Gyllenhaal co-starred?"

    # Q32 and Q33 repeat Q31 with a misspelled / shortened title.
    Q31 = "What other films has the director of 'Kingdom of the Planet of the Apes' directed?"
    Q32 = "What other films has the director of 'Kingdon of the Planett of the Apes' directed?"
    Q33 = "What other films has the director of 'Kingdom of the Apes' directed?"
    Q34 = "What is the revenue of 'Kingdom of the Planet of the Apes'?"


# Plain question strings in declaration order.
default_questions = [member.value for member in QUESTION]

In [58]:
# Q32 carries a misspelled movie title; run it through the chain and print the answer.
print(get_answer(QUESTION.Q32.value))



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Movie) WHERE apoc.text.levenshteinSimilarity(m.title, 'Kingdon of the Planett of the Apes') >= 0.9 MATCH (m)<-[:DIRECTOR]-(p:Person)-[:DIRECTOR]->(otherMovies:Movie) RETURN otherMovies.title[0m
Full Context:
[32;1m[1;3m[{'otherMovies.title': 'Maze Runner: The Death Cure'}, {'otherMovies.title': 'Maze Runner: The Scorch Trials'}, {'otherMovies.title': 'The Maze Runner'}][0m

[1m> Finished chain.[0m
The director of 'Kingdom of the Planet of the Apes' has also directed 'Maze Runner: The Death Cure', 'Maze Runner: The Scorch Trials', and 'The Maze Runner'.


In [49]:
# for question in default_questions:
#     print(question)
#     print(get_answer(question))
#     print("--------------------------------------------------------")

In [50]:
class RecommendationQuestions(str, Enum):
    """Sample questions about users and their movie preferences.

    Each member's value is the question text.
    """

    # A single user's preferences
    Q1 = "Which directors does User 5 love?"

    # Filtering users by birth year
    Q21 = "Which movies did user born in year 2001 like?"
    Q22 = "Which movies did user born in year 1996 like?"
    Q31 = "Which films should person born in year 1972 not watch?"

    # Users connected to a director through the films they like
    Q3 = "Which user likes the director Christopher Nolan's films?"

    Q40 = "What are the films that people like, give 10 films with their directors?"
    Q41 = "Where do user who like the director Christopher Nolan live?"
    Q42 = "Where do people who like the director Yorgos Lanthimos live?"

    # Aggregate questions
    Q5 = "What is the gender distribution of users who love films?"

    Q6 = "Who are the people that liked Action, Adventure films?"


# Plain question strings in declaration order.
user_questions = [member.value for member in RecommendationQuestions]

In [51]:
# Run the single question Q42 through the chain and print the answer.
print(get_answer(RecommendationQuestions.Q42.value))



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (u:User)-[:LIKED]->(:Movie)<-[:DIRECTOR]-(d:Person) WHERE apoc.text.levenshteinSimilarity(d.name, 'Yorgos Lanthimos') >= 0.9 RETURN u.location;[0m
Full Context:
[32;1m[1;3m[{'u.location': 'Australia'}][0m

[1m> Finished chain.[0m
People who like the director Yorgos Lanthimos live in Australia.


In [52]:
# Run every user-preference question through the chain, one after another.
for question in user_questions:
    # Print the question first so it appears above the chain's own verbose output.
    print(question)
    print(get_answer(question))
    # Visual separator between consecutive questions.
    print("--------------------------------------------------------")

Which directors does User 5 love?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (u:User {id: 5})-[:LOVED]->(:Movie)<-[:DIRECTOR]-(d:Person) RETURN d.name;[0m
Full Context:
[32;1m[1;3m[{'d.name': 'David Gregory'}, {'d.name': 'Olivier Megaton'}, {'d.name': 'Katsuhiko Fujii'}, {'d.name': 'Lee Isaac Chung'}][0m

[1m> Finished chain.[0m
User 5 loves the directors David Gregory, Olivier Megaton, Katsuhiko Fujii, and Lee Isaac Chung.
--------------------------------------------------------
Which movies did user born in year 2001 like?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (u:User)-[:LIKED]->(m:Movie) WHERE u.birthYear = 2001 RETURN m.title;[0m
Full Context:
[32;1m[1;3m[{'m.title': 'Avengers 5'}, {'m.title': 'The Twilight Saga: New Moon'}, {'m.title': 'Rocky'}, {'m.title': 'Triangle'}, {'m.title': 'The Dark Kingdom'}, {'m.title': 'Saw IV'}, {'m.title': 'The SpongeBob Movie: Sponge on the Ru

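拒绝回答的例子 (Example of a refused answer: the Cypher query returned results, but the QA step still replied "I don't know the answer.")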

```text
Which films should person born in year 1972 not watch?


> Entering new GraphCypherQAChain chain...
Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The semantics of using colon in the separation of alternative relationship types will change in a future version. (Please use ':HATED|DISLIKED' instead)} {position: line: 1, column: 41, offset: 40} for query: 'MATCH (u:User {birthYear: 1972})-[:HATED|:DISLIKED]->(m:Movie) RETURN m.title;'
Generated Cypher:
MATCH (u:User {birthYear: 1972})-[:HATED|:DISLIKED]->(m:Movie) RETURN m.title;
Full Context:
[{'m.title': 'Mobile Suit Gundam SEED FREEDOM'}, {'m.title': 'Stay'}]

> Finished chain.
I don't know the answer.
```

In [53]:
# import gradio as gr

# # Corrected Gradio interface setup
# iface = gr.Interface(
#     fn=get_answer,
#     inputs=[
#         gr.Dropdown(
#             choices=default_questions,
#             label="Select a question",
#             value=default_questions[0],
#         ),  # 默认选择第一个问题
#         gr.Textbox(lines=2, label="Or enter your question here..."),
#     ],
#     outputs=gr.Textbox(lines=10, label="Output"),  # 增加输出行数
#     title="GraphCypher QA System",
#     description="Ask any question related to the movie database.",
# )


# # Launch the Gradio app
# iface.launch(share=True)

In [54]:
from langchain_community.chains.graph_qa.cypher import construct_schema

# Render the graph's structured schema as text and print it for inspection.
# NOTE(review): the two empty lists are presumably the include/exclude type
# filters of construct_schema -- confirm against the installed langchain version.
schema = construct_schema(graph.get_structured_schema, [], [])
print(schema)

Node properties are the following:
Movie {budget: INTEGER, id: INTEGER, title: STRING, tagline: STRING, belongs_to_collection: STRING, runtime: INTEGER, overview: STRING, revenue: INTEGER, release_date: STRING},Genre {name: STRING},ProductionCompany {origin_country: STRING, name: STRING, id: INTEGER},Country {name: STRING},Person {name: STRING, id: INTEGER, gender: INTEGER},User {birthYear: INTEGER, location: STRING, name: STRING, id: INTEGER, gender: STRING}
Relationship properties are the following:
ACTED_IN {character: STRING}
The relationships are the following:
(:Movie)-[:BELONGS_TO_GENRE]->(:Genre),(:Movie)-[:PRODUCED_BY]->(:ProductionCompany),(:Movie)-[:ORIGIN_COUNTRY]->(:Country),(:Person)-[:PRODUCER]->(:Movie),(:Person)-[:EXECUTIVE_PRODUCER]->(:Movie),(:Person)-[:DIRECTOR]->(:Movie),(:Person)-[:ACTED_IN]->(:Movie),(:User)-[:HATED]->(:Movie),(:User)-[:DISLIKED]->(:Movie),(:User)-[:LOVED]->(:Movie),(:User)-[:IGNORED]->(:Movie),(:User)-[:LIKED]->(:Movie)
