# Neo4j Vector



In [6]:
from dotenv import load_dotenv, find_dotenv
from langchain.globals import set_debug

# Load environment variables from the .env file located by find_dotenv().
env_file = find_dotenv()
load_dotenv(env_file)

# Keep LangChain's global debug output switched off.
set_debug(False)

In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Neo4jVector
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [4]:
# Read the speech as UTF-8 explicitly. Without `encoding`, the file is decoded with
# the platform default, which fails or garbles the curly quotes on non-UTF-8 locales.
loader = TextLoader("../data/state_of_the_union.txt", encoding="utf-8")
documents = loader.load()

# Split the text into chunks of at most ~1000 characters with no overlap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Embedding client shared by the cells that build vector stores.
embeddings = OpenAIEmbeddings()

In [5]:
# Display the first chunk to sanity-check the loader and splitter output.
docs[0]

Document(metadata={'source': '../data/state_of_the_union.txt'}, page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.  \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their d

In [21]:
# Build the vector store from the chunks in the "tcm" database, reusing the
# `embeddings` client created when the documents were loaded.
# No url/username/password is passed here; to connect explicitly, supply them:
#   Neo4jVector.from_documents(docs, embeddings, url=url, username=username, password=password)
# NOTE(review): connection settings are presumably read from the environment
# populated by load_dotenv() — confirm.
db = Neo4jVector.from_documents(docs, embeddings, database="tcm")



In [22]:
# `query` is reused by the retriever cell further down.
# Fetch the 2 most similar chunks together with their similarity scores.
query = "What did the president say about Ketanji Brown Jackson"
docs_with_score = db.similarity_search_with_score(query, k=2)

In [13]:
# Print each hit framed by a horizontal rule, score first and chunk text after it.
separator = "-" * 80
for doc, score in docs_with_score:
    print(separator)
    print("Score: ", score)
    print(doc.page_content)
    print(separator)



--------------------------------------------------------------------------------
Score:  0.9076102375984192
Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.
--------------------------------------------------------------------------------
-----------------------

## 使用vectorstore
上面，我们从零开始创建了一个vectorstore。然而，我们经常想要使用现有的矢量库。为了做到这一点，我们可以直接初始化它。

In [23]:
# Attach to the vector index that already exists in the "tcm" database
# instead of ingesting the documents again.
index_name = "vector"  # default index name
store = Neo4jVector.from_existing_index(
    OpenAIEmbeddings(),
    database="tcm",
    index_name=index_name,
)

In [24]:
# First we create sample data in graph.
# MERGE (rather than CREATE) keeps this cell idempotent: running it again matches
# the existing node instead of adding a second identical Person.
store.query(
    "MERGE (p:Person {name: 'Tomaz', location:'Slovenia', hobby:'Bicycle', age: 33})"
)

[]

我们也可以使用`from_existing_graph`方法初始化现有图的vectorstore。该方法从数据库中提取相关文本信息，计算文本嵌入并将其存储回数据库。

In [25]:
# Now we initialize from existing graph: the `name` and `location` properties of
# Person nodes are used as text, and the vector is kept in the `embedding` property.
existing_graph = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    node_label="Person",
    text_node_properties=["name", "location"],
    embedding_node_property="embedding",
    index_name="person_index",
    database="tcm",
)

# Ask for the single closest node and display it.
result = existing_graph.similarity_search("Slovenia", k=1)
result[0]



Document(metadata={'age': 33, 'hobby': 'Bicycle'}, page_content='\nname: Tomaz\nlocation: Slovenia')

Neo4j还支持关系向量索引，其中嵌入作为关系属性存储并索引。关系向量索引不能通过LangChain填充，但可以通过LangChain连接到现有的关系向量索引。

In [26]:
# First we create sample data and index in graph.
# The FRIEND relationship ends at the bound variable `p` (Tomaz). An unbound
# variable in that position would make MERGE create a new, empty node rather
# than link the two Person nodes.
# The embedding of the relationship text is passed as the $embedding parameter.
store.query(
    "MERGE (p:Person {name: 'Tomaz'}) "
    "MERGE (p1:Person {name:'Leann'}) "
    "MERGE (p1)-[:FRIEND {text:'example text', embedding:$embedding}]->(p)",
    params={"embedding": OpenAIEmbeddings().embed_query("example text")},
)


[]

In [27]:
# Create a vector index
# The index covers the `embedding` property of FRIEND relationships and is only
# created if it does not exist yet. Its name is supplied through the Cypher
# parameter $relationship_index.
# NOTE(review): 1536 presumably matches the vector size of the default
# OpenAIEmbeddings model — confirm if a different embedding model is used.
relationship_index = "relationship_vector"
store.query(
    """
CREATE VECTOR INDEX $relationship_index
IF NOT EXISTS
FOR ()-[r:FRIEND]-() ON (r.embedding)
OPTIONS {indexConfig: {
 `vector.dimensions`: 1536,
 `vector.similarity_function`: 'cosine'
}}
""",
    params={"relationship_index": relationship_index},
)

[]

In [28]:
# Connect to the relationship vector index created above; the relationship's
# `text` property is used as the text of the returned documents.
relationship_vector = Neo4jVector.from_existing_relationship_index(
    OpenAIEmbeddings(),
    database="tcm",
    index_name=relationship_index,
    text_node_property="text",
)
# Search the relationship embeddings for the closest match to "Example".
relationship_vector.similarity_search("Example")

[Document(page_content='example text')]

## 元数据过滤

Neo4j矢量存储还通过结合并行运行时和精确最近邻搜索来支持元数据过滤。需要Neo4j 5.18或更高版本。
相等过滤具有以下语法。



In [29]:
# Equality filter: a plain {property: value} mapping restricts the search to
# nodes whose properties equal the given values.
existing_graph.similarity_search(
    "Slovenia",
    filter={"hobby": "Bicycle", "name": "Tomaz"},
)

[Document(metadata={'age': 33, 'hobby': 'Bicycle'}, page_content='\nname: Tomaz\nlocation: Slovenia')]

In [35]:
# NOTE(review): same query and filter as the preceding cell, with an out-of-order
# execution count — looks like a leftover re-run; consider removing it.
existing_graph.similarity_search(
    "Slovenia",
    filter={"hobby": "Bicycle", "name": "Tomaz"},
)

[Document(metadata={'age': 33, 'hobby': 'Bicycle'}, page_content='\nname: Tomaz\nlocation: Slovenia')]

元数据过滤还支持以下运算符：
- $eq: Equal
- $ne: Not Equal
- $lt: Less than
- $lte: Less than or equal
- $gt: Greater than
- $gte: Greater than or equal
- $in: In a list of values
- $nin: Not in a list of values
- $between: Between two values
- $like: Text contains value
- $ilike: lowered text contains value

In [30]:
# Operator form: `hobby` must equal "Bicycle" and `age` must be greater than 15.
# NOTE(review): conditions in the same dict are presumably combined with AND — confirm.
existing_graph.similarity_search(
    "Slovenia",
    filter={"hobby": {"$eq": "Bicycle"}, "age": {"$gt": 15}},
)

[Document(metadata={'age': 33, 'hobby': 'Bicycle'}, page_content='\nname: Tomaz\nlocation: Slovenia')]

您还可以在过滤器之间使用`OR`运算符（对应过滤语法中的`$or`）

In [31]:
# `$or` takes a list of conditions; a node satisfying any one of them qualifies.
existing_graph.similarity_search(
    "Slovenia",
    filter={"$or": [{"hobby": {"$eq": "Bicycle"}}, {"age": {"$gt": 15}}]},
)


[Document(metadata={'age': 33, 'hobby': 'Bicycle'}, page_content='\nname: Tomaz\nlocation: Slovenia')]

**添加**
我们可以将文档添加到现有的矢量存储中。

In [32]:
# Add one extra document to the existing store; the call returns a list of ids.
store.add_documents([Document(page_content="foo")])



['acbd18db4cc2f85cedef654fccc4a4d8']

In [33]:
# Search for the document that was just added; it should come back as the top hit.
docs_with_score = store.similarity_search_with_score("foo")
docs_with_score

[(Document(page_content='foo'), 0.9999997615814209),
 (Document(metadata={'source': '../data/state_of_the_union.txt'}, page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling.  \n\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers.  \n\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n\nWe’re securing commitments and supporting partners in South and Central A

## 使用检索查询自定义响应
您还可以通过使用自定义Cypher片段来定制响应，该片段可以从图中获取其他信息。在底层，最后的Cypher语句是这样构造的


```
read_query = (
  "CALL db.index.vector.queryNodes($index, $k, $embedding) "
  "YIELD node, score "
) + retrieval_query
```

检索查询必须返回以下三列
- text: Union[str, Dict] =用于填充文档页面内容的值
- score: Float =相似度评分
- metadata: Dict =文档的附加元数据

In [34]:
# Custom Cypher tail appended to the vector search: builds `text` from the node
# name, passes `score` through, and returns a fixed metadata map.
retrieval_query = """
RETURN "Name:" + node.name AS text, score, {foo:"bar"} AS metadata
"""
retrieval_example = Neo4jVector.from_existing_index(
    OpenAIEmbeddings(),
    database="tcm",
    index_name="person_index",
    retrieval_query=retrieval_query,
)
# Return only the closest match.
retrieval_example.similarity_search("Foo", k=1)

[Document(metadata={'foo': 'bar'}, page_content='Name:Tomaz')]

下面的示例将除`embedding`之外的节点属性作为字典传递给text列

In [37]:
# Return a map projection of selected node properties (name, age, hobby) as
# `text`, so the page content is built from those key/value pairs.
retrieval_query = """
RETURN node {.name, .age, .hobby} AS text, score, {foo:"bar"} AS metadata
"""
retrieval_example = Neo4jVector.from_existing_index(
    OpenAIEmbeddings(),
    database="tcm",
    index_name="person_index",
    retrieval_query=retrieval_query,
)
# Return only the closest match.
retrieval_example.similarity_search("Foo", k=1)

[Document(metadata={'foo': 'bar'}, page_content='name: Tomaz\nage: 33\nhobby: Bicycle\n')]

还可以将Cypher参数传递给检索查询。参数可用于额外的过滤、遍历等…

In [38]:
# Project all node properties, blank out the stored `embedding`, and add an
# `extra` field whose value comes from the $extra Cypher parameter.
retrieval_query = """
RETURN node {.*, embedding:Null, extra: $extra} AS text, score, {foo:"bar"} AS metadata
"""
retrieval_example = Neo4jVector.from_existing_index(
    OpenAIEmbeddings(),
    database="tcm",
    index_name="person_index",
    retrieval_query=retrieval_query,
)
# The value for $extra is supplied at search time through `params`.
retrieval_example.similarity_search("Foo", k=1, params={"extra": "ParamInfo"})

[Document(metadata={'foo': 'bar'}, page_content='location: Slovenia\nextra: ParamInfo\nname: Tomaz\nage: 33\nhobby: Bicycle\nembedding: None\n')]

## 混合搜索(矢量+关键词)
Neo4j集成了矢量和关键字索引，这允许您使用混合搜索方法

In [40]:
# The Neo4jVector Module will connect to Neo4j and create a vector and keyword indices if needed.
# NOTE(review): this ingests `docs` into the "tcm" database again, after the earlier
# from_documents call — presumably duplicating the chunks; confirm this is intended.
hybrid_db = Neo4jVector.from_documents(
    docs,
    OpenAIEmbeddings(),
    database="tcm",
    search_type="hybrid",
)





In [41]:
# To load hybrid search from an existing index, you must provide both the vector
# and the keyword index.
index_name = "vector"  # default index name
keyword_index_name = "keyword"  # default keyword index name

# Note: this rebinds `store`, replacing the vector-only store created earlier.
store = Neo4jVector.from_existing_index(
    OpenAIEmbeddings(),
    database="tcm",
    index_name=index_name,
    keyword_index_name=keyword_index_name,
    search_type="hybrid",
)

In [42]:
# Wrap `store` as a retriever and show the top hit for `query`
# (the question defined in an earlier cell).
retriever = store.as_retriever()
retriever.invoke(query)[0]

Document(metadata={'source': '../data/state_of_the_union.txt'}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.')

## 用资料回答问题
本节介绍如何基于索引进行带来源引用的问答。它通过使用`RetrievalQAWithSourcesChain`来实现这一点，该链会从索引中查找相关文档并在回答中给出来源

In [43]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI

# Deterministic chat model (temperature 0) used to answer from the retrieved chunks.
llm = ChatOpenAI(temperature=0)

# "stuff" chain over the retriever; answers are returned with their sources.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=retriever,
)

In [44]:
# Ask a question through the chain; return_only_outputs=True asks for just the
# chain outputs (the answer and its sources), without echoing the inputs.
chain.invoke(
    {"question": "What did the president say about Justice Breyer"},
    return_only_outputs=True,
)

{'answer': 'The president honored Justice Stephen Breyer for his service to the country and mentioned his retirement from the United States Supreme Court.\n',
 'sources': '../data/state_of_the_union.txt'}