In [13]:
import os
from dotenv import load_dotenv

# 加载环境变量
load_dotenv()
# 从环境变量中读取api_key
api_key = os.getenv('ZHIPU_API_KEY')
base_url = "https://open.bigmodel.cn/api/paas/v4/"
chat_model = "glm-4-flash"
emb_model = "embedding-2"

# 配置对话模型
from llama_index.llms.zhipuai import ZhipuAI
llm = ZhipuAI(
    api_key = api_key,
    model = chat_model,
)

# 配置嵌入模型
from llama_index.embeddings.zhipuai import ZhipuAIEmbedding
embedding = ZhipuAIEmbedding(
    api_key = api_key,
    model = emb_model,
)
emb = embedding.get_text_embedding("你好呀呀")


做RAG需要自己准备一个txt文档，新建一个docs文件夹，放进去。例如，这里放了一个./docs/问答手册.txt

In [4]:
# 从指定文件读取，输入为List
from llama_index.core import SimpleDirectoryReader,Document
documents = SimpleDirectoryReader(input_files=['./docs/问答手册.txt']).load_data()

方法一：Documents可以直接构建index


In [5]:
# 构建向量索引
from llama_index.core import VectorStoreIndex
index = VectorStoreIndex.from_documents(documents,embed_model=embedding)
# 想要看到进度条的话，加一个参数 show_progress=True
# index = VectorStoreIndex.from_documents(documents,embed_model=embedding,show_progress=True)

方法二：可以先构建节点，再构建索引，同时采用faiss作为向量存储库


In [6]:
# 构建节点
from llama_index.core.node_parser import SentenceSplitter
transformations = [SentenceSplitter(chunk_size = 512)]

from llama_index.core.ingestion.pipeline import run_transformations
nodes = run_transformations(documents, transformations=transformations)

根据节点构建索引


In [9]:
# 构建索引
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss
from llama_index.core import StorageContext, VectorStoreIndex

# 从上一节得知，智谱embedding-2的维度是1024
dimensions = len(emb)
vector_store = FaissVectorStore(faiss_index=faiss.IndexFlatL2(dimensions))
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex(
    nodes = nodes,
    storage_context=storage_context,
    embed_model = embedding,
)

这样索引就算是建成了。我们可以把索引存储到硬盘，这样以后就不用重复构建，直接从硬盘读取。

In [10]:
# save index to disk
persist_dir = "./storage"
index.storage_context.persist(persist_dir)

如果之前有保存过索引到硬盘，可以直接读取。


In [11]:
# load index from disk
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss
from llama_index.core import StorageContext, load_index_from_storage
vector_store = FaissVectorStore.from_persist_dir(persist_dir)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store, persist_dir=persist_dir
)
index = load_index_from_storage(storage_context=storage_context,embed_model = embedding)

index可以直接做问答引擎。


In [14]:
query_engine = index.as_query_engine(llm=llm)
# 回答提问
response = query_engine.query("专利申请如何收费？")
response

Response(response='专利申请的收费通常包括官方费用和代理费用两部分。官方费用包括申请费、审查费、授权费等，具体收费标准由各国专利局规定，不同国家可能有所不同。代理费用则由专利代理机构或代理人根据服务内容、工作量等因素自行定价。在提交专利申请时，建议咨询专业的专利代理机构以获取详细费用信息。', source_nodes=[NodeWithScore(node=TextNode(id_='648f86a2-27a1-45a3-b59b-79723b76b3e0', embedding=None, metadata={'file_path': 'docs/问答手册.txt', 'file_name': '问答手册.txt', 'file_type': 'text/plain', 'file_size': 3355, 'creation_date': '2025-03-20', 'last_modified_date': '2025-03-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='225df03b-8a2f-48c8-88c5-ba8239dca542', node_type='4', metadata={'file_path': 'docs/问答手册.txt', 'file_name': '问答手册.txt', 'file_type': 'text/plain', 'file_size': 3355, 'creation_date': '2025-03-20', 'last_modified_date': '2025-03-20'

response.text 中是回答的文本。response.source_nodes是检索到的文本块儿，每个文本块都有score，代表与问题的相关性，由向量计算得出。

方法三：我们也可以先构建索引器，再构建合成器，再组装成问答引擎。

In [15]:
# 构建检索器
from llama_index.core.retrievers import VectorIndexRetriever
# 想要自定义参数，可以构造参数字典
kwargs = {'similarity_top_k': 5, 'index': index, 'dimensions': dimensions} # 必要参数
retriever = VectorIndexRetriever(**kwargs)

In [16]:
# 构建合成器
from llama_index.core.response_synthesizers  import get_response_synthesizer
response_synthesizer = get_response_synthesizer(llm=llm)

In [17]:
# 构建问答引擎
from llama_index.core.query_engine import RetrieverQueryEngine
engine = RetrieverQueryEngine(
      retriever=retriever,
      response_synthesizer=response_synthesizer
        )

In [18]:
# 提问
question = "请问商标注册需要提供哪些文件？"
answer = engine.query(question)
print(answer.response)

商标注册通常需要提交以下文件：

1. 商标注册申请书：详细填写商标的名称、使用商品或服务类别、申请人名称、地址等信息。

2. 商标图样：提供清晰的商标图样，应保证颜色、形状、比例等与实际使用一致。

3. 申请人身份证明文件：个人申请需提供身份证复印件，企业申请需提供营业执照副本复印件。

4. 使用证明文件：若商标已在市场中使用，需提交相应的使用证明文件，如销售合同、广告宣传资料等。

5. 商标代理委托书：如申请人委托代理人办理商标注册事宜，需提供委托书。

6. 其他证明文件：根据具体情况，可能还需提交其他证明文件，如商品或服务分类的相关证明、商标设计说明等。

请注意，不同国家和地区的商标注册要求和文件可能有所不同，具体请咨询当地商标局或专业机构。


在商标注册中，对于企业来说，需要提供的文件包括：
- 被申请人提供的营业执照复印件；
- 授权委托书；
- 商标图案的电子版；
- 具体商品或服务的名称。

若是国内自然人申请商标，则需提供以下文件：
- 个体工商户档案及自然人身份证复印件；
- 授权委托书；
- 商标图案的电子版；
- 具体商品或服务的名称。

国外自然人则需要提供：
- 护照；
- 授权委托书；
- 及商标图案的电子版；
- 具体商品或服务的名称。

### 方法四：利用Qdrant向量库

先安装一下

In [19]:
%pip install qdrant-client
%pip install llama-index-vector-stores-qdrant
%pip install llama-index-readers-file

Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple
Collecting qdrant-client
  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/dd/b4/bd676f91f5234ab59282e4a110f324029684482cbe08e7a1c77b6338013b/qdrant_client-1.13.3-py3-none-any.whl (306 kB)
Collecting grpcio>=1.41.0 (from qdrant-client)
  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/5d/b7/7e7b7bb6bb18baf156fd4f2f5b254150dcdd6cbf0def1ee427a2fb2bfc4d/grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.9/5.9 MB[0m [31m101.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting grpcio-tools>=1.41.0 (from qdrant-client)
  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/e3/4b/d95be4aaf78d7b02dff3bd332c75c228288178e92af0e5228759ac5002a0/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

加载文档


In [20]:
import qdrant_client
from llama_index.core import SimpleDirectoryReader

# load documents
documents = SimpleDirectoryReader(
    input_files=['./docs/问答手册.txt']
).load_data()

print("Document ID:", documents[0].doc_id)

Document ID: 4b587f56-90a3-45d9-9d87-a4282fe32a7c


Document ID: 02572b3e-18f7-4b5e-b432-3e1ed9ba89b8

构建索引

In [21]:
# Create an index over the documents
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore

# 连接Qdrant，并保存在本地的qdrant文件夹中
qclient = qdrant_client.QdrantClient(path="qdrant")
vector_store = QdrantVectorStore(client=qclient, collection_name="wenda")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, 
    storage_context=storage_context,
    embed_model = embedding
)

  self._client.create_payload_index(


构建检索器


In [22]:
# 构建检索器
from llama_index.core.retrievers import VectorIndexRetriever
# 想要自定义参数，可以构造参数字典
kwargs = {'similarity_top_k': 5, 'index': index, 'dimensions': dimensions} # 必要参数
retriever = VectorIndexRetriever(**kwargs)

构建合成器


In [23]:
# 构建合成器
from llama_index.core.response_synthesizers  import get_response_synthesizer
response_synthesizer = get_response_synthesizer(llm=llm)

构建问答引擎

In [24]:
# 构建问答引擎
from llama_index.core.query_engine import RetrieverQueryEngine
engine = RetrieverQueryEngine(
      retriever=retriever,
      response_synthesizer=response_synthesizer,
        )

提问

In [25]:
# 提问
question = "What are the applications of Agent AI systems ?"
answer = engine.query(question)
print(answer.response)

Agent AI systems have applications across various domains, including driving content generation for bots and AI agents, enhancing productivity in scenarios like re-playing, paraphrasing, action prediction, and synthesizing 3D or 2D scenarios. They also contribute to interactive AI, health topic management, transforming the gaming industry by redefining developer roles, and reshaping manufacturing through adaptive robotic systems.


Agent AI systems have a variety of applications, which include:

1. Interactive AI: Enhancing user interactions and providing personalized experiences.
2. Content Generation: Assisting in the creation of content for bots and AI agents, which can be used in various applications such as customer service or storytelling.
3. Productivity: Improving productivity in applications by enabling tasks like replaying events, paraphrasing information, predicting actions, and synthesizing scenarios (both 3D and 2D).
4. Healthcare: Ethical deployment in sensitive domains like healthcare, which could potentially improve diagnoses and patient care while also addressing health disparities.
5. Gaming Industry: Transforming the role of developers by shifting focus from scripting non-player characters to refining agent learning processes.
6. Robotics and Manufacturing: Redefining manufacturing roles and requiring new skill sets, rather than replacing human workers, as adaptive robotic systems are developed.
7. Simulation: Learning collaboration policies within simulated environments, which can be applied to the real world with careful consideration and safety measures.

Qdrant是支持metadata filter的，我们可以在构建索引的时候，给每个文档添加metadata，然后在查询的时候，指定metadata filter。

In [26]:
from llama_index.core.schema import TextNode

nodes = [
    TextNode(
        text="The Shawshank Redemption",
        metadata={
            "author": "Stephen King",
            "theme": "Friendship",
            "year": 1994,
        },
    ),
    TextNode(
        text="The Godfather",
        metadata={
            "director": "Francis Ford Coppola",
            "theme": "Mafia",
            "year": 1972,
        },
    ),
    TextNode(
        text="Inception",
        metadata={
            "director": "Christopher Nolan",
            "theme": "Fiction",
            "year": 2010,
        },
    ),
    TextNode(
        text="To Kill a Mockingbird",
        metadata={
            "author": "Harper Lee",
            "theme": "Mafia",
            "year": 1960,
        },
    ),
    TextNode(
        text="1984",
        metadata={
            "author": "George Orwell",
            "theme": "Totalitarianism",
            "year": 1949,
        },
    ),
    TextNode(
        text="The Great Gatsby",
        metadata={
            "author": "F. Scott Fitzgerald",
            "theme": "The American Dream",
            "year": 1925,
        },
    ),
    TextNode(
        text="Harry Potter and the Sorcerer's Stone",
        metadata={
            "author": "J.K. Rowling",
            "theme": "Fiction",
            "year": 1997,
        },
    ),
]

根据上面的nodes，构建索引。


In [27]:
vector_store = QdrantVectorStore(client=qclient, collection_name="filter")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(
    nodes, 
    storage_context=storage_context,
    embed_model = embedding
)

然后我们就可以构建metadata filter了。


In [28]:
from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", operator=FilterOperator.EQ, value="Mafia"),
    ]
)

把构建好的filter当作参数，构建retriever。进行检索，查看一下结果。


In [29]:
retriever = index.as_retriever(filters=filters, llm=llm)
retriever.retrieve("What is inception about?")

[NodeWithScore(node=TextNode(id_='c0b9055d-7317-4f99-8392-b4fe3fdd762d', embedding=None, metadata={'director': 'Francis Ford Coppola', 'theme': 'Mafia', 'year': 1972}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='The Godfather', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.27578755699862606),
 NodeWithScore(node=TextNode(id_='7316d5ec-53ff-467d-9187-78ccc6c8ec5b', embedding=None, metadata={'author': 'Harper Lee', 'theme': 'Mafia', 'year': 1960}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='To Kill a Mockingbird', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.24003890332847805)]

[NodeWithScore(node=TextNode(id_='37eb454b-8626-4907-b19d-0c693e8cdab3', embedding=None, metadata={'director': 'Francis Ford Coppola', 'theme': 'Mafia', 'year': 1972}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='The Godfather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.43338348085207457),
 NodeWithScore(node=TextNode(id_='0d33fe2e-d511-400d-a314-5dab62911afc', embedding=None, metadata={'author': 'Harper Lee', 'theme': 'Mafia', 'year': 1960}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='To Kill a Mockingbird', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.4314900148435552)]


我们还可以用AND或者OR来组合多个filter。

In [30]:
 from llama_index.core.vector_stores import FilterOperator, FilterCondition

filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Fiction"),
        MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
    ],
    condition=FilterCondition.AND,
)

retriever = index.as_retriever(filters=filters, llm=llm)
retriever.retrieve("Harry Potter?")

[NodeWithScore(node=TextNode(id_='2d2d90bf-bbd2-45ea-a974-d3638d4e2384', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Inception', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.27688714316899976)]

[NodeWithScore(node=TextNode(id_='62204c33-04de-4d4a-b311-41ed54d9ba27', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.250045814238684)]

我们也可以直接把filter的字典作为参数，构建retriever。这样可以构建一个更复杂的filter。

In [31]:
retriever = index.as_retriever(
    vector_store_kwargs={"filter": {"theme": "Mafia"}},
    llm=llm
)
retriever.retrieve("What is inception about?")

[NodeWithScore(node=TextNode(id_='2d2d90bf-bbd2-45ea-a974-d3638d4e2384', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Inception', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.6196639144997296),
 NodeWithScore(node=TextNode(id_='25367f9d-81d4-4a78-846c-46a5b9a8e927', embedding=None, metadata={'author': 'George Orwell', 'theme': 'Totalitarianism', 'year': 1949}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='1984', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.348853014289105)]

[NodeWithScore(node=TextNode(id_='a681947d-5d5e-43c6-89ba-25bfae2fb882', embedding=None, metadata={'author': 'Stephen King', 'theme': 'Friendship', 'year': 1994}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='The Shawshank Redemption', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.4834685059141362),
 NodeWithScore(node=TextNode(id_='37eb454b-8626-4907-b19d-0c693e8cdab3', embedding=None, metadata={'director': 'Francis Ford Coppola', 'theme': 'Mafia', 'year': 1972}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='The Godfather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.43338348085207457)]


除了llama-index提供的检索方式，我们还可以利用Qdrant自带的检索能力。就是Default Qdrant Filters

In [32]:
nodes = [
    TextNode(
        text="りんごとは",
        metadata={"author": "Tanaka", "fruit": "apple", "city": "Tokyo"},
    ),
    TextNode(
        text="Was ist Apfel?",
        metadata={"author": "David", "fruit": "apple", "city": "Berlin"},
    ),
    TextNode(
        text="Orange like the sun",
        metadata={"author": "Jane", "fruit": "orange", "city": "Hong Kong"},
    ),
    TextNode(
        text="Grape is...",
        metadata={"author": "Jane", "fruit": "grape", "city": "Hong Kong"},
    ),
    TextNode(
        text="T-dot > G-dot",
        metadata={"author": "George", "fruit": "grape", "city": "Toronto"},
    ),
    TextNode(
        text="6ix Watermelons",
        metadata={
            "author": "George",
            "fruit": "watermelon",
            "city": "Toronto",
        },
    ),
]

继续构建向量库。


In [33]:
vector_store = QdrantVectorStore(client=qclient, collection_name="default")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(
    nodes, 
    storage_context=storage_context,
    embed_model = embedding
)

构建Qdrant自己的的filter。


In [34]:
from qdrant_client.http.models import Filter, FieldCondition, MatchValue
filters = Filter(
    should=[
        Filter(
            must=[
                FieldCondition(
                    key="fruit",
                    match=MatchValue(value="apple"),
                ),
                FieldCondition(
                    key="city",
                    match=MatchValue(value="Tokyo"),
                ),
            ]
        ),
        Filter(
            must=[
                FieldCondition(
                    key="fruit",
                    match=MatchValue(value="grape"),
                ),
                FieldCondition(
                    key="city",
                    match=MatchValue(value="Toronto"),
                ),
            ]
        ),
    ]
)

构建retriever。

In [35]:
retriever = index.as_retriever(
    vector_store_kwargs={"qdrant_filters": filters},
    llm=llm
)

检索一下看看

In [36]:
response = retriever.retrieve("Who makes grapes?")
for node in response:
    print("node", node.score)
    print("node", node.text)
    print("node", node.metadata)

node 0.3977733458258037
node T-dot > G-dot
node {'author': 'George', 'fruit': 'grape', 'city': 'Toronto'}
node 0.2929864224097565
node りんごとは
node {'author': 'Tanaka', 'fruit': 'apple', 'city': 'Tokyo'}


node 0.33848023090543683
node T-dot > G-dot
node {'author': 'George', 'fruit': 'grape', 'city': 'Toronto'}
node 0.13562373847315362
node りんごとは
node {'author': 'Tanaka', 'fruit': 'apple', 'city': 'Tokyo'}


鸣谢：本节内容参考了Llama-index官方文档