In [3]:
%%bash
pip3 install llama-index-embeddings-huggingface llama-index-llms-huggingface-api

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv

# Load the .env file
load_dotenv()

# Retrieve HF_TOKEN from the environment variables
hf_token = os.getenv("HUGGINGFACE_API_KEY")

os.environ['HTTP_PROXY'] = "http://127.0.0.1:7890"
os.environ['HTTPS_PROXY'] = "http://127.0.0.1:7890"
os.environ['NO_PROXY'] = "http://127.0.0.1:11434" #ollama的本地服务地址

llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    temperature=0.7,
    max_tokens=100,
    token=hf_token,
    provider="auto"
)

response = llm.complete("Hello, how are you?")
print(response)
# I am good, how can I help you today?

True

Hello! I'm just a computer program, so I don't have feelings, but thank you for asking. How can I assist you today?


In [5]:
from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_dir="./")
documents = reader.load_data()

In [6]:
print(documents[:3])

[Document(id_='6345a535-2f45-4e8c-a102-3a900483adc0', embedding=None, metadata={'file_path': '/home/chester/gitlab.chesterwang.com/chester-aigc-practice/2025-10-02-ai-agent-入门第一课/13 LlamaIndex框架.ipynb', 'file_name': '13 LlamaIndex框架.ipynb', 'file_size': 223707, 'creation_date': '2025-11-14', 'last_modified_date': '2025-11-14'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text="\n\n\nget_ipython().run_cell_magic('bash', '', 'pip3 install llama-index-embeddings-huggingface llama-index-llms-huggingface-api\\n')\n\n\n# ", path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_

In [7]:
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

# create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=30,
                         chunk_overlap=0, 
                         paragraph_separator="\n"),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ]
)

nodes = await pipeline.arun(documents=[Document.example()])
nodes

2025-11-14 09:40:17.673515: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-14 09:40:17.814792: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Metadata length (9) is close to chunk size (30). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.


[TextNode(id_='dc700ad0-b0aa-4d59-bbfa-603ff0edef06', embedding=[-0.055600520223379135, 0.011091159656643867, 0.03558356687426567, -0.04144258052110672, 0.05192653834819794, -0.06519750505685806, 0.0023052787873893976, -0.0059931399300694466, 0.016246214509010315, -0.08449862897396088, -0.0028635100461542606, 0.02638329192996025, 0.02220352180302143, 0.015145150944590569, 0.03175486624240875, 0.022757485508918762, -0.0012810081243515015, 0.01564873568713665, -0.0016829823143780231, 0.04073594510555267, 0.06956089287996292, 0.004809209145605564, 0.03031214512884617, 0.00885833241045475, -0.0327761173248291, -0.012011103332042694, -0.05094118416309357, -0.01943393237888813, -0.08847642689943314, -0.1399543285369873, 0.006631513126194477, 0.035538654774427414, 0.028988275676965714, 0.034742243587970734, -0.002250063233077526, 0.005220198072493076, -0.011043024249374866, 0.010354076512157917, -0.013974783010780811, 0.037898581475019455, 0.023188922554254532, 0.0015241532819345593, 0.009562

In [8]:
len(nodes)

19

In [9]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

db = chromadb.PersistentClient(path="./data/alfred_chroma_db")
chroma_collection = db.get_or_create_collection("alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=25, chunk_overlap=0),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)

In [10]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

index.build_index_from_nodes(nodes)

IndexDict(index_id='553e8c23-a041-4f34-8e4f-d397524c4ca5', summary=None, nodes_dict={}, doc_id_dict={}, embeddings_dict={})

In [11]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
                              token=os.environ.get("HUGGINGFACE_API_KEY"),
                              provider="auto"
                              )
# 这里必须提供正确的provider参数，具体要查看hf官网右侧 是否有提供对应的provider 名字。这块还是没有搞明白，反正auto是可以的。
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
query_engine.query("What is the meaning of life?")
# The meaning of life is 42

Response(response='The meaning of life is a profound question that has been explored by philosophers, scientists, and thinkers throughout history. It encompasses inquiries into the purpose and significance of existence. While the provided information discusses LLMs (Large Language Models) as tools for generating knowledge and reasoning, it does not address the philosophical or existential question of the meaning of life. This question remains open to interpretation and can vary greatly depending on individual beliefs, values, and experiences.', source_nodes=[NodeWithScore(node=TextNode(id_='969970f8-5f4b-498c-b193-06cb59639f4b', embedding=None, metadata={'filename': 'README.md', 'category': 'codebase'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='86887d64-9e53-4731-9a3e-c53105ab92c8', node_type='4', metadata={'filename': 'README.md', 'category': 'codebase'}, hash='8f24ce02310203160c5e18490ef2c8a

In [None]:
# ------这段代码失败了，搞不懂为什么，明明网络是通的。------
from llama_index.core.evaluation import FaithfulnessEvaluator

# query_engine = # from the previous section
# llm = # from the previous section

# query index
evaluator = FaithfulnessEvaluator(llm=llm)
response = query_engine.query(
    "What battles took place in New York City in the American Revolution?"
)
print(response)
eval_result = evaluator.evaluate_response(response=response)
eval_result.passing

During the American Revolution, several significant battles occurred in and around New York City. The most notable was the Battle of Long Island (also known as the Battle of Brooklyn) in August 1776, where British forces under General William Howe defeated the Continental Army led by General George Washington. Another major engagement was the Battle of Harlem Heights in September 1776, which resulted in a tactical victory for the Americans. Additionally, there were skirmishes and smaller battles throughout the occupation period from 1776 to 1783, including actions at Fort Washington and Fort Lee. These conflicts were crucial in shaping the strategic landscape of the revolution in the northeastern theater.


ClientConnectorError: Cannot connect to host router.huggingface.co:443 ssl:default [None]

In [18]:
from llama_index.core.tools import FunctionTool

def get_weather(location: str) -> str:
    """Useful for getting the weather for a given location."""
    print(f"Getting weather for {location}")
    return f"The weather in {location} is sunny"

tool = FunctionTool.from_defaults(
    get_weather,
    name="my_weather_tool",
    description="Useful for getting the weather for a given location.",
)
tool.call("New York")

Getting weather for New York


ToolOutput(blocks=[TextBlock(block_type='text', text='The weather in New York is sunny')], tool_name='my_weather_tool', raw_input={'args': ('New York',), 'kwargs': {}}, raw_output='The weather in New York is sunny', is_error=False)

In [19]:
from llama_index.core import VectorStoreIndex
from llama_index.core.tools import QueryEngineTool
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore

embed_model = HuggingFaceEmbedding("BAAI/bge-small-en-v1.5")

db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection("alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
query_engine = index.as_query_engine(llm=llm)
tool = QueryEngineTool.from_defaults(query_engine, name="some useful name", description="some useful description")

In [20]:
from llama_index.tools.google import GmailToolSpec

tool_spec = GmailToolSpec()
tool_spec_list = tool_spec.to_tool_list()

In [21]:
[(tool.metadata.name, tool.metadata.description) for tool in tool_spec_list]

[('load_data',
  "load_data() -> List[llama_index.core.schema.Document]\nLoad emails from the user's account."),
 ('search_messages',
  "search_messages(query: str, max_results: Optional[int] = None)\nSearches email messages given a query string and the maximum number\n:param query: The user's query\n:param max_results: The maximum number of search results"),
 ('create_draft',
  'create_draft(to: Optional[List[str]] = None, subject: Optional[str] = None, message: Optional[str] = None) -> str\nCreate and insert a draft email.\n:param to: The email addresses to send the message to\n:param subject: The subject for the event\n:param message: The message for the event'),
 ('update_draft',
  'update_draft(to: Optional[List[str]] = None, subject: Optional[str] = None, message: Optional[str] = None, draft_id: str = None) -> str\nUpdate a draft email.\n:param to: The email addresses to send the message to\n:param subject: The subject for the event\n:param message: The message for the event\n:pa

In [None]:
## get_agent 错误的。
# 参考链接 [MCP ToolSpec](https://github.com/run-llama/llama_index/blob/13c1f8736388da188c27ec74365ff00b39b5b235/llama-index-integrations/tools/llama-index-tools-mcp/examples/mcp.ipynb#L75)
from llama_index.tools.mcp import BasicMCPClient, McpToolSpec
from llama_index.core.agent import get_agent,Context

# We consider there is a mcp server running on 127.0.0.1:8000, or you can use the mcp client to connect to your own mcp server.
mcp_client = BasicMCPClient("http://127.0.0.1:8000/sse")
mcp_tool = McpToolSpec(client=mcp_client)

# get the agent
agent = await get_agent(mcp_tool)

# create the agent context
agent_context = Context(agent)

ImportError: cannot import name 'get_agent' from 'llama_index.core.agent' (/home/chester/py_venv_ai/lib/python3.12/site-packages/llama_index/core/agent/__init__.py)

In [25]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.tools import FunctionTool

# define sample Tool -- type annotations, function names, and docstrings, are all included in parsed schemas!
def multiply(a: int, b: int) -> int:
    """Multiplies two integers and returns the resulting integer"""
    return a * b

# initialize llm
llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
                              token=os.environ.get("HUGGINGFACE_API_KEY"),
                              provider="auto"
)

# initialize agent
agent = AgentWorkflow.from_tools_or_functions(
    [FunctionTool.from_defaults(multiply)],
    llm=llm
)

In [26]:
# stateless
response = await agent.run("What is 2 times 2?")

# remembering state
from llama_index.core.workflow import Context

ctx = Context(agent)

response = await agent.run("My name is Bob.", ctx=ctx)
print(response)
response = await agent.run("What was my name again?", ctx=ctx)
print(response)

CancelledError: 