## [Introduction to the LlamaHub](https://huggingface.co/learn/agents-course/unit2/llama-index/llama-hub#introduction-to-the-llamahub)

In [22]:
from llama_index.llms.ollama import Ollama

# Smoke test: send one prompt to the locally served Ollama model and print
# whatever completion comes back.
llm = Ollama(model="qwen2.5-coder:0.5b")

prompt = "What is the capital of France?"
response = llm.complete(prompt)
print(response)

The capital of France is Paris.


## [Components in LlamaIndex](https://huggingface.co/learn/agents-course/unit2/llama-index/components)

In [23]:
# Setting up the persona database

from datasets import load_dataset
from pathlib import Path

# Download the "train" split of the tiny personas dataset.
dataset = load_dataset(path="dvilasuero/finepersonas-v0.1-tiny", split="train")

# All persona files live under ./data; create the directory if it is missing.
data_dir = Path("data")
data_dir.mkdir(parents=True, exist_ok=True)

# Write one text file per record (persona_0.txt, persona_1.txt, ...).
# The encoding is pinned to UTF-8 so the files do not depend on the platform's
# locale encoding, which can mangle or reject non-ASCII persona text.
for i, persona in enumerate(dataset):
    with open(data_dir / f"persona_{i}.txt", "w", encoding="utf-8") as f:
        f.write(persona["persona"])

In [24]:
# Loading and embedding persona documents

from llama_index.core import SimpleDirectoryReader, Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline


reader = SimpleDirectoryReader(
    input_dir='data'
)
documents = reader.load_data()

print(f"Loaded {len(documents)} documents")
    
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_overlap=0),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ]
)

nodes = await pipeline.arun(documents=documents[:10])
print(f"Generated {len(nodes)} nodes")

Loaded 5000 documents
Generated 10 nodes


In [29]:
# Storing and indexing documents

import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import VectorStoreIndex


db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)
nodes = await pipeline.arun(documents=documents[:10])

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event CollectionAddEvent: capture() takes 1 positional argument but 3 were given


In [33]:
# Querying a VectorStoreIndex with prompts and LLMs

# LLM handed to the query engine for answering over the index.
llm = Ollama(model="qwen2.5-coder:0.5b")

# Query engine over the existing index, configured with the "tree_summarize"
# response mode and verbose output.
query_engine = index.as_query_engine(llm=llm, response_mode="tree_summarize", verbose=True)

question = "Respond using a persona that describes author and travel experiences?"
response = query_engine.query(question)
print(response)

The persona described is a web developer who has a strong focus on HTML, CSS, and website building.


In [34]:
# Evaluation & Observability

from llama_index.core.evaluation import FaithfulnessEvaluator

# Evaluate the response produced by the previous query cell, using the same
# LLM as the evaluator's model.
evaluator = FaithfulnessEvaluator(llm=llm)
eval_result = evaluator.evaluate_response(response=response)

# `passing` holds the evaluator's verdict for this response.
is_faithful = eval_result.passing
print(is_faithful)

False


## [Using Tools in LlamaIndex](https://huggingface.co/learn/agents-course/unit2/llama-index/tools)

In [1]:
# Creating a Function Tool

from llama_index.core.tools import FunctionTool

def get_weather(location: str) -> str:
    """Useful for getting the weather for a given location"""
    # Stub implementation: no weather service is contacted; the reply is a
    # fixed "sunny" sentence with the requested location filled in.
    progress_message = f"Getting weather for {location}"
    print(progress_message)
    forecast = f"The weather in {location} is sunny"
    return forecast

# Wrap the plain Python function as a LlamaIndex tool with an explicit name
# and description.
tool = FunctionTool.from_defaults(
    fn=get_weather,
    name="my_weather_tool",
    description="Useful for getting the weather for a given location",
)
print(tool.metadata)

# Invoke the tool directly; left as the cell's last expression so the returned
# ToolOutput is displayed.
tool.call("New York")

ToolMetadata(description='Useful for getting the weather for a given location', name='my_weather_tool', fn_schema=<class 'llama_index.core.tools.utils.my_weather_tool'>, return_direct=False)
Getting weather for New York


ToolOutput(content='The weather in New York is sunny', tool_name='my_weather_tool', raw_input={'args': ('New York',), 'kwargs': {}}, raw_output='The weather in New York is sunny', is_error=False)

In [3]:
# Creating a QueryEngineTool
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import VectorStoreIndex
from llama_index.llms.ollama import Ollama
from llama_index.core.tools import QueryEngineTool
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
llm = Ollama(model="qwen2.5-coder:0.5b")
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)
query_engine = index.as_query_engine(llm=llm)

tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name="alfred_query_engine",
    description="Useful for querying the alfred index",
)

print(tool.metadata)

await tool.acall(
    "Responds about research on the impact of AI on the future of work and society?"
)

  from .autonotebook import tqdm as notebook_tqdm
Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


ToolMetadata(description='Useful for querying the alfred index', name='alfred_query_engine', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)


Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


ToolOutput(content='Research on the impact of AI on the future of work and society.', tool_name='alfred_query_engine', raw_input={'input': 'Responds about research on the impact of AI on the future of work and society?'}, raw_output=Response(response='Research on the impact of AI on the future of work and society.', source_nodes=[NodeWithScore(node=TextNode(id_='1a4731fd-7dda-4e47-a48b-681f6b092529', embedding=None, metadata={'file_path': '/Users/pardeep/Playground/study-group-hf-agents-course/units/unit2_frameworks/llamaindex/data/persona_100.txt', 'file_name': 'persona_100.txt', 'file_type': 'text/plain', 'file_size': 107, 'creation_date': '2025-06-28', 'last_modified_date': '2025-06-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE

In [7]:
# Creating ToolSpecs

from llama_index.tools.google import GmailToolSpec

# Expand the Gmail tool spec into its individual tools.
tool_spec = GmailToolSpec()
tool_spec_list = tool_spec.to_tool_list()
print(tool_spec_list)

# Print each tool's name and description, one per line.
for tool in tool_spec_list:
    metadata = tool.metadata
    print(
        f"tool.metadata.name: {metadata.name}",
        f"tool.metadata.description: {metadata.description}",
        sep="\n",
    )

[<llama_index.core.tools.function_tool.FunctionTool object at 0x327f13ed0>, <llama_index.core.tools.function_tool.FunctionTool object at 0x326f44850>, <llama_index.core.tools.function_tool.FunctionTool object at 0x160008c10>, <llama_index.core.tools.function_tool.FunctionTool object at 0x326f28890>, <llama_index.core.tools.function_tool.FunctionTool object at 0x326f89910>, <llama_index.core.tools.function_tool.FunctionTool object at 0x326f28210>]
tool.metadata.name: load_data
tool.metadata.description: load_data() -> List[llama_index.core.schema.Document]
Load emails from the user's account.
tool.metadata.name: search_messages
tool.metadata.description: search_messages(query: str, max_results: Optional[int] = None)

        Searches email messages given a query string and the maximum number
        of results requested by the user
           Returns: List of relevant message objects up to the maximum number of results.

        Args:
            query (str): The user's query
          