In [1]:
# import the relevant libraries
import os
from dotenv import load_dotenv
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Read variables from a local .env file (via python-dotenv), then fetch the
# Hugging Face token from the environment; HF_TOKEN is None when it is unset.
load_dotenv()
HF_TOKEN = os.environ.get('HF_TOKEN')

In [7]:
# Minimal text-completion round trip through the Hugging Face Inference API.
llm = HuggingFaceInferenceAPI(
    model="Qwen/Qwen2.5-Coder-32B-Instruct",
    token=HF_TOKEN,
    temperature=0.7,
    max_tokens=200,
)

# Send one prompt and print the completion that comes back.
greeting_prompt = 'How are you doing?'
output = llm.complete(greeting_prompt)
print(output)

I'm just a computer program, so I don't have feelings, but thanks for asking! How can I assist you today?


## Creating a RAG Pipeline using Components from LlamaIndex

In [2]:
from llama_index.core import SimpleDirectoryReader # directory reader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding # to create our vector representation
from llama_index.core.node_parser import SentenceSplitter # split document into chunks (nodes)
from llama_index.core.ingestion import IngestionPipeline # handles transformation and loading into the vectorstore
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

## Loading Document

In [9]:
# Directory reader pointed at the local ./data folder.
reader =  SimpleDirectoryReader(input_dir='./data') # load the reader object

# show_progress=True prints a "Loading files" progress bar while reading.
docs = reader.load_data(show_progress=True) # load the data and return the list of the documents

Loading files: 100%|██████████| 1/1 [00:00<00:00, 83.24it/s]


## Instantiating a DB client and creating our ingestion pipeline

In [10]:
# Chroma client, the collection used for this notebook, and the LlamaIndex
# vector-store wrapper around that collection.
client = chromadb.PersistentClient()
chroma_collection = client.get_or_create_collection('llama_collection')
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Transformations are listed in the order they are applied:
# split the documents into nodes, then embed each node.
# NOTE(review): chunk_size=100 is small — confirm it leaves enough room for
# real document text in each node.
splitter = SentenceSplitter(chunk_size=100, chunk_overlap=0)
embedder = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Handing the vector store to the pipeline ingests the embedded nodes directly
# into the vector database.
pipeline = IngestionPipeline(
    transformations=[splitter, embedder],
    vector_store=vector_store,
)

# Run the ingestion over the loaded documents.
nodes = pipeline.run(documents=docs, show_progress=True)

Parsing nodes: 100%|██████████| 1/1 [00:01<00:00,  1.47s/it]
Generating embeddings: 100%|██████████| 5120/5120 [00:21<00:00, 242.50it/s]


In [11]:
# creating an index from our vectorstore and its embeddings
from llama_index.core import VectorStoreIndex

# Embedding model for the index; same model name as the embedding step of the
# ingestion pipeline.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Build the index object on top of the existing vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model
)

In [12]:
# Before we can query our index, we need to convert it to a query interface. The most common conversion options are:

# as_retriever: For basic document retrieval, returning a list of NodeWithScore objects with similarity scores
# as_query_engine: For single question-answer interactions, returning a written response
# as_chat_engine: For conversational interactions that maintain memory across multiple messages, 
# returning a written response using chat history and indexed context


# let's query our index
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI


# LLM used to write the answer from the retrieved nodes.
llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")

# Turn the index into a single-question query interface.
# Other response modes mentioned for this call: "refine" and "compact".
query_engine = index.as_query_engine(response_mode="tree_summarize", llm=llm)

# Ask one question and print the written response.
journal_question = 'list the writers of the journal'
response = query_engine.query(journal_question)
print(response)

The provided information does not contain any details about the writers of a journal. The given text appears to be a file path and some non-readable characters, which do not provide any information about authors or contributors to a journal.


In [13]:
# evaluating the query engine response
from llama_index.core.evaluation import CorrectnessEvaluator

# Evaluate the earlier query-engine response with an LLM-backed evaluator.
evaluator = CorrectnessEvaluator(llm=llm)

eval_result = evaluator.evaluate_response(
    response=response,
    query='list the writers of the journal',
)

# Displayed as the cell output: whether the response passed the evaluation.
eval_result.passing

False

## Tooling - creating tools for Agents

In [14]:
# implementing a function tool
# import the package
from llama_index.core.tools import FunctionTool

In [None]:
# create a basic calculator function
def add(x: int, y: int) -> int:
    """Return the sum of ``x`` and ``y``."""
    return x + y

# Wrap `add` as a tool, giving it an explicit name and description.
tool = FunctionTool.from_defaults(
    fn=add,
    name='calculator',
    description='calculate the sum of two variables',
)

# Call the tool directly; the returned ToolOutput is shown as the cell output.
tool.call(3, 5)

ToolOutput(content='8', tool_name='calculator', raw_input={'args': (3, 5), 'kwargs': {}}, raw_output=8, is_error=False)

In [17]:
# implementing a basic queryengine tool
from llama_index.core.tools import QueryEngineTool

tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name='science-journal-engine',
    description='a knowledge base to find local information about a scientific journal'
)

await tool.acall('list the writers of the journal')

ToolOutput(content='The provided information does not contain any details about the writers of a journal. The data shown appears to be a file path and some encoded or non-readable text, which does not include author names or any other information related to the writers of a journal.', tool_name='science-journal-engine', raw_input={'input': 'list the writers of the journal'}, raw_output=Response(response='The provided information does not contain any details about the writers of a journal. The data shown appears to be a file path and some encoded or non-readable text, which does not include author names or any other information related to the writers of a journal.', source_nodes=[NodeWithScore(node=TextNode(id_='787f5dda-a049-4468-a709-312a59a2fb18', embedding=None, metadata={'file_path': '/Users/chibuikeiwuchukwu/Docs/Real_ML_Project/ecommerce_system/llama_agent/data/A132113010125.pdf', 'file_name': 'A132113010125.pdf', 'file_type': 'application/pdf', 'file_size': 548575, 'creation_dat

In [20]:
# implementing a tool spec
from llama_index.tools.google import GmailToolSpec

# Instantiate the Gmail tool spec and expand it into a list of tools.
tool_spec = GmailToolSpec()
tool_spec_list = tool_spec.to_tool_list()

# Show each tool's metadata as (name, description) pairs.
[(gmail_tool.metadata.name, gmail_tool.metadata.description) for gmail_tool in tool_spec_list]

[('load_data',
  "load_data() -> List[llama_index.core.schema.Document]\nLoad emails from the user's account."),
 ('search_messages',
  "search_messages(query: str, max_results: Optional[int] = None)\n\n        Searches email messages given a query string and the maximum number\n        of results requested by the user\n           Returns: List of relevant message objects up to the maximum number of results.\n\n        Args:\n            query (str): The user's query\n            max_results (Optional[int]): The maximum number of search results\n            to return.\n\n        "),
 ('create_draft',
  "create_draft(to: Optional[List[str]] = None, subject: Optional[str] = None, message: Optional[str] = None) -> str\n\n        Create and insert a draft email.\n           Print the returned draft's message and id.\n           Returns: Draft object, including draft id and message meta data.\n\n        Args:\n            to (Optional[str]): The email addresses to send the message to\n     

## Agents - creating agents

In [None]:
# building a basic function-calling agent

# firstly, define a function - perhaps a calculator
def multiply(a: int, b: int) -> int:
    """ calculate the multiplication of two numbers of type integers"""
    # NOTE(review): the docstring above is left untouched on purpose — this
    # function is wrapped by FunctionTool.from_defaults without an explicit
    # description, so the docstring is presumably what the agent sees; confirm.
    product = a * b
    return product

from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# LLM that drives the agent.
llm = HuggingFaceInferenceAPI(model="Qwen/Qwen2.5-Coder-32B-Instruct")

# Wrap `multiply` as a tool and build the agent workflow around it.
multiply_tool = FunctionTool.from_defaults(multiply)
agent = AgentWorkflow.from_tools_or_functions(
    tools_or_functions=[multiply_tool],
    llm=llm,
)

In [None]:
# Ask the function-calling agent a question its `multiply` tool can answer,
# then print the agent's final response.
result =  await agent.run('what is 2 times 2')
print(result)

2 times 2 is 4.


In [None]:
# implementing a stateful function-calling agent
# to remember or keep state of past interactions we can use the Context class 

from llama_index.core.workflow import Context

# Create a Context around the agent; the same ctx object is passed to each run
# below so that state from earlier interactions is kept.
ctx = Context(agent)

# First run that carries the shared context.
result1 = await agent.run('My name is bob', ctx=ctx)
print(result1)

Hello Bob, it seems you asked for a multiplication, and 2 multiplied by 3 is 6. How can I assist you further?


In [None]:
# Second run with the same ctx: the question refers back to the previous run,
# so it checks that the earlier interaction was remembered.
result2 = await agent.run('was my name capitalize?', ctx=ctx)
print(result2)

Yes, your name "Bob" is capitalized. How can I assist you further?


### Creating a more complex agent - Agentic RAG

In [31]:
# first we can re-use the intial index created earlier
query_engine = index.as_query_engine(
    llm=llm,
    similarity_top_k=3
)

# next, create a tool of this engine
query_engine_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name='knowledge base tool',
    description='searches and retrieve local information about the journal',
    return_direct=False
)

# next initialize the agent
query_engine_agent = AgentWorkflow.from_tools_or_functions(
    tools_or_functions=[query_engine_tool],
    llm=llm,
    system_prompt= 'you are a kind assistant with access to a database of journal information',
)

query_response =  await query_engine_agent.run('who are the authors of the journal')
print(query_response)

WorkflowRuntimeError: Error in step 'run_agent_step': 402, message='Payment Required', url='https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1/chat/completions'

### Creating a Multi-agentic system

In [None]:
# importing the necessary packages
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.agent.workflow import ReActAgent, AgentWorkflow
from llama_index.core import VectorStoreIndex
from llama_index.core.tools import QueryEngineTool, FunctionTool
from llama_index.core import SimpleDirectoryReader # directory reader
from llama_index.core.node_parser import SentenceSplitter # split document into chunks (nodes)
from llama_index.core.ingestion import IngestionPipeline # handles transformation and loading into the vectorstore
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore


# LLM served through Ollama; both ReAct agents defined later in this cell use it.
llm = Ollama(
    model='deepseek-r1',
    temperature=0.7,
    context_window=8000,
    request_timeout=60.0,
)

# Build a RAG system that is later converted into a query-engine tool.
# Step 1: load the files found in the ./data directory.
reader = SimpleDirectoryReader(input_dir='./data')
docs = reader.load_data(show_progress=True)


# Step 2: split the documents into nodes, embed them, and store the result in
# the vector store by means of an IngestionPipeline.

# New Chroma client and a collection dedicated to this section.
client = chromadb.PersistentClient()
chroma_collection = client.get_or_create_collection('journal_collection')
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Transformations in application order: sentence splitting, then embedding.
# NOTE(review): 'deepseek-r1' is also the chat model used above — confirm it is
# the intended model for embeddings.
sentence_splitter = SentenceSplitter(chunk_overlap=0)
node_embedder = OllamaEmbedding(model_name='deepseek-r1')

pipeline = IngestionPipeline(
    transformations=[sentence_splitter, node_embedder],
    vector_store=vector_store,
)

# Run the ingestion over the loaded documents.
nodes = pipeline.run(documents=docs, show_progress=True)

# Embedding model for the index; same model name as the embedding step of the
# ingestion pipeline above.
embed_model = OllamaEmbedding(model_name='deepseek-r1')


# Create the index on top of the vector store filled by the pipeline.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model
)


# Query interface over the index, answering with the Ollama LLM.
query_engine_index = index.as_query_engine(
    llm=llm,
    response_mode = "tree_summarize"
)


# creating a simple basic calculator function
def add(x: int, y: int) -> int:
    """Return ``x + y``."""
    total = x + y
    return total



# creating the tools for two ReAct agents, then the agents themselves


# Tool names are written without spaces (snake_case). A ReAct agent names the
# tool it wants to call in free text ("Action: <tool name>"), and a name that
# contains whitespace is prone to being cut short or mismatched when that text
# is parsed back into a tool call.
# NOTE(review): confirm against the ReAct output parser of the installed
# llama-index version.
query_engine_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine_index,
    name='query_engine_tool',
    description='queries a rag system '
)


# creating a function tool around the `add` function
calculator_tool = FunctionTool.from_defaults(
    fn=add,
    name='basic_calculator',
    description='adds two numbers together'
)


# ReAct agent 1: equipped with the query-engine tool for journal questions.
query_engine_agent = ReActAgent(
    llm=llm,
    tools=[query_engine_tool],
    name='journal lookup',
    description='looks up information in relation to the journal article',
    system_prompt='Use this tool to query a RAG system to retrieve information to related questions',
)

# ReAct agent 2: equipped with the calculator tool.
calculator_agent = ReActAgent(
    llm=llm,
    tools=[calculator_tool],
    name='basic calculator',
    description='adds two numbers',
    system_prompt='Use this tool to compute addition of numbers',
)

agent_response = AgentWorkflow(
    agents= [query_engine_agent, calculator_agent],
    root_agent='basic calculator'
)

# testing our multi-agent system
output = await agent_response.run(
    user_msg='who are the authors of the journal?'
)

print(output)

  from .autonotebook import tqdm as notebook_tqdm
Loading files: 100%|██████████| 1/1 [00:00<00:00, 152.51it/s]
Parsing nodes: 100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
Generating embeddings: 100%|██████████| 326/326 [30:49<00:00,  5.67s/it]


NameError: name 'output' is not defined

In [4]:
# Re-run of the multi-agent query: same workflow object and same question as at
# the end of the previous cell.
output = await agent_response.run(
    user_msg='who are the authors of the journal?'
)

print(output)

<think>
Alright, so looking at the conversation history, the user initially asked about the authors of a journal, which I couldn't answer directly. So, I used the handoff tool, directing them to an agent called 'journal lookup'. 

Now, the observation says that the agent has taken over and is handling the request. My task now is to figure out how to proceed with this new information.

I need to understand what exactly the 'journal lookup' agent does. From the previous message, it seems like they handle accessing journal author information. But since I don't have access to specific data, maybe they can only provide general info or perhaps require more details from the user.

So the next step is probably for me to ask a follow-up question to get the necessary information. The most logical way would be to ask what specific journal they're interested in. That way, once I know the journal name, I can pass that information to the 'journal lookup' agent again or perhaps another tool if needed

## Workflow

### Getting started workflow

In [None]:
# import the necessary packages
from llama_index.core.workflow import StartEvent, StopEvent, step, Event, Workflow


class MyWorkFlow(Workflow):
    """Single-step workflow: the only step goes straight from start to stop."""

    @step
    async def first_step(self, ev: StartEvent) -> StopEvent:
        """Answer the start event with a stop event carrying a fixed result."""
        final_event = StopEvent(result='Halleluia')
        return final_event
    

# Try out the basic workflow: instantiate it, await a run, and print the
# result carried by the StopEvent.
mfw = MyWorkFlow()
result = await mfw.run()
print(result)

Halleluia


### Multi-step workflow implementation 

In [7]:
# To connect multiple steps, we create custom events that carry data between steps.

# start by defining an intermediate event which is custom-defined
class IntermediateEvent(Event):
    """Custom event that carries the first step's output to the second step."""
    intermediate_output: str  # text produced by the emitting step

# define your workflow
class VendingMachineWorflow(Workflow):
    """Two-step vending machine workflow linked by an IntermediateEvent."""

    @step
    def first_step(self, ev: StartEvent) -> IntermediateEvent:
        """Emit the intermediate event that the second step consumes."""
        startup_message = 'Machine starting...'
        return IntermediateEvent(intermediate_output=startup_message)

    @step
    def second_step(self, ev: IntermediateEvent) -> StopEvent:
        """Fold the first step's message into the final result and stop."""
        return StopEvent(result=f'Machine ending after {ev.intermediate_output}')
    
# Create the vending machine workflow instance, await a run, and print the
# final result produced by the second step.
vending_flow =  VendingMachineWorflow()
vending_result = await vending_flow.run()
print(vending_result)

Machine ending after Machine starting...


### Adding loops, joins and branches to our multi-step agentic workflow

In [11]:
# inserting a loop in our multi-step workflow
# `random` is imported under its own name: the previous alias `re` is the
# conventional name of the standard-library regular-expression module.
import random
import typing as t


class ProcessingEvent(Event):
    """Emitted by the first step when the workflow may continue to step two."""

    intermediate_result: str


class LoopEvent(Event):
    """Emitted by the first step to route the workflow back into the first step."""

    loop_result: str


class MyMultiStepWorkflow(Workflow):
    """Two-step workflow whose first step randomly loops back on itself."""

    @step
    async def first_step(self, ev: t.Union[StartEvent, LoopEvent]) -> t.Union[ProcessingEvent, LoopEvent]:
        """Either ask for another attempt (LoopEvent) or move on (ProcessingEvent).

        Accepts both StartEvent and LoopEvent, so a LoopEvent returned here is
        handled by this same step again.
        """
        # randint is inclusive on both ends: 0 is drawn 1 time out of 11.
        if random.randint(0, 10) == 0:
            print('try again...')
            return LoopEvent(loop_result='you need to try again')
        return ProcessingEvent(intermediate_result='you made it, carry on to step two')

    @step
    async def second_step(self, ev: ProcessingEvent) -> StopEvent:
        """Finish the workflow with a message built from the first step's result."""
        final_output = f'{ev.intermediate_result}, Now you have arrive.'
        return StopEvent(result=final_output)

In [12]:
# Try out the looping workflow: 'try again...' is printed once for every time
# the first step loops before the run reaches the second step.
looping_agent = MyMultiStepWorkflow()
result = await looping_agent.run()
print(result)

try again...
you made it, carry on to step two, Now you have arrive.


In [13]:
# Visualize the workflow: draw its possible flows for the `looping_agent`
# instance; 'flow.html' is the output file name.
from llama_index.utils.workflow import draw_all_possible_flows

draw_all_possible_flows(looping_agent, 'flow.html')

flow.html


### Using state management in our agentic workflow

In [15]:
# inserting the argument ctx which will be of type Context and help keep track of the state of our workflow
# `random` is imported under its own name: the previous alias `re` is the
# conventional name of the standard-library regular-expression module.
import random
import typing as t
from llama_index.core.workflow import Context

# NOTE: the three classes below re-use the names of the classes from the
# looping-workflow cell, so running this cell replaces those definitions.


class ProcessingEvent(Event):
    """Emitted by the first step when the workflow may continue to step two."""

    intermediate_result: str


class LoopEvent(Event):
    """Emitted by the first step to route the workflow back into the first step."""

    loop_result: str


class MyMultiStepWorkflow(Workflow):
    """Looping two-step workflow that records its latest outcome in the Context."""

    @step
    async def first_step(self, ctx: Context, ev: t.Union[StartEvent, LoopEvent]) -> t.Union[ProcessingEvent, LoopEvent]:
        """Store the outcome under 'outcome' in ctx, then loop or move on.

        Accepts both StartEvent and LoopEvent, so a LoopEvent returned here is
        handled by this same step again.
        """
        # NOTE(review): newer llama-index releases may expose state access
        # through `ctx.store` — confirm against the installed version.
        # randint is inclusive on both ends: 0 is drawn 1 time out of 11.
        if random.randint(0, 10) == 0:
            print('try again...')
            outcome = 'you need to try again'
            await ctx.set('outcome', outcome)
            return LoopEvent(loop_result=outcome)

        outcome = 'you made it, carry on to step two'
        await ctx.set('outcome', outcome)
        return ProcessingEvent(intermediate_result=outcome)

    @step
    async def second_step(self, ctx: Context, ev: ProcessingEvent) -> StopEvent:
        """Print the outcome stored by the first step and finish the workflow."""
        final_output = f'{ev.intermediate_result}, Now you have arrive.'
        current_context = await ctx.get('outcome')
        print(current_context)  # confirming state validity
        return StopEvent(result=final_output)

In [16]:
# Run the stateful workflow: the second step prints the outcome stored in the
# context, and the final result is printed afterwards.
stateful_agent = MyMultiStepWorkflow()
result = await stateful_agent.run()
print(result)

you made it, carry on to step two
you made it, carry on to step two, Now you have arrive.
