# Building Event-Driven Agentic RAG with Qdrant and LlamaIndex

In [118]:
%pip install qdrant-client fastembed llama-index-vector-stores-qdrant llama-index-embeddings-openai llama-index-llms-openai python-dotenv llama-index llama-index-utils-workflow llama-index-readers-web

Collecting llama-index-readers-web
  Downloading llama_index_readers_web-0.4.3-py3-none-any.whl.metadata (1.2 kB)
Collecting chromedriver-autoinstaller<0.7,>=0.6.3 (from llama-index-readers-web)
  Downloading chromedriver_autoinstaller-0.6.4-py3-none-any.whl.metadata (2.1 kB)
Collecting html2text<2025,>=2024.2.26 (from llama-index-readers-web)
  Downloading html2text-2024.2.26.tar.gz (56 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting lxml-html-clean>=0.4.2 (from llama-index-readers-web)
  Downloading lxml_html_clean-0.4.2-py3-none-any.whl.metadata (2.4 kB)
Collecting lxml>=5.4.0 (from llama-index-readers-web)
  Downloading lxml-6.0.0-cp313-cp313-macosx_10_13_universal2.whl.metadata (6.6 kB)
Collecting markdownify>=1.1.0 (from llama-index-readers-web)
  Downloading markdownify-1.1.0-py3-none-any.whl.metadata (9.1 kB)
Collecting newspaper3k<0.3,>=0.2.

In [34]:
from dotenv import load_dotenv

load_dotenv()

True

In [35]:
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore


client = QdrantClient("http://localhost:6333")

if client.collection_exists("my_collection"):
    client.delete_collection("my_collection")

vector_store = QdrantVectorStore(
    collection_name="my_collection",
    client=client,
    fastembed_sparse_model="Qdrant/bm42-all-minilm-l6-v2-attentions",
)

In [36]:
from llama_index.core.node_parser import SimpleNodeParser

node_parser = SimpleNodeParser.from_defaults(chunk_size=512, chunk_overlap=32)

In [37]:
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding

Settings.node_parser = SimpleNodeParser.from_defaults(chunk_size=512, chunk_overlap=32)


In [48]:
from llama_index.core.schema import Document

documents = [
    Document(
        text="LlamaIndex is a simple, flexible data framework for connecting custom data sources to large language models.",
        metadata={
            "library": "llama-index",
        },
    ),
    Document(
        text="Qdrant is a vector database & vector similarity search engine.",
        metadata={
            "library": "qdrant",
        },
    ),
]

In [38]:
from llama_index.core import VectorStoreIndex, StorageContext

storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    documents=documents,
    vector_store=vector_store,
    embed_model=OpenAIEmbedding(),
    storage_context=storage_context,
)

In [42]:
query_engine = index.as_query_engine()

response = query_engine.query("What is Qdrant?")
print(response)

Qdrant is a vector database and vector similarity search engine.


In [None]:
from pydantic import BaseModel, Field
from typing import List, Union

from llama_index.core.workflow import (Workflow, Event, step, StartEvent, StopEvent)
from llama_index.llms.openai import OpenAIResponses
from llama_index.core.llms import ChatMessage

class SaveToDocs(BaseModel):
	"""The statement to save into your collection."""
	statement: str = Field(default_factory=list)


class Ask(BaseModel):
	"""The natural language questions that can be asked to a Q&A agent."""
	queries: List[str] = Field(default_factory=list)


class Actions(BaseModel):
	"""Actions to take based on the latest user message."""
	actions: List[Union[SaveToDocs, Ask]] = Field(default_factory=list)

class WriteDocument(Event):
	statement: str

class QueryIndex(Event):
	queries: list[str]

class QdrantDocumentAgent(Workflow):
	def __init__(self, *args, **kwargs):
		self.llm = OpenAIResponses(model="gpt-4.1-mini")
		self.system_prompt = """You are a docs assistant. You evaluate incoming queries and break them down to subqueries when needed.
								You decide on the next best course of action. Overall, here are the options:
								- You can write documents to your collection.
								- You can answer a questions based on the contents of your collection."""
		super().__init__(*args, **kwargs)

	@step
	async def start(self, ev: StartEvent) -> WriteDocument | QueryIndex:
		sllm = self.llm.as_structured_llm(Actions)
		response = await sllm.achat(
				[
						ChatMessage(role="system", content=self.system_prompt),
						ChatMessage(role="user", content=ev.query),
				]
		)
		actions = response.raw.actions
		print(actions)
		for action in actions:
			if isinstance(action, SaveToDocs):
				print("Got Save event")
				return WriteDocument(statement=action.statement)
			elif isinstance(action, Ask):
				print("Got Ask event")
				return QueryIndex(queries=action.queries)
	@step
	def query_index(self, ev: QueryIndex) -> StopEvent:
		print(f"Request to query index with queries: {ev.queries}")
		return StopEvent()

	@step
	def save_to_index(self, ev: WriteDocument) -> StopEvent:
		print(f"Request to write to index: {ev.statement}")
		return StopEvent()

In [132]:
workflow = QdrantDocumentAgent()

In [133]:
respone = await workflow.run(start_event=StartEvent(query="What's Tuana like?"))

[Ask(queries=['Who is Tuana?', 'What are the characteristics or personality traits of Tuana?', 'What is Tuana known for?'])]
Got Ask evene
queries=['Who is Tuana?', 'What are the characteristics or personality traits of Tuana?', 'What is Tuana known for?']


In [122]:
print(response)

None


In [95]:
class QdrantDocumentAgent(Workflow):
  def __init__(self, *args, **kwargs):
        self.llm = OpenAIResponses(model="gpt-4.1-mini")
        self.system_prompt = """You are a docs assistant. You evaluate incoming queries and break them down to subqueries when needed.
                          You decide on the next best course of action. Overall, here are the options:
                          - You can write documents to your collection.
                          - You can answer a questions based on the contents of your collection."""
        super().__init__(*args, **kwargs)

  @step
  async def start(self, ev: StartEvent) -> WriteDocument | QueryIndex:
    sllm = self.llm.as_structured_llm(Actions)
    response = await sllm.achat(
        [
            ChatMessage(role="system", content=self.system_prompt),
            ChatMessage(role="user", content=ev.query),
        ]
    )
    actions = response.raw.actions
    print(actions)
    for action in actions:
      if isinstance(action, SaveToDocs):
          return WriteDocument(statement=action.statement)
      elif isinstance(action, Ask):
          return QueryIndex(queries=action.queries)
  
  @step
  def query_index(self, ev: QueryIndex) -> StopEvent:
    for query in ev.queries:
      response = query_engine.query(query)
      print(response)
    return StopEvent()

  @step
  def save_to_index(self, ev: WriteDocument) -> StopEvent:
    document = Document(text=ev.statement)
    index.insert(document)
    print(f"Wrote {document} to the index")
    return StopEvent()

In [96]:
workflow = QdrantDocumentAgent()
response = await workflow.run(start_event=StartEvent(query="Write this to my index please: Tuana is a devrel at llamaindex"))
print(response)

[SaveToDocs(statement='Tuana is a devrel at llamaindex')]
None
