In [95]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex
import qdrant_client

import logging
import sys
import json

from llama_index.core import Settings

from llama_index.core.tools import QueryEngineTool
from llama_index.core.agent.workflow import FunctionAgent

from qazure import get_llm, get_embedder

from llama_index.core.workflow import Context

from llama_index.core.agent.workflow import AgentStream, AgentInput, AgentOutput, ToolCall, ToolCallResult

In [49]:
# Send log records to stdout at INFO level (use logging.DEBUG for more
# verbose output).
# basicConfig(stream=...) already attaches one stdout handler to the root
# logger, so no second StreamHandler is added: with two handlers every record
# is printed twice. force=True replaces handlers left behind by an earlier run
# of this cell, so re-running it does not stack duplicates either.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [50]:
# Build the LLM and the embedder through the project's qazure helpers.
llm, embed_model = get_llm(), get_embedder()

In [51]:
# Install both models on the global llama_index Settings object (the index
# further down is built without an explicit embed_model).
Settings.embed_model = embed_model
Settings.llm = llm

In [52]:
# Async client for the Qdrant server running on this machine, port 6333.
client = qdrant_client.AsyncQdrantClient(host="localhost", port=6333)

INFO:httpx:HTTP Request: GET http://localhost:6333 "HTTP/1.1 200 OK"
HTTP Request: GET http://localhost:6333 "HTTP/1.1 200 OK"
HTTP Request: GET http://localhost:6333 "HTTP/1.1 200 OK"
HTTP Request: GET http://localhost:6333 "HTTP/1.1 200 OK"


In [53]:
collections = await client.get_collections()
print(collections)
for collection in collections:
    for coll in collection[1]:
        print(coll.name)

INFO:httpx:HTTP Request: GET http://localhost:6333/collections "HTTP/1.1 200 OK"
HTTP Request: GET http://localhost:6333/collections "HTTP/1.1 200 OK"
HTTP Request: GET http://localhost:6333/collections "HTTP/1.1 200 OK"
HTTP Request: GET http://localhost:6333/collections "HTTP/1.1 200 OK"
collections=[CollectionDescription(name='individual_upload_test'), CollectionDescription(name='docling_test'), CollectionDescription(name='paul_graham')]
individual_upload_test
docling_test
paul_graham


In [54]:
# Vector store over the existing "docling_test" collection; only the async
# client is supplied.
vector_store = QdrantVectorStore(
    collection_name="docling_test",
    aclient=client,
)

In [55]:
# Wrap the existing vector store in an index. No embed_model is passed here;
# whichever model is used must match the one the collection was originally
# embedded with.
loaded_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# Query engine configured with similarity_top_k=3.
qq_engine = loaded_index.as_query_engine(similarity_top_k=3)

In [74]:
# Expose the query engine as a tool the agent can call.
# The description is assembled from adjacent string literals, which Python
# joins with no separator, so the first sentence has to end with ". " itself;
# otherwise the text reads "...indexed documentsUse a detailed...".
retrieval_tool = QueryEngineTool.from_defaults(
    query_engine=qq_engine,
    name="docling_engine",
    description=(
        "Use this retrieval tool to get information from indexed documents. "
        "Use a detailed plain text question as input to the tool."
    ),
)

In [75]:
# Function-calling agent whose only tool is the retrieval tool defined above.
retrieval_agent = FunctionAgent(
    name="RetrievalAgent",
    system_prompt="You are a helpful assistant that retrieves information using the retrieval tool",
    llm=llm,
    tools=[retrieval_tool],
)

In [72]:
# Conversation context for retrieval_agent. The same ctx is handed to the
# run() calls below that pass ctx=ctx, and the chat history is read back from
# it near the end of the notebook.
ctx = Context(retrieval_agent)

In [44]:
# Question sent to the agent in the next cell.
query = "Which are the main AI models in Docling?"

In [45]:
response = await retrieval_agent.run(query,ctx=ctx)

INFO:httpx:HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:6333/col

In [100]:
handler = retrieval_agent.run(user_msg="Which are the main AI models in Docling?")

async for event in handler.stream_events():
        if isinstance(event, AgentStream):
                print(event.delta, end="", flush=True)
        elif isinstance(event, ToolCallResult):
                raw_output = event.tool_output.raw_output
                print(event)
                print(len(raw_output.source_nodes))
                for source_node in raw_output.source_nodes:
                        print(source_node.get_content())
                        print(source_node.get_score())
                        print(json.dumps(source_node.metadata, indent=4))

INFO:httpx:HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/

In [46]:
response = await retrieval_agent.run("Hello my name is Akshay", ctx=ctx)
print(str(response))

INFO:httpx:HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
Hello Akshay! How can I assist you today?


In [51]:
response = await retrieval_agent.run("Hello. What is my name?", ctx=ctx)
print(str(response))

INFO:httpx:HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://evaln-openai.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
Your name is Akshay!


In [59]:
# Fetch the object stored in the context under the "memory" key; the next
# cell calls .get() on it to list the chat messages.
memory = await ctx.get("memory")

In [60]:
# Display the chat messages held in memory (user and assistant turns,
# including the assistant's tool calls).
memory.get()

[ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='Hello my name is Akshay')]),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='Hello Akshay! How can I assist you today?')]),
 ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='Hello. What is my name?')]),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='Your name is Akshay!')]),
 ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='Which are the main AI models in Docling?')]),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'tool_calls': [ChoiceDeltaToolCall(index=0, id='call_9CS6sUmap4b8JUwABCYyGjGq', function=ChoiceDeltaToolCallFunction(arguments='{"input":"What are the main AI models used in

In [58]:
# Dump the context as a plain dict; the memory shows up serialized under the
# "globals" key.
ctx.to_dict()

{'globals': {'memory': '{"__is_component": true, "value": {"chat_store": {"store": {"chat_history": [{"role": "user", "additional_kwargs": {}, "blocks": [{"block_type": "text", "text": "Hello my name is Akshay"}]}, {"role": "assistant", "additional_kwargs": {}, "blocks": [{"block_type": "text", "text": "Hello Akshay! How can I assist you today?"}]}, {"role": "user", "additional_kwargs": {}, "blocks": [{"block_type": "text", "text": "Hello. What is my name?"}]}, {"role": "assistant", "additional_kwargs": {}, "blocks": [{"block_type": "text", "text": "Your name is Akshay!"}]}, {"role": "user", "additional_kwargs": {}, "blocks": [{"block_type": "text", "text": "Which are the main AI models in Docling?"}]}, {"role": "assistant", "additional_kwargs": {"tool_calls": [{"index": 0, "id": "call_9CS6sUmap4b8JUwABCYyGjGq", "function": {"arguments": "{\\"input\\":\\"What are the main AI models used in Docling?\\"}", "name": "docling_engine"}, "type": "function"}]}, "blocks": [{"block_type": "text"