### RAG with Vector Database - Agentic RAG

In [None]:
import os

from dotenv import load_dotenv
from llama_stack_client import LlamaStackClient, Agent, AgentEventLogger, RAGDocument
from termcolor import colored

In [None]:
# Load settings from a local .env file (python-dotenv) so that the
# LLAMA_STACK_SERVER_REMOTE_* variables read via os.environ are available.
load_dotenv()

In [None]:
# Connection settings for the remote Llama Stack server. Both variables are
# required; a missing one raises KeyError right here rather than later.
remote_host = os.environ["LLAMA_STACK_SERVER_REMOTE_HOST"]
remote_port = os.environ["LLAMA_STACK_SERVER_REMOTE_PORT"]

# Formatting from plain names avoids nesting double quotes inside a
# double-quoted f-string, which is a SyntaxError on Python older than 3.12.
base_url = f"http://{remote_host}:{remote_port}"

# Client used by every subsequent cell to talk to the server.
client = LlamaStackClient(
    base_url=base_url
)

In [None]:
# File names of the torchtune tutorial pages used as the RAG corpus.
urls = [
    "memory_optimizations.rst",
    "chat.rst",
    "llama3.rst",
    "datasets.rst",
    "qat_finetune.rst",
    "lora_finetune.rst"
]

# The base URL already ends with "/", so file names are appended directly;
# adding another separator would produce ".../tutorials//<file>".
base_document_url = "https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/"

# One RAGDocument per tutorial page: the content field carries the page URL
# and the id is derived from the page's position in `urls`.
documents = [
    RAGDocument(
        document_id=f"num-{i}",
        content=f"{base_document_url}{url}",
        mime_type="text/plain",
        metadata={},
    )
    for i, url in enumerate(urls)
]

In [None]:
# Echo the document list as the cell output for a quick visual check.
documents

In [None]:
# Narrow the server's provider list down to those exposing the "vector_io" API.
vector_providers = list(
    filter(lambda entry: entry.api == "vector_io", client.providers.list())
)

# Echo the filtered list as the cell output.
vector_providers

In [None]:
# Identifiers of every shield registered on the server (may be empty).
available_shields = [shield.identifier for shield in client.shields.list()]

if available_shields:
    print(f"Available Shields: {available_shields}")
else:
    # Highlight the empty case so it is not mistaken for a silent failure.
    print(colored("No Shields are available", "yellow"))

In [None]:
# Identifiers of the registered models whose type is "llm".
available_models = [
    entry.identifier
    for entry in client.models.list()
    if entry.model_type == "llm"
]

print("Available Models: " + str(available_models))

In [None]:
# Fail with an actionable message instead of a bare IndexError when the
# server exposes nothing to choose from.
if not vector_providers:
    raise RuntimeError("No vector_io provider is available on the Llama Stack server")
if not available_models:
    raise RuntimeError("No LLM model is available on the Llama Stack server")

# Default to the first entry of each discovered list.
selected_vector_provider = vector_providers[0]
selected_model = available_models[0]
# All discovered shields are selected (this may be an empty list).
selected_shields = available_shields

In [None]:
# Echo the chosen vector provider as the cell output for inspection.
selected_vector_provider

In [None]:
from uuid import uuid4

In [None]:
# A random UUID suffix keeps the vector DB name unique on every run.
vector_db_id = "test-vector-db_" + str(uuid4())

In [None]:
# Identifiers of the registered models whose type is "embedding".
available_embedding_models = [
    model.identifier
    for model in client.models.list()
    if model.model_type == "embedding"
]

# Labelled distinctly from the LLM listing so the two outputs cannot be confused.
print(f"Available Embedding Models: {available_embedding_models}")

In [None]:
# Use the first discovered embedding model for the new vector DB.
embedding_model_id = available_embedding_models[0]

# Read the vector size from the model's own metadata instead of assuming it,
# so a different embedding model does not end up registered with the wrong
# dimension. Falls back to 384 (the previously hard-coded value) when the
# server does not report an "embedding_dimension" entry.
embedding_model_info = next(
    (m for m in client.models.list() if m.identifier == embedding_model_id),
    None,
)
embedding_model_metadata = getattr(embedding_model_info, "metadata", None) or {}
embedding_dimension = int(embedding_model_metadata.get("embedding_dimension", 384))

# Register the vector DB with the selected provider; the call's result is
# left as the cell output.
client.vector_dbs.register(
    vector_db_id=vector_db_id,
    embedding_model=embedding_model_id,
    embedding_dimension=embedding_dimension,
    provider_id=selected_vector_provider.provider_id
)

In [None]:
# Insert the prepared documents into the vector DB through the RAG tool,
# requesting chunks of 512 tokens.
client.tool_runtime.rag_tool.insert(
    vector_db_id=vector_db_id,
    documents=documents,
    chunk_size_in_tokens=512,
)

In [None]:
# List the tool groups registered on the server and print them for reference.
available_tool_groups = client.toolgroups.list()

print(available_tool_groups)

In [None]:
# System prompt steering the agent towards the knowledge-search tool.
instructions = """
    You are a helpful assistant, who answers questions professionally. 
    Use Knowledge Search tool to gather information needed to answer the question.
    Answer very succinctly.
"""

# RAG agent backed by the selected LLM and the vector DB populated earlier.
agent = Agent(
    client,
    model=selected_model,
    instructions=instructions,
    # The sampling strategy is a nested object: "type", "temperature" and
    # "top_p" belong under "strategy", while "max_tokens" stays at top level.
    sampling_params={
        "strategy": {
            "type": "top_p",
            "temperature": 0.2,
            "top_p": 0.9,
        },
        "max_tokens": 1000,
    },
    # Limit knowledge search to the vector DB registered for this run.
    tools=[
        {
            "name": "builtin::rag/knowledge_search",
            "args": {
                "vector_db_ids": [vector_db_id]
            }
        }
    ],
    # No shields are attached to this agent, on either input or output.
    input_shields=[],
    output_shields=[],
    enable_session_persistence=False
)

In [None]:
# Open a session on the agent; every turn below is sent within it.
session_id = agent.create_session("test-session")

print(
    "Session Id: {} with the Agent {} created successfully!".format(
        session_id, agent.agent_id
    )
)

In [None]:
# Questions sent to the agent one after another in the same session.
prompts = [
    "Was anything related to Llama3 discussed? If so, what was that?",
    "Tell me how to use LoRA?",
    "What about Quantization?",
]

for prompt in prompts:
    # Send a single user message as one turn of the existing session.
    response = agent.create_turn(
        session_id=session_id,
        messages=[{"role": "user", "content": prompt}],
    )

    print("User ... > " + prompt)

    # Print each event the logger yields for this turn's response.
    for log in AgentEventLogger().log(response):
        log.print()