# Testing the full RAG Flow

## Flow

1. Get user query
2. Retrieve context from Knowledge Base based on query
3. Create LLM prompt from query and context
4. Send Prompt to LLM and get response

## Libraries

In [1]:
# Load IPython's autoreload extension; mode 2 re-imports all modules before each
# cell executes, so edits to the imported project files are picked up without
# restarting the kernel
%load_ext autoreload
%autoreload 2

In [9]:
import os
import warnings

import requests
from dotenv import load_dotenv
from groq import Groq
from lancedb.table import Table

from src.constants import REPO_PATH, get_rag_config
from src.prompt_building import WELCOME_MSG, build_system_msg
from src.retrieval import get_context, get_knowledge_base

# ignore some warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

## Parameters

In [3]:
# secrets
# load_dotenv() expects the path of the .env file itself. If REPO_PATH is a
# directory, passing it directly loads nothing (and fails silently), so resolve
# it to the ".env" file inside that directory first.
dotenv_path = os.path.join(REPO_PATH, ".env") if os.path.isdir(REPO_PATH) else REPO_PATH
load_dotenv(dotenv_path)
groq_api_key = os.getenv("GROQ_TOKEN")
if not groq_api_key:
    # Surface the problem here rather than as an opaque 401 in the Groq cells below
    warnings.warn(f"GROQ_TOKEN is not set (looked in the environment and in {dotenv_path})")

## Code

## Setup 

### Indexes for Knowledge Base

In [None]:
# Open the knowledge-base table and report how many rows it currently holds
k_base: Table = get_knowledge_base()
kb_row_count = k_base.count_rows()
print(f"Number of entries in the table: {kb_row_count}")

In [5]:
# Full-text search index
# k_base.create_fts_index(["text", "title", "tags"], replace=True)

In [6]:
# Vector search index
# (takes 30-60 seconds)
# from src.constants import get_rag_config
# device: str = get_rag_config()["embeddings"]["device"]
# k_base.create_index(metric="cosine", replace=True, accelerator="cuda" if device == "cuda" else None)

## 1. User Query

In [7]:
# Sample user question; it is used both for retrieval and as the user turn of the chat
query_text: str = "How can I reduce my heart Disease Risk?"

## 2. Retrieve Context from Knowledge Base

In [None]:
# Read the retriever settings from the RAG config and pass them as keyword
# arguments to get_context, which queries the knowledge base for this question
retriever_config: dict = get_rag_config()["retriever"]
resp_formatted = get_context(k_base, query_text, **retriever_config)
# Show the retrieved context so it can be inspected before prompt building
print(resp_formatted)

## 3. Create LLM prompt from query and context

In [None]:
# Build the system message from the retrieved context and print it for inspection
system_msg = build_system_msg(context=resp_formatted)
print(system_msg)

In [None]:
# Chat history handed to the LLM: system prompt, assistant greeting, user question
MESSAGES: list[dict[str, str]] = [
    {"role": role, "content": content}
    for role, content in (
        ("system", system_msg),
        ("assistant", WELCOME_MSG.format(user_name="John Doe")),
        ("user", query_text),
    )
]
# Echo the conversation, one "ROLE: content" entry per message
for message in MESSAGES:
    print(f"{message['role'].upper()}: {message['content']}")

## 4. Send Prompt to LLM and get response

### Groq API

In [13]:
# Groq endpoint (OpenAI-compatible path) queried by get_model_list for the model catalogue
GROQ_MODELS_URL: str = "https://api.groq.com/openai/v1/models"


def get_model_list(api_key: str, models_url: str, timeout: float = 5) -> list[dict]:
    """Fetch the list of models exposed by a ``/models`` endpoint.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        models_url: Full URL of the models endpoint to query.
        timeout: Seconds to wait for the server before giving up (default 5).

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        ValueError: If ``api_key`` is empty or ``None``.
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON response has no ``"data"`` entry.
    """
    # Fail fast: without this guard a missing key would be sent as "Bearer None"
    # and only surface later as an HTTP 401.
    if not api_key:
        raise ValueError("api_key is empty; cannot query the models endpoint")
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    response = requests.get(url=models_url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Raise an HTTPError for bad responses
    return response.json()["data"]

In [None]:
# Query Groq for every model it exposes
model_list: list[dict] = get_model_list(api_key=groq_api_key, models_url=GROQ_MODELS_URL)
# Keep only the ids of models flagged as active, in alphabetical order
active_model_ids: list[str] = sorted(md["id"] for md in model_list if md["active"])
# One id per line
for model_id in active_model_ids:
    print(model_id)

In [None]:
# Send the assembled conversation to Groq's chat-completions API (non-streaming)
# NOTE(review): hosted model ids get retired over time — confirm the id below is
# still among the active model ids listed by the models cell before running
client = Groq(api_key=groq_api_key)
response = client.chat.completions.create(
    model="mixtral-8x7b-32768",  # "llama-3.1-70b-versatile",  # "llama3-70b-8192",
    messages=MESSAGES,
    temperature=0.5,
    stream=False,
)
# Display the usage statistics reported for this request as the cell output
dict(response.usage)

In [None]:
# Print the text of the first choice returned by the LLM
print(response.choices[0].message.content)