In [1]:
# Mount Google Drive so files stored there are reachable from the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# Switch to the project folder so the relative paths used later
# (e.g. './dune_questions.txt') resolve; presumably this is also where the
# local `manage_vector_db` module lives.
# NOTE(review): this absolute Drive path is user-specific; adjust it when
# running the notebook from another account or machine.
%cd /content/drive/MyDrive/projects/LLM/AgenticRAG/rag_agents

Mounted at /content/drive
/content/drive/MyDrive/projects/LLM/AgenticRAG/rag_agents


In [None]:
!pip install git+https://github.com/stanfordnlp/dspy.git

In [None]:
import os
import dspy
import random
import manage_vector_db as db_util
from dspy.retrieve.chromadb_rm import ChromadbRM
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# Smoke-test the retriever provided by `manage_vector_db`: run one sample
# query and display the text of the first returned hit.
rm = db_util.get_retriever()
rm.invoke("Who is Paul Atreides?")[0].page_content

'A beginning is the time for taking the most delicate care that the balances are correct. This every sister of the Bene Gesserit knows. To begin your study of the life of Muad\'Dib, then, take care that you first place him in his time: born in the 57th year of the Padishah Emperor, Shaddam IV. And take the most special care that you locate Muad\'Dib in his place: the planet Arrakis. Do not be deceived by the fact that he was born on Caladan and lived his first fifteen years there. Arrakis, the planet known as Dune, is forever his place.\n-from "Manual of Muad\'Dib" by the Princess Irulan\n\nIn the week before their departure to Arrakis, when all the final scurrying about had reached a nearly unbearable frenzy, an old crone came to visit the mother of the boy, Paul.\nIt was a warm night at Castle Caladan, and the ancient pile of stone that had served the Atreides family as home for twenty-six generations bore that cooled-sweat feeling it acquired before a change in the weather.\nThe old

In [None]:
# Load the question/answer set. Each non-blank line of the file is expected to
# look like `question;answer`.
# utf-8 is requested explicitly because the questions contain typographic
# apostrophes, which must not depend on the platform's default encoding.
with open('./dune_questions.txt', 'r', encoding='utf-8') as f:
  lines = f.readlines()

data = []
for line_no, line in enumerate(lines, start=1):
  # Strip each field so stray spaces or the trailing newline never end up in
  # the gold answer.
  fields = [field.strip() for field in line.split(';')]
  if fields == ['']:
    # Blank line (typically a trailing newline at the end of the file).
    continue
  if len(fields) < 2:
    # Fail loudly with the offending line instead of a bare IndexError.
    raise ValueError(
      f'dune_questions.txt line {line_no}: expected "question;answer", got {line!r}'
    )
  # Only `question` is an input; `answer` stays as the label used by metrics.
  ex = dspy.Example(question=fields[0], answer=fields[1]).with_inputs("question")
  data.append(ex)
print(f'Q: {data[0].get("question")}')
print(f'A: {data[0].get("answer")}')

Q: What planet do the Atreides occupy at the beginning of Dune?
A: Caladan


In [None]:
# Reproducible train/dev split.
# A dedicated, seeded RNG makes the split identical on every run, and
# `sample(data, len(data))` returns a shuffled copy, so `data` keeps its file
# order and re-running this cell always yields the same `train` / `dev`.
SPLIT_SEED = 42   # change to draw a different (but still repeatable) split
TRAIN_SIZE = 20   # number of examples handed to the optimizer; the rest is dev

shuffled = random.Random(SPLIT_SEED).sample(data, len(data))
train = shuffled[:TRAIN_SIZE]
dev = shuffled[TRAIN_SIZE:]
print(f'train: {len(train)}')
print(f'dev: {len(dev)}')

train: 20
dev: 36


In [None]:
# Language model used by every DSPy module in this notebook.
turbo = dspy.OpenAI(model='gpt-4o-mini')

# Embedding function handed to the Chroma retriever below.
# `os.environ.get` yields None when OPENAI_API_KEY is unset, so a missing key
# is not reported on this line.
embedding_function = OpenAIEmbeddingFunction(
    api_key=os.environ.get('OPENAI_API_KEY'),
    model_name="text-embedding-ada-002"
)

# Chroma-backed retrieval model. The two positional arguments are presumably
# the collection name ('Dune') and the on-disk location of the vector store.
# VECTOR_DB_DIR must be set: indexing os.environ raises KeyError otherwise.
retriever_model = ChromadbRM(
    'Dune',
    os.environ["VECTOR_DB_DIR"],
    embedding_function=embedding_function,
    k=5
)

# Register the LM and the retriever in DSPy's global settings.
dspy.settings.configure(lm=turbo, rm=retriever_model)

In [None]:
# Sanity check: query the DSPy retrieval model directly and show the first hit
# (the displayed record carries `id`, `score` and `long_text`).
retriever_model("What is Jessica’s relationship with Duke Leto?", k=5)[0]

{'id': 'b2e05189-347b-4b24-b847-0b09cc297a0a',
 'score': 0.389038461343692,
 'long_text': ' history."\n"Don\'t be facetious, girl! You know as well as I do what forces surround us. We\'ve a three-point civilization: the Imperial Household balanced against the Federated Great Houses of the Landsraad, and between them, the Guild with its damnable monopoly on interstellar transport. In politics, the tripod is the most unstable of all structures. It\'d be bad enough without the complication of a feudal trade culture which turns its back on most science." \nJessica spoke bitterly: "Chips in the path of the flood -- and this chip here, this is the Duke Leto, and this one\'s his son, and this one\'s --"\n"Oh, shut up, girl. You entered this with full knowledge of the delicate edge you walked." \n" \'I am Bene Gesserit: I exist only to serve,\' " Jessica quoted. \n"Truth." the old woman said. "And all we can hope for now is to prevent this from erupting into general conflagration, to salvage w

In [None]:
# Signature for the answer-generation step: (context, question) -> answer.
# NOTE: DSPy uses the class docstring and the `desc` strings below as prompt
# text for the LM, so rewording them changes model behavior.
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [None]:
class RAG(dspy.Module):
    """Retrieve-then-answer program.

    Looks up passages for the question with the retriever configured in
    `dspy.settings`, then asks a chain-of-thought predictor built on
    `GenerateAnswer` to produce the answer from those passages.
    """

    def __init__(self, num_passages=3):
        """Create the two sub-modules.

        Args:
            num_passages: value passed as `k` to `dspy.Retrieve`.
        """
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer `question`.

        Returns:
            A `dspy.Prediction` with `context` (the retrieved passages) and
            `answer` (the generated answer).
        """
        retrieved = self.retrieve(question)
        passages = retrieved.passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)

In [None]:
from dspy.teleprompt import BootstrapFewShot, MIPRO

def validate_context_and_answer(example, pred, trace=None):
    """Metric used while bootstrapping demonstrations.

    A prediction passes only when the predicted answer exactly matches the
    gold answer AND the retrieved context contains that answer.

    Args:
        example: gold example.
        pred: prediction produced by the program.
        trace: accepted for compatibility with the metric call signature;
            not used here.

    Returns:
        The exact-match result when it is falsy, otherwise the passage-match
        result (i.e. the value of `exact_match and passage_match`).
    """
    # Both checks are always evaluated, in this order.
    exact_match = dspy.evaluate.answer_exact_match(example, pred)
    passage_match = dspy.evaluate.answer_passage_match(example, pred)
    if not exact_match:
        return exact_match
    return passage_match

# Set up a basic teleprompter (optimizer) that will compile our RAG program,
# using `validate_context_and_answer` as the metric.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer)
# TODO: try the MIPRO optimizer instead:
# teleprompter = MIPRO(metric=validate_context_and_answer)

# Compile a fresh RAG instance on the training split; `compiled_rag` is the
# optimized program used in the rest of the notebook.
compiled_rag = teleprompter.compile(RAG(), trainset=train)

 85%|████████▌ | 17/20 [00:04<00:00,  3.59it/s]

Bootstrapped 4 full traces after 18 examples in round 0.





In [None]:
# Try the compiled RAG program on a free-form question.
my_question = "Who is the Kwisatz Haderach?"

# Run the program; the result exposes `pred.context` and `pred.answer`.
pred = compiled_rag(my_question)

# Keep only the first 200 characters of each passage for display.
truncated_contexts = []
for passage in pred.context:
    truncated_contexts.append(passage[:200] + '...')

# Show the question, the answer and the shortened passages.
print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")
print(f"Retrieved Contexts (truncated): {truncated_contexts}")

Question: Who is the Kwisatz Haderach?
Predicted Answer: Paul Atreides
Retrieved Contexts (truncated): ['isatz Haderach?"\n"Yes, the one who can be many places at once: the Kwisatz Haderach. Many men have tried the drug . . . so many, but none has succeeded." \n"They tried and failed, all of them?" \n"Oh, n...', '.\nHe said: "But my mother tells me many Bene Gesserit of the schools don\'t know their ancestry."\n"The genetic lines are always in our records," she said. "Your mother knows that either she\'s of Bene G...', ' Your Reverence."\n"He\'s awake and listening to us," said the old woman. "Sly little rascal." She chuckled. "But royalty has need of slyness. And if he\'s really the Kwisatz Haderach . . . well . . ." \n...']


Evaluate pipeline.

In [None]:
import openai
from dspy.evaluate.evaluate import Evaluate

# Define the signature for automatic assessments: an LLM judge receives a
# question about a text plus the text itself, and returns a Yes/No verdict.
# NOTE: DSPy uses the class docstring and the `desc` string as prompt text,
# so rewording them changes the judge's behavior.
class Assess(dspy.Signature):
    """Assess the quality of a text answer to a question."""

    assessment_question = dspy.InputField()
    assessment_text = dspy.InputField()
    assessment_decision = dspy.OutputField(desc="Yes or No")

# Set up the `evaluate_on_duneqa` evaluator over the dev split. It runs on a
# single thread, shows a progress bar, and `display_table=5` requests a
# preview table of results (five rows appear in the recorded output).
evaluate_on_duneqa = Evaluate(devset=dev, num_threads=1, display_progress=True, display_table=5)

def _is_affirmative(decision):
    """Return True when an LLM verdict starts with the word "yes".

    Case, surrounding whitespace and punctuation/quoting around the first word
    are ignored, so "Yes", "yes.", "**Yes**" and "Yes, it does" all count.
    A missing or empty verdict counts as not affirmative.
    """
    text = (decision or "").strip().lower()
    if not text:
        return False
    first_token = text.split(maxsplit=1)[0]
    return first_token.strip("`'\"*.,:;!") == "yes"


def llm_metric(gold, pred, trace=None):
    """LLM-as-judge metric: 1 if the judge accepts the prediction, else 0.

    Args:
        gold: gold example; `gold.question` and `gold.answer` are read.
        pred: program prediction; `pred.answer` is read.
        trace: accepted for compatibility with the metric call signature;
            not used here.

    Returns:
        1 when the `Assess` predictor's decision is affirmative, otherwise 0.
    """
    question, answer, prediction = gold.question, gold.answer, pred.answer

    # Log each judged triple: question | gold answer | predicted answer.
    print(f'INPUT: {question} | {answer} | {prediction}')

    assessment_prompt = f"The text should answer the question `{question}` with `{answer}`. Does the text answer the question?"

    assessment = dspy.Predict(Assess)(assessment_text=prediction, assessment_question=assessment_prompt)

    # A strict `== 'yes'` comparison would score verdicts such as "Yes." or
    # "Yes, it does" as failures, so only the first word is inspected.
    return 1 if _is_affirmative(assessment.assessment_decision) else 0

# Score the compiled RAG program on the dev split with the LLM-judge metric.
evaluate_on_duneqa(compiled_rag, metric=llm_metric)

  0%|          | 0/36 [00:00<?, ?it/s]INPUT: What is the name of Paul Atreides' mother? | Lady Jessica | Jessica
Average Metric: 0 / 1  (0.0):   3%|▎         | 1/36 [00:01<00:50,  1.44s/it]INPUT: What valuable substance is found on the planet Arrakis? | Melange, also known as spice | spice
Average Metric: 0 / 2  (0.0):   6%|▌         | 2/36 [00:03<00:54,  1.61s/it]INPUT: With which Fremen leader does Paul form a close relationship? | Stilgar | Stilgar
Average Metric: 1 / 3  (33.3):   8%|▊         | 3/36 [00:04<00:55,  1.68s/it]INPUT: Which is the baron’s grandson? | Paul Atreides | Feyd-Rautha
Average Metric: 1 / 4  (25.0):  11%|█         | 4/36 [00:06<00:52,  1.65s/it]INPUT: What is the Fremen name for Paul Atreides? | Muad'Dib | Muad'Dib
Average Metric: 2 / 5  (40.0):  14%|█▍        | 5/36 [00:08<00:52,  1.69s/it]INPUT: Does Paul see the future? | Yes | Yes
Average Metric: 3 / 6  (50.0):  17%|█▋        | 6/36 [00:10<01:00,  2.01s/it]INPUT: What does the Kwisatz Haderach bring? | Impo

Unnamed: 0,question,example_answer,context,pred_answer,llm_metric
0,What is the name of Paul Atreides' mother?,Lady Jessica,"[' You\'ll ride upon your own two feet without \'thopter or groundcar or mount.""\nAnd Paul had been caught more by her tone -- singsong and...",Jessica,0
1,What valuable substance is found on the planet Arrakis?,"Melange, also known as spice","[', dust, everything. They can eat flesh off bones and etch the bones to slivers."" \n""Why don\'t they have weather control?""\n""Arrakis has special problems, costs...",spice,0
2,With which Fremen leader does Paul form a close relationship?,Stilgar,"[' joined him at the table, buttoning his jacket. ""What\'ll I be studying on the way across?""\n""Ah-h-h-h, the terranic life forms of Arrakis. The planet...",Stilgar,✔️ [1]
3,Which is the baron’s grandson?,Paul Atreides,"['The ultimate effect for us would be the same, however.""\n""No, it would not!"" the Baron growled. ""I must have him dead and his line ended.""\n""That\'s...",Feyd-Rautha,0
4,What is the Fremen name for Paul Atreides?,Muad'Dib,"[' joined him at the table, buttoning his jacket. ""What\'ll I be studying on the way across?""\n""Ah-h-h-h, the terranic life forms of Arrakis. The planet...",Muad'Dib,✔️ [1]


58.33

Evaluate Retrieval.

PROBLEM: my dataset does not have 'gold_titles'.