In [3]:
import json
import os
import warnings

import dspy
from dspy.utils import download

# The OpenAI-backed model configured below needs an API key. Without one, the
# first LM call fails with an AuthenticationError, so surface the problem here
# (as a warning only, so other ways of supplying credentials keep working).
if not os.environ.get("OPENAI_API_KEY"):
    warnings.warn(
        "OPENAI_API_KEY is not set; calls to 'openai/gpt-4o-mini' will fail "
        "with an AuthenticationError unless a key is supplied another way.",
        RuntimeWarning,
    )

# Create the language-model client and register it as DSPy's default LM.
lm = dspy.LM('openai/gpt-4o-mini')
dspy.configure(lm=lm)

In [4]:
# Declare a single-step predictor from an inline signature: a string
# `question` goes in, a string `response` comes out.
qa = dspy.Predict('question: str -> response: str')
response = qa(question="what are high memory and low memory on linux?")

# The signature's output field is read back as an attribute of the prediction.
print(response.response)

AuthenticationError: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [None]:
# Show the most recent LM interaction recorded by DSPy (n=1 -> only the last one).
dspy.inspect_history(n=1)

In [None]:
# Same kind of task, but through ChainOfThought; this signature leaves the
# field types unspecified ('question -> response').
cot = dspy.ChainOfThought('question -> response')
# Kept as the cell's last expression so the notebook displays the returned value.
cot(question="should curly braces appear on their own line?")

In [None]:
# So far, we built a very simple chain-of-thought module for question answering and evaluated it on a small dataset.
# Next, we build a retrieval-augmented generation (RAG) pipeline.

In [None]:
# full code block
# Fetch the corpus file; the loading step below expects it to be available
# locally as "ragqa_arena_tech_corpus.jsonl".
download("https://huggingface.co/dspy/cache/resolve/main/ragqa_arena_tech_corpus.jsonl")

# as far as DSPy is concerned, you can plug in any Python code for calling tools or retrievers.

# %pip install -U faiss-cpu  # or faiss-gpu if you have a GPU

max_characters = 6000  # for truncating >99th percentile of documents
topk_docs_to_retrieve = 5  # number of documents to retrieve per search query

# Read the JSONL corpus with an explicit UTF-8 encoding (the platform default
# is not guaranteed to be UTF-8) using the standard-library JSON parser, and
# skip blank lines, which are not valid JSON records. Only the 'text' field of
# each record is kept, truncated to `max_characters`.
with open("ragqa_arena_tech_corpus.jsonl", encoding="utf-8") as f:
    corpus = [json.loads(line)['text'][:max_characters] for line in f if line.strip()]
    print(f"Loaded {len(corpus)} documents. Will encode them below.")

# Embedding client for the corpus and the queries (requesting 512 dimensions).
embedder = dspy.Embedder('openai/text-embedding-3-small', dimensions=512)
# Retriever over `corpus`, configured with k=topk_docs_to_retrieve; later cells
# call it as `search(query)` and read `.passages` from the result.
search = dspy.retrievers.Embeddings(embedder=embedder, corpus=corpus, k=topk_docs_to_retrieve)

class RAG(dspy.Module):
    """Retrieval-augmented question answering.

    Looks up passages for the question with the notebook-level ``search``
    retriever and passes them, together with the question, to a
    chain-of-thought predictor that produces ``response``.
    """

    def __init__(self):
        # Initialise dspy.Module's own state explicitly instead of relying on
        # the base class to set it up implicitly.
        super().__init__()
        self.respond = dspy.ChainOfThought('context, question -> response')

    def forward(self, question):
        """Answer ``question`` using retrieved context.

        Args:
            question: The question text; it is also used as the search query.

        Returns:
            Whatever ``self.respond`` returns for the retrieved context and
            the question.
        """
        # `search` is a notebook global resolved at call time, so it must be
        # defined before the module is invoked.
        context = search(question).passages
        return self.respond(context=context, question=question)
    
# Instantiate the unoptimized pipeline and run one question through it.
# (Not the cell's last expression, so the returned value is not displayed.)
rag = RAG()
rag(question="what are high memory and low memory on linux?")

# Inspect the LM history right after the call above.
dspy.inspect_history()

# NOTE(review): `evaluate` is not defined in the visible part of this notebook —
# confirm it is created in an earlier cell before running this on a fresh kernel.
evaluate(RAG())

# NOTE(review): `metric` and `trainset` are not defined in the visible part of
# this notebook — confirm they are created in an earlier cell.
tp = dspy.MIPROv2(metric=metric, auto="medium", num_threads=24)  # use fewer threads if your rate limit is small

# Compile a fresh RAG instance against the training set; the result is the
# optimized program used in the comparison below.
optimized_rag = tp.compile(RAG(), trainset=trainset,
                           max_bootstrapped_demos=2, max_labeled_demos=2,
                           requires_permission_to_run=False)

# Ask the unoptimized and the optimized program the same question and print
# each `response` so the two answers can be compared.
baseline = rag(question="cmd+tab does not work on hidden or minimized windows")
print(baseline.response)

pred = optimized_rag(question="cmd+tab does not work on hidden or minimized windows")
print(pred.response)

# You can use dspy.inspect_history(n=2) to view the RAG prompt before optimization and after optimization.

# Total spend in USD, as calculated by LiteLLM for certain providers;
# history entries whose cost is None are left out of the sum.
cost = sum(entry['cost'] for entry in lm.history if entry['cost'] is not None)

# Persist the optimized program to a JSON file.
optimized_rag.save("optimized_rag.json")

# Rebuild a program of the same class and load the saved file into it.
loaded_rag = RAG()
loaded_rag.load("optimized_rag.json")

# Kept as the cell's last expression so the notebook displays the returned value.
loaded_rag(question="cmd+tab does not work on hidden or minimized windows")

