# Instantiating the Ollama client with llama3.2:1b

In [None]:
# !ollama pull llama3.2:1b

In [2]:
from ollama import Client

# Connect to the Ollama server at localhost:11434, passing a custom (placeholder) header.
client = Client(host='http://localhost:11434', headers={'x-some-header': 'some-value'})

# Send a single user message to llama3.2:1b and keep the reply in `response`.
response = client.chat(
    model='llama3.2:1b',
    messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
)

# Configuring the DSPy environment

In [2]:
import dspy

# Point DSPy at the same Ollama-served model and register it as the configured LM.
lm = dspy.LM(
    'ollama_chat/llama3.2:1b',
    api_base='http://localhost:11434',
    api_key='',
)
dspy.configure(lm=lm)

In [2]:
# Smoke-test the configured LM by calling it directly with a prompt;
# the cell displays the returned list of completion strings.
lm("Why is the sky blue?")

["The sky appears blue to us because of a phenomenon called Rayleigh scattering, named after the British physicist Lord Rayleigh. He discovered that when sunlight enters Earth's atmosphere, it encounters tiny molecules of gases such as nitrogen and oxygen.\n\nThese molecules scatter the light in all directions, but they scatter shorter (blue) wavelengths more than longer (red) wavelengths. This is because the smaller molecules are more effective at scattering the blue light.\n\nAs a result, the blue light is distributed throughout the atmosphere, giving the sky its blue color. The other colors of the visible spectrum, such as red and orange, are scattered less and remain closer to the direction of the sun, which is why they appear more intense in the sky during the day.\n\nIt's worth noting that the sky can take on a range of colors depending on the time of day, atmospheric conditions, and other factors. For example, during sunrise and sunset, the sky can take on hues of red, orange, a

# Download and prepare Q&A data

In [3]:
import ujson
from dspy.utils import download

# Download question--answer pairs from the RAG-QA Arena "Tech" dataset.
download("https://huggingface.co/dspy/cache/resolve/main/ragqa_arena_tech_examples.jsonl")

# Parse the JSON Lines file (one JSON object per line). Decode explicitly as
# UTF-8 so parsing does not depend on the platform's default encoding, and
# skip blank lines (e.g. a trailing newline) instead of failing on them.
with open("ragqa_arena_tech_examples.jsonl", encoding="utf-8") as f:
    data = [ujson.loads(line) for line in f if line.strip()]

# Inspect one datapoint.
data[0]

{'question': 'why igp is used in mpls?',
 'response': "An IGP exchanges routing prefixes between gateways/routers.  \nWithout a routing protocol, you'd have to configure each route on every router and you'd have no dynamic updates when routes change because of link failures. \nFuthermore, within an MPLS network, an IGP is vital for advertising the internal topology and ensuring connectivity for MP-BGP inside the network.",
 'gold_doc_ids': [2822, 2823]}

In [4]:
# Wrap each raw record in a `dspy.Example` and mark `question` as its input field.
data = [
    dspy.Example(**record).with_inputs('question')
    for record in data
]

# Let's pick an `example` here from the data.
example = data[2]
example

Example({'question': 'why are my text messages coming up as maybe?', 'response': 'This is part of the Proactivity features new with iOS 9: It looks at info in emails to see if anyone with this number sent you an email and if it finds the phone number associated with a contact from your email, it will show you "Maybe". \n\nHowever, it has been suggested there is a bug in iOS 11.2 that can result in "Maybe" being displayed even when "Find Contacts in Other Apps" is disabled.', 'gold_doc_ids': [3956, 3957, 8034]}) (input_keys={'question'})

In [5]:
import random

# Shuffle a copy of the data for the training, dev, and test sets.
# Shuffling `data` itself in place would make this cell non-idempotent:
# re-running it would reshuffle the already-shuffled list and silently change
# the splits. The fixed seed yields the same permutation on every run.
shuffled_data = list(data)
random.Random(0).shuffle(shuffled_data)
trainset, devset, testset = shuffled_data[:20], shuffled_data[20:50], shuffled_data[50:100]

len(trainset), len(devset), len(testset)

(20, 30, 50)

# Set up vector db and collection for retrieval

In [6]:
import chromadb
from chromadb.utils import embedding_functions

# Directory where the persistent Chroma client stores its data.
persist_directory = "vector_db"
chroma_client = chromadb.PersistentClient(path=persist_directory)

# Default: Sentence Transformers all-MiniLM-L6-v2
default_ef = embedding_functions.DefaultEmbeddingFunction()

# Open the corpus collection, creating it if it does not exist yet.
collection = chroma_client.get_or_create_collection(
    embedding_function=default_ef,
    name="ragqa_arena_tech_corpus",
)

## Prepare corpus for vector db and add to collection

In [None]:
# download("https://huggingface.co/dspy/cache/resolve/main/ragqa_arena_tech_corpus.jsonl")

In [None]:
import ujson

import os

corpus_path = "ragqa_arena_tech_corpus.jsonl"

# Fetch the corpus only when it is not on disk yet, so a fresh checkout can
# run this cell without first un-commenting a separate download cell.
if not os.path.exists(corpus_path):
    download("https://huggingface.co/dspy/cache/resolve/main/ragqa_arena_tech_corpus.jsonl")

# Parse the JSON Lines corpus. Decode explicitly as UTF-8 so parsing does not
# depend on the platform's default encoding, and skip blank lines.
with open(corpus_path, encoding="utf-8") as f:
    corpus = [ujson.loads(line) for line in f if line.strip()]

print(f"Loaded {len(corpus)} documents. Will encode them below.")

In [None]:
# Cap every document's text at `max_characters`; the result is added to the
# collection afterwards. Only the text, doc_id and author fields are kept.

max_characters = 6000  # for truncating >99th percentile of documents
corpus = [
    dict(text=doc['text'][:max_characters], doc_id=doc['doc_id'], author=doc['author'])
    for doc in corpus
]

In [None]:
# Add the corpus in batches: Chroma rejects a single `add` call whose record
# count exceeds the client's maximum batch size, which a large corpus can do.
max_batch = getattr(chroma_client, "get_max_batch_size", None)
batch_size = max_batch() if callable(max_batch) else 1000  # conservative fallback if the client has no such method

for start in range(0, len(corpus), batch_size):
    batch = corpus[start:start + batch_size]
    collection.add(
        documents=[d['text'] for d in batch],
        ids=[str(d['doc_id']) for d in batch],  # ids are passed as strings
    )

## Set up retriever

In [7]:
from dspy.retrieve.chromadb_rm import ChromadbRM

# Number of passages to return per query, and the collection to search.
topk_docs_to_retrieve = 5
collection_name = "ragqa_arena_tech_corpus"

# Build the DSPy retriever from the Chroma client, embedding function and
# storage directory set up earlier in the notebook.
retriever = ChromadbRM(
    collection_name=collection_name,
    persist_directory=persist_directory,
    client=chroma_client,
    embedding_function=default_ef,
    k=topk_docs_to_retrieve,
)

In [13]:
# Sanity-check the retriever on the question of the `example` picked earlier.
retriever(example.question)

[{'id': '78968',
  'score': 0.7096676826477051,
  'long_text': 'Youre not sending them all the past messages, its just loading up your past conversation, and your text is the next message in the conversation. You could get it so it doesnt save message histories and then you wouldnt see past messages, or you can have it so it does save messages histories and you do see past messages. But its just what you see thats different.',
  'metadatas': None},
 {'id': '54242',
  'score': 0.7977485656738281,
  'long_text': 'The sent as text message means your text message was sent using SMS, not iMessage. Because the message wasnt sent through iMessage, neither you or the person you are texting to have access to read receipts, which allow you to see whether or not the person you sent the text message to read your message. This often happens if youre in an area with bad signal, with no access to wifi, 3G, or LTE coverage and the message can only be sent via SMS.',
  'metadatas': None},
 {'id': '1404

# Set up RAG

In [None]:
class RAG(dspy.Module):
    """Retrieve-then-respond program: fetch passages for a question, then answer with chain-of-thought."""

    def __init__(self, retriever):
        """Store the retriever and declare the response predictor.

        Args:
            retriever: Callable that takes a question string and returns an
                iterable of results, each exposing a `long_text` attribute.
        """
        # Initialize dspy.Module's own state explicitly before attaching sub-modules.
        super().__init__()
        self.respond = dspy.ChainOfThought('context, question -> response')
        self.retriever = retriever

    def forward(self, question):
        """Answer `question` using the retrieved passages as context.

        Args:
            question: The question string; it is used both as the retrieval
                query and as the `question` input of the predictor.

        Returns:
            The result of `self.respond`; callers in this notebook read its
            `response` field.
        """
        docs = self.retriever(question)
        # Number the passages from 1 and separate them with blank lines.
        context = "\n\n".join(f"[{i}] {doc.long_text}" for i, doc in enumerate(docs, start=1))
        return self.respond(context=context, question=question)


rag = RAG(retriever)
rag(question="what are high memory and low memory on linux?")

In [None]:
# Print DSPy's record of the most recent LM interaction to see the prompt
# that was built for the `rag` call above.
dspy.inspect_history()

# Evaluation

In [None]:
from dspy.evaluate import SemanticF1

# Instantiate the metric.
metric = SemanticF1(decompositional=True)

# Run `rag` on the inputs of the `example` picked earlier, then score the
# prediction against that example.
pred = rag(**example.inputs())
score = metric(example, pred)

# Report the question, both responses and the score, separated by blank lines.
print(
    f"Question: \t {example.question}\n",
    f"Gold Response: \t {example.response}\n",
    f"Predicted Response: \t {pred.response}\n",
    f"Semantic F1 Score: {score:.2f}",
    sep="\n",
)

In [None]:
# Define an evaluator that we can re-use.
evaluate = dspy.Evaluate(
    devset=devset,
    metric=metric,
    display_progress=True,
    display_table=True,
)

# Evaluate the Chain-of-Thought program.
evaluate(rag)

In [None]:
# Total cost in USD, as calculated by LiteLLM for certain providers;
# history entries whose cost is None are left out of the sum.
cost = sum(entry['cost'] for entry in lm.history if entry['cost'] is not None)
cost


# Optimizer

In [None]:
# use fewer threads if your rate limit is small
tp = dspy.MIPROv2(
    metric=metric,
    auto="light",
    num_threads=2,
)

# Optimize a fresh RAG program against the training set, with at most two
# bootstrapped and two labeled demos, and without the interactive permission prompt.
optimized_rag = tp.compile(
    RAG(retriever),
    trainset=trainset,
    max_bootstrapped_demos=2,
    max_labeled_demos=2,
    requires_permission_to_run=False,
)

In [None]:
# Answer a sample question with the un-optimized `rag` program, for comparison
# with the optimized program's answer to the same question.
baseline = rag(question="cmd+tab does not work on hidden or minimized windows")
print(baseline.response)

In [None]:
# Answer the same sample question with the optimized program.
# Note: this rebinds `pred`, which the evaluation section also assigns.
pred = optimized_rag(question="cmd+tab does not work on hidden or minimized windows")
print(pred.response)

In [None]:
# Print the two most recent entries of DSPy's LM history.
dspy.inspect_history(n=2)

In [None]:
# Score the optimized program with the same evaluator (dev set and metric)
# that was used for the baseline `rag`.
evaluate(optimized_rag)

# Saving and Loading

In [None]:
# Save the optimized RAG program to a JSON file in the working directory.
optimized_rag.save("rag-optimized.json")

In [None]:
# Load the optimized program: build a fresh RAG around the same retriever,
# then load the state saved in "rag-optimized.json" into it.
loaded_rag = RAG(retriever)
loaded_rag.load("rag-optimized.json")

# Run the loaded program on the sample question used for the comparison above.
loaded_rag(question="cmd+tab does not work on hidden or minimized windows")