<a href="https://colab.research.google.com/github/ayush111111/dspyOllama/blob/main/DSPyWithOllama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ujson



In [None]:
!pip install dspy



In [None]:
# Fetch the Ollama install script from ollama.com and pipe it straight into `sh`.
!curl -fsSL https://ollama.com/install.sh | sh

>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [None]:
import threading
import subprocess
import time

def run_ollama_serve():
  """Launch the Ollama server (`ollama serve`) as a background process.

  `subprocess.Popen` does not wait for the child, so the server keeps
  running after this function returns.
  """
  # The command must be an ordered sequence. A set literal has no defined
  # iteration order, so the program name and its argument could be swapped.
  subprocess.Popen(["ollama", "serve"])

thread = threading.Thread(target=run_ollama_serve)

# Start the server from a separate thread.
thread.start()
# Give the server a few seconds to come up before later cells talk to it.
time.sleep(5)

In [None]:
# !ollama pull qwen2.5-coder:0.5b

In [None]:
# !ollama run qwen2.5-coder:0.5b

In [None]:
def run_ollama_serve():
  """Spawn `ollama run llama3.2` as a background process.

  NOTE: this definition reuses, and therefore replaces, the name of the
  helper from the earlier cell that started `ollama serve`.
  """
  # Known issue: when a size tag is given (e.g. llama3.2:3b), requests fail with
  #   litellm.ServiceUnavailableError: OllamaException:
  #   HTTPConnectionPool(host='localhost', port=11434):
  #   Max retries exceeded with url: /api/generate
  model_command = ["ollama", "run", "llama3.2"]
  subprocess.Popen(model_command)

# Launch the model process from its own thread, then pause for five seconds
# before the following cells run.
thread = threading.Thread(target=run_ollama_serve)
thread.start()
time.sleep(5)


In [None]:
import dspy

# Point DSPy at the local Ollama API (port 11434 on localhost).
# NOTE(review): the `sglang_` prefix looks carried over from another example;
# the names are kept unchanged in case other cells use them.
sglang_port = 11434
sglang_url = "http://localhost:" + str(sglang_port)
sglang_llama = dspy.LM("ollama/llama3.2", api_base=sglang_url)

# Register this LM as the one DSPy modules use by default.
dspy.configure(lm=sglang_llama)

# Alternative: text mode, in which the prompts are *not* formatted as messages.
# sglang_llama_text = dspy.LM("openai/meta-llama/Meta-Llama-3-8B-Instruct", api_base=sglang_url, model_type='text')

## 1) Modules help you describe AI behavior as code, not strings



In [None]:
# Build a chain-of-thought module from a signature string: it takes a
# `question` and produces an `answer` typed as float.
# NOTE(review): the name `math` would shadow the stdlib module if that were
# imported later in the notebook.
math = dspy.ChainOfThought("question -> answer: float")
math(question="what is 32 + 2")

Prediction(
    reasoning='The calculation involves adding 32 and 2.',
    answer=34.0
)

In [None]:
import dspy
# RAG: Retrieve data and Augment the LLM's input with it to Generate a response.
def search_wikipedia(query: str, k: int = 3) -> list[str]:
    """Return the text of the top-`k` passages retrieved for `query`.

    Args:
        query: Natural-language search string sent to the ColBERTv2 endpoint.
        k: Number of passages to request. Defaults to 3, the value that was
            previously hard-coded.

    Returns:
        The `text` field of each retrieved result, in the order returned.
    """
    retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
    results = retriever(query, k=k)
    return [x['text'] for x in results]

# Chain-of-thought module that answers a question given retrieved context.
rag = dspy.ChainOfThought('context, question -> response')

question = "What's the name of the castle that David Gregory inherited?"
rag(context=search_wikipedia(question), question=question)

Prediction(
    reasoning='David Gregory inherited Kinnairdy Castle in 1664.',
    response='Kinnairdy Castle.'
)

In [None]:
# Look at the raw passages the retriever returns for the same question.
question = "What's the name of the castle that David Gregory inherited?"
search_wikipedia(query=question)

['David Gregory (physician) | David Gregory (20 December 1625 – 1720) was a Scottish physician and inventor. His surname is sometimes spelt as Gregorie, the original Scottish spelling. He inherited Kinnairdy Castle in 1664. Three of his twenty-nine children became mathematics professors. He is credited with inventing a military cannon that Isaac Newton described as "being destructive to the human species". Copies and details of the model no longer exist. Gregory\'s use of a barometer to predict farming-related weather conditions led him to be accused of witchcraft by Presbyterian ministers from Aberdeen, although he was never convicted.',
 'Gregory Tarchaneiotes | Gregory Tarchaneiotes (Greek: Γρηγόριος Ταρχανειώτης , Italian: "Gregorio Tracanioto" or "Tracamoto" ) was a "protospatharius" and the long-reigning catepan of Italy from 998 to 1006. In December 999, and again on February 2, 1002, he reinstituted and confirmed the possessions of the abbey and monks of Monte Cassino in Ascoli

## 2) Optimizers tune the prompts and weights of your AI modules
Given a few tens or hundreds of representative inputs of your task and a metric that can measure the quality of your system's outputs, you can use a DSPy optimizer. Different optimizers in DSPy work by:

1. synthesizing good few-shot examples for every module, like **dspy.BootstrapRS**,
2. proposing and intelligently exploring better natural-language instructions for every prompt, like **dspy.MIPROv2**, and
3. building datasets for your modules and using them to finetune the LM weights in your system, like **dspy.BootstrapFinetune**.

In [None]:
# class RAG(dspy.Module):
#     def __init__(self, num_docs=5):
#         self.num_docs = num_docs
#         self.respond = dspy.ChainOfThought('context, question -> response')

#     def forward(self, question):
#         context = search(question, k=self.num_docs)   # defined in tutorial linked below
#         return self.respond(context=context, question=question)

# tp = dspy.MIPROv2(metric=dspy.evaluate.SemanticF1(decompositional=True), auto="medium", num_threads=24)
# optimized_rag = tp.compile(RAG(), trainset=trainset, max_bootstrapped_demos=2, max_labeled_demos=2)

In [None]:
import ujson
from dspy.utils import download

# Download question--answer pairs from the RAG-QA Arena "Tech" dataset.
download("https://huggingface.co/dspy/cache/resolve/main/ragqa_arena_tech_examples.jsonl")

# The file is JSON Lines: one JSON object per line. Read it as UTF-8
# explicitly instead of relying on the platform default, and skip blank
# lines so a trailing newline cannot crash the parser.
with open("ragqa_arena_tech_examples.jsonl", encoding="utf-8") as f:
    data = [ujson.loads(line) for line in f if line.strip()]

In [None]:
# Inspect one raw record: a dict parsed from a single line of the JSONL file.
data[0]

{'question': 'why igp is used in mpls?',
 'response': "An IGP exchanges routing prefixes between gateways/routers.  \nWithout a routing protocol, you'd have to configure each route on every router and you'd have no dynamic updates when routes change because of link failures. \nFuthermore, within an MPLS network, an IGP is vital for advertising the internal topology and ensuring connectivity for MP-BGP inside the network.",
 'gold_doc_ids': [2822, 2823]}

In [None]:
from dspy import Example

# Wrap every raw record in an `Example` and mark `question` as its input field.
data = [Example(**record).with_inputs('question') for record in data]

# Keep one `example` from the data to experiment with in the cells below.
example = data[2]
example

Example({'question': 'why are my text messages coming up as maybe?', 'response': 'This is part of the Proactivity features new with iOS 9: It looks at info in emails to see if anyone with this number sent you an email and if it finds the phone number associated with a contact from your email, it will show you "Maybe". \n\nHowever, it has been suggested there is a bug in iOS 11.2 that can result in "Maybe" being displayed even when "Find Contacts in Other Apps" is disabled.', 'gold_doc_ids': [3956, 3957, 8034]}) (input_keys={'question'})

In [None]:
import random

# Shuffle `data` in place with a fixed seed so the split is reproducible.
# NOTE: because the shuffle mutates `data`, re-running this cell shuffles the
# already-shuffled list again.
random.Random(0).shuffle(data)

# Consecutive, non-overlapping slices of the shuffled data.
trainset = data[:200]
devset = data[200:500]
testset = data[500:1000]

# Show the size of each split.
tuple(len(split) for split in (trainset, devset, testset))

(200, 300, 500)

In [None]:
from dspy.evaluate import SemanticF1
# Reference tutorial for this section: https://dspy.ai/tutorials/rag/

from dspy import ChainOfThought
# Instantiate the metric.
metric = SemanticF1(decompositional=True)
# Baseline program: answer the question directly, with no retrieved context.
cot = ChainOfThought('question -> response')
# Produce a prediction from our `cot` module, using the `example` above as input.
pred = cot(**example.inputs())

# Compute the metric score for the prediction.
# NOTE(review): the saved output of this cell is
# "AttributeError: 'list' object has no attribute 'items'". The failing call is
# not visible from here; TODO confirm whether it is raised by `cot(...)` or by
# `metric(...)`.
score = metric(example, pred)

# Show the question, the gold answer, the prediction, and the resulting score.
print(f"Question: \t {example.question}\n")
print(f"Gold Response: \t {example.response}\n")
print(f"Predicted Response: \t {pred.response}\n")
print(f"Semantic F1 Score: {score:.2f}")

AttributeError: 'list' object has no attribute 'items'

In [None]:
# Show the most recent entry (n=1) of DSPy's LM call history.
dspy.inspect_history(n=1)


In [None]:
# Reusable evaluator bound to the dev set and the metric defined above.
evaluate = dspy.Evaluate(
    devset=devset,
    metric=metric,
    num_threads=24,
    display_progress=True,
    display_table=2,
)

# Score the Chain-of-Thought program with it.
evaluate(cot)