In [4]:
import os
import dspy
import mlflow
import pandas as pd
from dotenv import load_dotenv
import kagglehub
import ujson

In [5]:
# Read variables from a local .env file into the process environment.
load_dotenv()

# Handle for the hosted OpenAI model. The key comes from the environment and is
# None when OPENAI_API_KEY is unset. This cell only builds the handle; it does
# not call dspy.configure.
openai_api_key = os.environ.get("OPENAI_API_KEY")
lm = dspy.LM(
    "gpt-4o",
    api_key=openai_api_key,
)

In [6]:
# Llama 3.2 reached through the Ollama chat endpoint on localhost; an empty API key is passed.
local_lm = dspy.LM(
    "ollama_chat/llama3.2",
    api_base="http://localhost:11434",
    api_key="",
)

# Register the local model as the LM that DSPy is configured with.
dspy.configure(lm=local_lm)

In [7]:
# DSPy signature for the Q&A task: one input field (`question`) and one output
# field (`response`), both typed as str.
# NOTE(review): DSPy treats a signature's docstring as the task instructions, so
# the string below is prompt text, not just documentation. It reads "answer"
# where "answers" is presumably meant -- confirm before changing, since editing
# it changes the prompt.
class Chat(dspy.Signature):
    "You are a helpful assistant that answer questions about cats and dogs."
    question: str = dspy.InputField(desc="Questions asked by the user")
    response: str = dspy.OutputField(desc="Response to the question")

class Model(dspy.Module):
    """Single-step DSPy program: answers a question with a chain-of-thought predictor."""

    def __init__(self):
        super().__init__()
        # Chain-of-thought predictor built on the `Chat` signature.
        self.respond = dspy.ChainOfThought(Chat)

    def forward(self, question: str):
        """Run the predictor on `question` and return its result unchanged."""
        prediction = self.respond(question=question)
        return prediction

In [8]:
# Instantiate the unoptimized program; later cells call, evaluate and deep-copy this instance.
model = Model()

In [9]:
# Expects an MLflow tracking server on port 5000, e.g. started from a shell with:
#   mlflow ui --port 5000
mlflow.set_tracking_uri("http://127.0.0.1:5000")
# Select the experiment named "DSPy" for this notebook.
mlflow.set_experiment("DSPy")
# Turn on MLflow's automatic logging for DSPy.
mlflow.dspy.autolog()

In [10]:
import logging

# Raise the threshold of the "mlflow" logger to WARNING.
mlflow_logger = logging.getLogger('mlflow')
mlflow_logger.setLevel(logging.WARNING)

Laddar ner dataset med beskrivning: "Contains 583 unique questions and answers related to dogs and cats.
It includes questions related to health, training, lifestyle, breeds and commonly asked questions."

In [11]:
# Fetch the Kaggle dataset through kagglehub; `path` is what the next cell joins the CSV file name onto.
path = kagglehub.dataset_download("bishnushahi/dog-cat-qa")

In [12]:

# Build the full path of the CSV file located under the downloaded dataset path.
csv_name = 'Dog-Cat-QA.csv'
csv_path = os.path.join(path, csv_name)

In [13]:
# Load the raw Q&A table and preview its first rows.
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0.1,Unnamed: 0,Question,Answer
0,0,What are some examples of breed-typical person...,Breed-typical personalities in dogs are develo...
1,1,How do working and herding dogs typically behave?,Working and herding dogs have business-like di...
2,2,Which breed of dogs is known for strong loyalty?,Collies and Akitas are known for their strong ...
3,3,What instincts do guarding dogs typically disp...,Guarding dogs tend to be protective of their t...
4,4,How does breed specificity impact a dog's abil...,Breed specificity affects how well dogs adapt ...


In [14]:
# Drop the 'Unnamed: 0' column (presumably a row index that was saved into the CSV).
# `errors='ignore'` keeps this cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
df = df.drop(columns='Unnamed: 0', errors='ignore')

# A bare last expression gives the rich table rendering instead of the wrapped
# plain-text dump that print() produces.
df.head()

                                            Question  \
0  What are some examples of breed-typical person...   
1  How do working and herding dogs typically behave?   
2   Which breed of dogs is known for strong loyalty?   
3  What instincts do guarding dogs typically disp...   
4  How does breed specificity impact a dog's abil...   

                                              Answer  
0  Breed-typical personalities in dogs are develo...  
1  Working and herding dogs have business-like di...  
2  Collies and Akitas are known for their strong ...  
3  Guarding dogs tend to be protective of their t...  
4  Breed specificity affects how well dogs adapt ...  


In [15]:
# Rename the columns positionally so they match the field names of the `Chat`
# signature (`question`, `response`). This assumes the frame holds exactly two
# columns at this point, Question followed by Answer.
df.columns = ['question', 'response']

Gör om tabellen till en lista av dictionaries (en per rad) och därefter till DSPy-exempel:

In [16]:
# One plain dict per row, keyed by column name.
records = df.to_dict(orient='records')

# Wrap every row in a dspy.Example and mark `question` as its input field.
data = [dspy.Example(**record).with_inputs('question') for record in records]

In [17]:
# Keep one example (index 2) for spot checks in later cells; the bare name on the last line displays it.
example = data[2]
example

Example({'question': 'Which breed of dogs is known for strong loyalty?', 'response': 'Collies and Akitas are known for their strong sense of loyalty.'}) (input_keys={'question'})

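Till skillnad från vanlig ML-träning ska man vid promptoptimering dela upp datasetet med 20 procent till träning och 80 procent till validering. I den här notebooken delas de 80 procenten upp ytterligare: 60 procent till validering och 20 procent till ett separat testset.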

"For prompt optimizers in particular, it's often better to pass more validation than training.
20% training and 80% validation."

In [18]:
# Number of examples available before splitting.
print(len(data))

583


In [19]:
import random

# Shuffle a copy instead of `data` itself. An in-place shuffle would reshuffle
# the already-shuffled list every time this cell is re-run, silently changing
# the split; with a copy, the same seed always yields the same split.
shuffled = list(data)
random.Random(0).shuffle(shuffled)

# Split boundaries derived from the dataset size (integer arithmetic, so no
# float rounding). For the 583 examples loaded here they evaluate to 116 and
# 466, i.e. the same slices as the previously hard-coded indices.
n_total = len(shuffled)
train_end = n_total * 20 // 100
val_end = n_total * 80 // 100

trainset = shuffled[:train_end]        # 20%
valset = shuffled[train_end:val_end]   # 60%
testset = shuffled[val_end:]           # 20%

len(trainset), len(valset), len(testset)

(116, 350, 117)

Nu laddar jag in en Evaluation metric som i sig också är en DSPy module: 

In [20]:
from dspy.evaluate import SemanticF1

In [21]:
# LM-based metric comparing a predicted response against the gold response.
metric = SemanticF1(decompositional=True)

# Produce a prediction from `model`, using the `example` above as input.
# `example.inputs()` must be unpacked into keyword arguments: passed positionally,
# the whole Example object would be bound to `question` and end up in the prompt
# instead of the question string.
pred = model(**example.inputs())

# Compute the metric score for the prediction.
score = metric(example, pred)

print(f"Question: \t {example.question}\n")
print(f"Gold Response: \t {example.response}\n")
print(f"Predicted Response: \t {pred.response}\n")
print(f"Semantic F1 Score: {score:.2f}")

[92m12:34:19 - LiteLLM:INFO[0m: utils.py:2873 - 
LiteLLM completion() model= llama3.2; provider = ollama_chat
INFO:LiteLLM:
LiteLLM completion() model= llama3.2; provider = ollama_chat


KeyboardInterrupt: 

In [41]:
# Print the most recent LM interaction recorded by DSPy (n=1), for debugging.
dspy.inspect_history(n=1)





[34m[2025-05-18T07:32:01.291338][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str)
2. `ground_truth` (str)
3. `system_response` (str)
Your output fields are:
1. `reasoning` (str)
2. `ground_truth_key_ideas` (str): enumeration of key ideas in the ground truth
3. `system_response_key_ideas` (str): enumeration of key ideas in the system response
4. `discussion` (str): discussion of the overlap between ground truth and system response
5. `recall` (float): fraction (out of 1.0) of ground truth covered by the system response
6. `precision` (float): fraction (out of 1.0) of system response covered by the ground truth
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## ground_truth ## ]]
{ground_truth}

[[ ## system_response ## ]]
{system_response}

[[ ## reasoning ## ]]
{reasoning}

[[ ## ground_truth_key_ideas ## ]]
{ground_truth_key_ideas}

[[ ## system_response_key_ideas ## ]]


In [42]:
# Reusable evaluator: scores a program with `metric`, shows a progress bar and a
# preview table of 2 rows.
# NOTE(review): the baseline is scored on `trainset`, the same split that is later
# handed to the optimizer; confirm whether a held-out split (`testset`) was intended.
evaluate = dspy.Evaluate(
    devset=trainset,
    metric=metric,
    display_progress=True,
    display_table=2,
)

In [43]:
# Run the evaluator on the unoptimized program to get the baseline score.
evaluate(model)

Average Metric: 77.25 / 116 (66.6%): 100%|██████████| 116/116 [00:01<00:00, 64.49it/s]

2025/05/18 07:32:03 INFO dspy.evaluate.evaluate: Average Metric: 77.25493796462922 / 116 (66.6%)





Unnamed: 0,question,example_response,reasoning,pred_response,SemanticF1
0,Describe the distinctive appearance and temperament of the Birman ...,: The Birman cat is colorpointed with long silky hair and four pur...,The Birman cat is a breed known for its distinctive appearance and...,The Birman cat is a breed that combines striking appearance with a...,✔️ [0.774]
1,What other considerations should pet owners keep in mind when choo...,"Pet owners should consider factors like cost, refrigeration requir...","When choosing between wet and dry cat food, pet owners should also...","When choosing between wet and dry cat food, consider your cat's in...",✔️ [0.667]


66.6

In [None]:
from dspy.teleprompt import MIPROv2

# MIPROv2 prompt optimizer, scored with the same `metric` as the baseline, single-threaded.
optimizer = MIPROv2(
    metric=metric,
    num_threads=1,
)

print("Optimizing zero-shot program MIPROv2...")
# Optimize a deep copy so the baseline `model` stays untouched for comparison.
zero_shot_program = optimizer.compile(
    model.deepcopy(),
    trainset=trainset,
    valset=valset,
    # Zero demos of either kind makes this an instruction-only (zero-shot)
    # optimization, which is what the variable name and the message above
    # promise. Without these two arguments MIPROv2 also bootstraps few-shot
    # demonstrations into the prompt.
    max_bootstrapped_demos=0,
    max_labeled_demos=0,
    requires_permission_to_run=False,
)