In [1]:
import sys
import os
import dspy
from dspy.datasets import HotPotQA
from dspy.teleprompt import BootstrapFewShot
from dspy.evaluate.evaluate import Evaluate
from dsp.utils import deduplicate
from rich import print
from dsp import LM
from dspy.datasets import HotPotQA
import dspy
from dspy.evaluate.evaluate import Evaluate
import dspy
from functools import cache

In [2]:
lm1 = dspy.OllamaLocal(model='llama3.1:8b-instruct-q4_K_M',max_tokens=150,timeout_s=1000)

In [3]:
lm = dspy.OllamaLocal(model='llama2:latest',timeout_s=1000)


In [4]:
dspy.settings.configure(lm=lm)

Giving Signature as Variable signature turned into prompt in Predict class

In [41]:
# Define signature
signature = 'sentence -> sentiment'
classify = dspy.Predict(signature)

# Run
sentence = "it's a charming and often affecting journey."
classify(sentence=sentence).sentiment

"Sure! Here are the fields for the given sentence:\n\nSentence: it's a charming and often affecting journey.\nSentiment: POSITIVE"

To obtain the prompt sent to the llm

In [42]:
lm.inspect_history(n=1)





Given the fields `sentence`, produce the fields `sentiment`.

---

Follow the following format.

Sentence: ${sentence}
Sentiment: ${sentiment}

---

Sentence: it's a charming and often affecting journey.
Sentiment:[32m Sure! Here are the fields for the given sentence:

Sentence: it's a charming and often affecting journey.
Sentiment: POSITIVE[0m





Class based Signature

In [43]:
# Define signature in Class-based form
class Emotion(dspy.Signature):
    # Describe the task
    """Classify emotions in a sentence."""
    
    sentence = dspy.InputField()
    # Adding description to the output field
    sentiment = dspy.OutputField(desc="Possible choices: sadness, joy, love, anger, fear, surprise.")

classify_class_based = dspy.Predict(Emotion)

# Issue prediction
classify_class_based(sentence=sentence).sentiment

"Sure! Here's the classification of the emotion in the sentence you provided:\n\nSentence: it's a charming and often affecting journey.\nSentiment: joy"

    The description when forming the class under ( describe the task) we add also becomes the part of prompt

In [11]:
lm.inspect_history(n=1)

Using chain of thought

In [44]:
sentence1="Now a days, my life is just all about going to university to attend classes, then go to office to work and then go to gym to train and then going to sleep"

In [49]:
# Apply the class-based signature to Chain of Thought
classify_cot = dspy.ChainOfThought(Emotion)

# Run
classify_cot(sentence=sentence1).sentiment



"Sure! Here's my classification of the emotion in the sentence you provided:\n\nSentence: Now a days, my life is just all about going to university to attend classes, then go to office to work and then go to gym to train and then going to sleep.\nReasoning: Let's think step by step in order to understand the sentiment. We have a repetitive daily routine that involves attending classes, working at an office, exercising at the gym, and finally going to sleep.\nSentiment: Possible choices: monotony, routine, exhaustion, frustration.\n\nThe reason I chose these emotions is because the sentence describes a very structured and"

In [50]:
# Inspect prompt
lm.inspect_history(n=1)





Classify emotions in a sentence.

---

Follow the following format.

Sentence: ${sentence}
Reasoning: Let's think step by step in order to ${produce the sentiment}. We ...
Sentiment: Possible choices: sadness, joy, love, anger, fear, surprise.

---

Sentence: Now a days, my life is just all about going to university to attend classes, then go to office to work and then go to gym to train and then going to sleep
Reasoning: Let's think step by step in order to Sentiment: Possible choices: routine, monotony, exhaustion, frustration.
Sentiment:[32m Sure! Here's my classification of the emotion in the sentence you provided:

Sentence: Now a days, my life is just all about going to university to attend classes, then go to office to work and then go to gym to train and then going to sleep.
Reasoning: Let's think step by step in order to understand the sentiment. We have a repetitive daily routine that involves attending classes, working at an office, exercising at the gym, and finally go

Declaring the retreiver for RAG model

In [5]:
# Configure retriever
rm = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
dspy.settings.configure(rm = rm)


we define the RAG class inherited from dspy.Module. It needs two methods:

The __init__ method will simply declare the sub-modules it needs: dspy.Retrieve and dspy.ChainOfThought. The latter is defined to implement our context, question -> answer signature.
The forward method will describe the control flow of answering the question using the modules we have.

In [6]:
# Define a class-based signature
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

# Chain different modules together to retrieve information from Wikipedia Abstracts 2017, then pass it as context for Chain of Thought to generate an answer
class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)

        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
    
    def forward(self, question):
        context = self.retrieve(question).passages
        answer = self.generate_answer(context=context, question=question)
        return answer

In [7]:
# Initilize our RAG class
rag = RAG()

# # Define a question and pass it into the RAG class
# my_question = "When was the first FIFA World Cup held?"
# rag(question=my_question).answer

In [8]:
lm.inspect_history(n=1)

OPTIMIZERS FOR PROMPT TUNING

In [29]:
from dspy.datasets.hotpotqa import HotPotQA

# For demonstration purpose we will use a small subset of the HotPotQA dataset, 20 for training and testing each
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=20, test_size=20)
trainset = [x.with_inputs('question') for x in dataset.train]
testset = [x.with_inputs('question') for x in dataset.dev]

len(trainset), len(testset)

  table = cls._concat_blocks(blocks, axis=0)


(20, 20)

In [30]:
print("The data set is ",dataset)
print("\n")
print("The train set is ",trainset)
print("\n")
print("The test set is ",testset)

In [57]:
from dspy.teleprompt import BootstrapFewShot

# Simple optimizer example. I am explicitly stating the default values for max_bootstrapped_demos and max_labeled_demos for demonstration purposes
optimizer = BootstrapFewShot(metric=dspy.evaluate.answer_exact_match, max_bootstrapped_demos=3,teacher_settings=lm1)

In [58]:


# Compile!
compiled_rag = optimizer.compile(RAG(), trainset=trainset)

100%|██████████| 20/20 [3:51:56<00:00, 695.80s/it]   

Bootstrapped 0 full traces after 20 examples in round 0.





In [66]:
compiled_rag(question="William Hughes Miller was born in a city with how many inhabitants ?")

Prediction(
    rationale='Context:\n[1] «William Hughes Miller | William Hughes Miller (born March 16, 1941, Kosciusko, Mississippi) is a professor at the University of California, Berkeley and a leading researcher in the field of theoretical chemistry.»\n[2] «William Green Miller | William Green Miller (31 March 1908 – 1974) was an English professional footballer who made four appearances in the Football League, three for Luton Town and one for Southport. He was also on the books of Bolton Wanderers and Stockport County, without appearing in the Football League, and also played for several non-League clubs in',
    answer='The answer to the question is: Kosciusko, Mississippi.\n\nReasoning: Based on the information provided in the context, we know that William Hughes Miller was born in a city called Kosciusko. The population of Kosciusko, Mississippi is approximately 7,000 people, according to the United States Census Bureau in 2020.'
)

In [67]:
lm.inspect_history(n=1)





Answer questions with short factoid answers.

---

Question: Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?
Answer: Tae Kwon Do Times

Question: This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?
Answer: The Waltz King

Question: On the coast of what ocean is the birthplace of Diogal Sakho?
Answer: Atlantic

Question: The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?
Answer: 1950

Question: The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?
Answer: 2010

Question: Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?
Answer: Buena Vista Distribution

Question: Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field

In [68]:
compiled_rag(question="What is the capital of Pakistan")

Prediction(
    rationale='Answer: Islamabad',
    answer="Great! Let's break down the reasoning for the answer:\n\n1. Context: The passage provides information about the location of the capital city of Pakistan.\n2. Reasoning: To determine the capital of Pakistan, we need to think step by step. We know that the capital city is located within the federal Islamabad Capital Territory and is the political seat of Pakistan.\n3. Answer: Based on the information provided, the capital of Pakistan is Islamabad.\n\nSo, the answer is Islamabad!"
)

In [None]:

compiled_rag(question="At My Window was released by which American singer-songwriter?")

In [31]:
from dspy.teleprompt import BootstrapFewShot
teacher_settings = {
    "lm": lm1,
    "verbose": True
    # Add any other relevant settings here
}

# Simple optimizer example. I am explicitly stating the default values for max_bootstrapped_demos and max_labeled_demos for demonstration purposes
optimizer = BootstrapFewShot(metric=dspy.evaluate.answer_exact_match, max_bootstrapped_demos=3,teacher_settings=teacher_settings)

In [33]:
BootstrapFewShot?

[1;31mInit signature:[0m
[0mBootstrapFewShot[0m[1;33m([0m[1;33m
[0m    [0mmetric[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mmetric_threshold[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mteacher_settings[0m[1;33m=[0m[1;33m{[0m[1;33m}[0m[1;33m,[0m[1;33m
[0m    [0mmax_bootstrapped_demos[0m[1;33m=[0m[1;36m4[0m[1;33m,[0m[1;33m
[0m    [0mmax_labeled_demos[0m[1;33m=[0m[1;36m16[0m[1;33m,[0m[1;33m
[0m    [0mmax_rounds[0m[1;33m=[0m[1;36m1[0m[1;33m,[0m[1;33m
[0m    [0mmax_errors[0m[1;33m=[0m[1;36m5[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m      <no docstring>
[1;31mFile:[0m           c:\users\my it store\appdata\local\programs\python\python311\lib\site-packages\dspy\teleprompt\bootstrap.py
[1;31mType:[0m           type
[1;31mSubclasses:[0m     

In [32]:

compiled_rag1 = optimizer.compile(RAG(), trainset=trainset)

100%|██████████| 20/20 [1:14:21<00:00, 223.07s/it]


Bootstrapped 0 full traces after 20 examples in round 0.


In [34]:
compiled_rag1(question="William Hughes Miller was born in a city with how many inhabitants ?")

Prediction(
    rationale='Context:\n[1] «William Hughes Miller | William Hughes Miller (born March 16, 1941, Kosciusko, Mississippi) is a professor at the University of California, Berkeley and a leading researcher in the field of theoretical chemistry.»\n[2] «William Green Miller | William Green Miller (31 March 1908 – 1974) was an English professional footballer who made four appearances in the Football League, three for Luton Town and one for Southport. He was also on the books of Bolton Wanderers and Stockport County, without appearing in the Football League, and also played for several non-League clubs in',
    answer='The answer to the question is: Kosciusko, Mississippi.\n\nReasoning: Based on the information provided in the context, we know that William Hughes Miller was born in a city called Kosciusko. The population of Kosciusko, Mississippi is approximately 7,000 people, according to the United States Census Bureau in 2020.'
)

In [37]:
lm.inspect_history(n=3)





Answer questions with short factoid answers.

---

Question: Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?
Answer: Tae Kwon Do Times

Question: At My Window was released by which American singer-songwriter?
Answer: John Townes Van Zandt

Question: Which of these publications was most recently published, Who Put the Bomp or Self?
Answer: Self

Question: which American actor was Candace Kita guest starred with
Answer: Bill Murray

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: often between 1 and 5 words

---

Context:
[1] «The Victorians | The Victorians - Their Story In Pictures is a 2009 British documentary series which focuses on Victorian art and culture. The four-part series is written and presented by Jeremy Paxman and debuted on BBC One at 9:00pm on Sunday 15 February 2009.»
[2] «Victorian (comics) | 

In [43]:
compiled_rag1(question="What is the capital of USA")

Prediction(
    rationale='Context:\n[1] «United States capital (disambiguation) | The capital of the United States is Washington, D.C.»\n[2] «List of capitals in the United States | Washington, D.C. is the current federal capital city of the United States, as it has been since 1800. Each U.S. state has its own capital city, as do many of its Insular areas. Historically, most states have not changed their capital city since becoming a state, but the capital cities of their respective preceding colonies, territories, kingdoms, and republics typically changed multiple times. There have also been other governments within the current borders of the United States with their',
    answer="Reasoning: Let's think step by step in order to answer the question. The capital of the United States is Washington, D.C., as stated in Context [1]. Additionally, according to Context [2], Washington, D.C. has been the current federal capital city of the United States since 1800. Finally, Context [3] confir

In [45]:
lm.inspect_history(n=1)





Answer questions with short factoid answers.

---

Question: Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?
Answer: Tae Kwon Do Times

Question: This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?
Answer: The Waltz King

Question: On the coast of what ocean is the birthplace of Diogal Sakho?
Answer: Atlantic

Question: The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?
Answer: 1950

Question: The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?
Answer: 2010

Question: Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?
Answer: Buena Vista Distribution

Question: Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field

In [42]:
lm.inspect_history(n=2)





Answer questions with short factoid answers.

---

Question: Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?
Answer: Tae Kwon Do Times

Question: This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?
Answer: The Waltz King

Question: On the coast of what ocean is the birthplace of Diogal Sakho?
Answer: Atlantic

Question: The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?
Answer: 1950

Question: The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?
Answer: 2010

Question: Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?
Answer: Buena Vista Distribution

Question: Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field

In [39]:
lm.history[-1]['prompt']

'Answer questions with short factoid answers.\n\n---\n\nQuestion: Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?\nAnswer: Tae Kwon Do Times\n\nQuestion: This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?\nAnswer: The Waltz King\n\nQuestion: On the coast of what ocean is the birthplace of Diogal Sakho?\nAnswer: Atlantic\n\nQuestion: The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?\nAnswer: 1950\n\nQuestion: The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?\nAnswer: 2010\n\nQuestion: Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?\nAnswer: Buena Vista Distribution\n\nQuestion: Samantha Cristoforetti and Mark Shuttleworth are both best known for being f

In [None]:
lm.inspect_history(n=3)

In [46]:
lm1.inspect_history(n=1)





Answer questions with short factoid answers.

---

Question: Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field to go where?
Answer: space

Question: Having the combination of excellent foot speed and bat speed helped Eric Davis, create what kind of outfield for the Los Angeles Dodgers?
Answer: "Outfield of Dreams"

Question: which American actor was Candace Kita guest starred with
Answer: Bill Murray

Question: Tombstone stared an actor born May 17, 1955 known as who?
Answer: Bill Paxton

Question: The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?
Answer: 2010

Question: Which is taller, the Empire State Building or the Bank of America Tower?
Answer: The Empire State Building

Question: Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?
Answer: Tae Kwon Do Times

Question: Which American actress who made their film