Refer to the official DSPy repo and in-depth examples here: https://github.com/stanfordnlp/dspy

### Loading API Keys
Set the environment `OPENAI_API_KEY in a .env file which we will be loading using dotenv.

In [2]:
import dotenv
dotenv.load_dotenv("../.env", override=True)

True

### Setting up the model
We will start by initializing the language model. The model can be easily swapped here for API or local models

In [3]:
import dspy

lm = dspy.OpenAI(model='gpt-3.5-turbo')

dspy.settings.configure(lm=lm)

  from .autonotebook import tqdm as notebook_tqdm


## DSPy fundamentals
## Signature

In [16]:
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

predictor = dspy.ChainOfThought(BasicQA)
result = predictor(
    question=("What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?")
)
print(result)

Prediction(
    rationale='produce the answer. We can recall that Robert Irvine, the chef and restaurateur featured in Restaurant: Impossible, is from a specific country.',
    answer='British'
)


### Module

In [14]:
class QA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate_answer = dspy.ChainOfThought(BasicQA)
    
    def forward(self, question):
        prediction = self.generate_answer(question=question)
        return prediction
    
qa = QA()
result = qa(
    question=("What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?")
)
print(result)

Prediction(
    rationale='produce the answer. We can recall that Robert Irvine, the chef and restaurateur featured in Restaurant: Impossible, is from a specific country.',
    answer='British'
)


### RAG

In [10]:
# reload the settings to configure retrieval model 
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
dspy.settings.configure(lm=lm, rm=colbertv2_wiki17_abstracts)

In [18]:
retrieve = dspy.Retrieve(k=3)
question = "What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?"
topK_passages = retrieve(question).passages

print(f"Top {retrieve.k} passages for question: {question} \n", '-' * 30, '\n')

for idx, passage in enumerate(topK_passages):
    print(f'{idx+1}]', passage, '\n')

Top 3 passages for question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible? 
 ------------------------------ 

1] Restaurant: Impossible | Restaurant: Impossible is an American reality television series, featuring chef and restaurateur Robert Irvine, that aired on Food Network from 2011 to 2016. 

2] Jean Joho | Jean Joho is a French-American chef and restaurateur. He is chef/proprietor of Everest in Chicago (founded in 1986), Paris Club Bistro & Bar and Studio Paris in Chicago, The Eiffel Tower Restaurant in Las Vegas, and Brasserie JO in Boston. 

3] List of Restaurant: Impossible episodes | This is the list of the episodes for the American cooking and reality television series "Restaurant Impossible", produced by Food Network. The premise of the series is that within two days and on a budget of $10,000, celebrity chef Robert Irvine renovates a failing American restaurant with the goal of helping to restore it to profitability and prominence.

In [19]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
    
    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

rag = RAG()
rag(question)

Prediction(
    context=['Restaurant: Impossible | Restaurant: Impossible is an American reality television series, featuring chef and restaurateur Robert Irvine, that aired on Food Network from 2011 to 2016.', 'Jean Joho | Jean Joho is a French-American chef and restaurateur. He is chef/proprietor of Everest in Chicago (founded in 1986), Paris Club Bistro & Bar and Studio Paris in Chicago, The Eiffel Tower Restaurant in Las Vegas, and Brasserie JO in Boston.', 'List of Restaurant: Impossible episodes | This is the list of the episodes for the American cooking and reality television series "Restaurant Impossible", produced by Food Network. The premise of the series is that within two days and on a budget of $10,000, celebrity chef Robert Irvine renovates a failing American restaurant with the goal of helping to restore it to profitability and prominence. Irvine is assisted by a designer (usually Taniya Nayak, Cheryl Torrenueva, or Lynn Keagan, but sometimes Vanessa De Leon, Krista Watt

#### Datasets/Examples

In [7]:
from dspy.datasets import HotPotQA

# Load the dataset.
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)

# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]

len(trainset), len(devset)

(20, 50)

In [45]:
trainset[0]

Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys={'question'})

### Optimizers

In [40]:
from dspy.teleprompt import BootstrapFewShot

def validate_context_and_answer(example, pred, trace=None):
    answer_EM = dspy.evaluate.answer_exact_match(example, pred)
    answer_PM = dspy.evaluate.answer_passage_match(example, pred)
    return answer_EM and answer_PM

teleprompter = BootstrapFewShot(metric=validate_context_and_answer)

compiled_rag = teleprompter.compile(RAG(), trainset=trainset)

  0%|          | 0/20 [00:00<?, ?it/s]

 50%|█████     | 10/20 [00:00<00:00, 258.35it/s]

Bootstrapped 4 full traces after 11 examples in round 0.





In [46]:
pred = compiled_rag("What castle did David Gregory inherit?")

print(f"Predicted Answer: {pred.answer}")
print(f"Retrieved Contexts (truncated): {pred.context}")

Predicted Answer: Kinnairdy Castle
Retrieved Contexts (truncated): ['David Gregory (physician) | David Gregory (20 December 1625 – 1720) was a Scottish physician and inventor. His surname is sometimes spelt as Gregorie, the original Scottish spelling. He inherited Kinnairdy Castle in 1664. Three of his twenty-nine children became mathematics professors. He is credited with inventing a military cannon that Isaac Newton described as "being destructive to the human species". Copies and details of the model no longer exist. Gregory\'s use of a barometer to predict farming-related weather conditions led him to be accused of witchcraft by Presbyterian ministers from Aberdeen, although he was never convicted.', 'Gregory Tarchaneiotes | Gregory Tarchaneiotes (Greek: Γρηγόριος Ταρχανειώτης , Italian: "Gregorio Tracanioto" or "Tracamoto" ) was a "protospatharius" and the long-reigning catepan of Italy from 998 to 1006. In December 999, and again on February 2, 1002, he reinstituted and confirmed

### Evaluation

In [53]:
from dspy.evaluate.evaluate import Evaluate

evaluate_on_hotpotqa = Evaluate(devset=devset[:3], num_threads=1, display_progress=True, display_table=5)

# Define a metric function. You can define any custom metrics here 
metric = dspy.evaluate.answer_exact_match

evaluate_on_hotpotqa(compiled_rag, metric=metric)

Average Metric: 3 / 3  (100.0): 100%|██████████| 3/3 [00:00<00:00, 33.61it/s]

Average Metric: 3 / 3  (100.0%)



  df.loc[:, metric_name] = df[metric_name].apply(


Unnamed: 0,question,example_answer,gold_titles,context,pred_answer,answer_exact_match
0,Are both Cangzhou and Qionghai in the Hebei province of China?,no,"{'Qionghai', 'Cangzhou'}","['Cangzhou | Cangzhou () is a prefecture-level city in eastern Hebei province, People\'s Republic of China. At the 2010 census, Cangzhou\'s built-up (""or metro"") area...",No,✔️ [True]
1,Who conducts the draft in which Marc-Andre Fleury was drafted to the Vegas Golden Knights for the 2017-18 season?,National Hockey League,"{'2017 NHL Expansion Draft', '2017–18 Pittsburgh Penguins season'}",['2017–18 Pittsburgh Penguins season | The 2017–18 Pittsburgh Penguins season will be the 51st season for the National Hockey League ice hockey team that was...,National Hockey League,✔️ [True]
2,"The Wings entered a new era, following the retirement of which Canadian retired professional ice hockey player and current general manager of the Tampa Bay...",Steve Yzerman,"{'2006–07 Detroit Red Wings season', 'Steve Yzerman'}","['Steve Yzerman | Stephen Gregory ""Steve"" Yzerman ( ; born May 9, 1965) is a Canadian retired professional ice hockey player and current general manager...",Steve Yzerman,✔️ [True]


100.0

### Structured Outputs

In [None]:
from pydantic import BaseModel, Field

class Output(BaseModel):
  answer: str = Field(
      description = "The answer for the question"
  )
  confidence: float = Field(
      ge=0, le=1,
      description="The confidence score for the answer"
  )

class QA(dspy.Signature):
  """Answer question with short factoid answers."""

  question = dspy.InputField()
  output: Output = dspy.OutputField()

predictor = dspy.TypedPredictor(QA)
predictor(question)

### Assertions & Suggestions

In [None]:
class MathSolver(dspy.Signature):
  """Solves simple math problems"""

  question = dspy.InputField()
  answer = dspy.OutputField(desc="Provide exact answer only")

class MathQA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate_answer = dspy.Predict(MathSolver)

    def forward(self, question):
        pred = self.generate_answer(question=question)
        print(pred)
        dspy.Assert(pred.answer.isdigit(), "Answer must be an int")
        return pred

math_qa = MathQA().activate_assertions()
result = math_qa(
    question=("What is four plus four?")
)
print(result)