In [1]:
import sys
import os
from pathlib import Path

In [2]:
# Add the `src` folder of the parent directory (resolved against the current
# working directory) to sys.path so the scripts developed inside it can be imported.
# The membership check keeps the cell idempotent: re-running it does not append
# the same entry a second time.
src_path = str((Path(os.pardir) / 'src').resolve())
if src_path not in sys.path:
    sys.path.append(src_path)

In [35]:
# Fail fast with a readable message when the OpenAI key is missing or empty.
# `.get` is used so that an undefined variable triggers this assertion (with its
# message) instead of a bare KeyError.
assert os.environ.get('OPENAI_API_KEY', "") != "", (
    "OPENAI_API_KEY is not set or is empty; define it in the environment "
    "before running this notebook."
)

---

In [7]:
import dspy

# Minimal Example 

## Setup

In [13]:
import dspy
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric

# Build the OpenAI-backed language model and register it in dspy's settings.
turbo = dspy.OpenAI(
    model='gpt-3.5-turbo-instruct',
    max_tokens=250,
)
dspy.settings.configure(lm=turbo)

# Load the GSM8K math questions and keep handles to the train and dev splits.
gsm8k = GSM8K()
gsm8k_trainset = gsm8k.train
gsm8k_devset = gsm8k.dev

100%|██████████| 7473/7473 [00:00<00:00, 45892.04it/s]
100%|██████████| 1319/1319 [00:00<00:00, 63480.06it/s]


In [14]:
# Report the number of examples in each split, surrounded by blank lines.
# NOTE: the "Test set" label reports the size of `gsm8k_devset` (the dev split).
print(
    "",
    f"Train set: {len(gsm8k_trainset)}",
    f"Test set: {len(gsm8k_devset)}",
    "",
    sep="\n",
)


Train set: 200
Test set: 300



In [15]:
# Inspect the first example of the training split.
gsm8k_trainset[0]

Example({'question': "The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?", 'gold_reasoning': "Ella's score is 40 items - 4 items = <<40-4=36>>36 items. Half of Ella's score is 36 items / 2 = <<36/2=18>>18 items. So, Marion's score is 18 items + 6 items = <<18+6=24>>24 items.", 'answer': '24'}) (input_keys={'question'})

## Module definition

In [17]:
class CoT(dspy.Module):
    """DSPy module that wraps a single `dspy.ChainOfThought` predictor.

    The predictor is built from the signature string "question -> answer".
    """

    def __init__(self):
        """Initialise the base module and create the chain-of-thought predictor."""
        super().__init__()
        self.prog = dspy.ChainOfThought("question -> answer")

    def forward(self, question):
        """Run the predictor on `question` and return its prediction unchanged.

        Args:
            question: The question passed to the predictor as the `question` field.

        Returns:
            Whatever `self.prog` returns for that question.
        """
        prediction = self.prog(question=question)
        return prediction

## Optimization definition

In [18]:
from dspy.teleprompt import BootstrapFewShot

# Optimizer settings: we want to "bootstrap" (i.e., self-generate) 4-shot examples of our CoT program.
config = {
    "max_bootstrapped_demos": 4,
    "max_labeled_demos": 4,
}

# The optimizer is given `gsm8k_metric`; in general, the metric tells it how well it is doing.
teleprompter = BootstrapFewShot(metric=gsm8k_metric, **config)

# Optimize: compile a fresh CoT program against the training split.
optimized_cot = teleprompter.compile(CoT(), trainset=gsm8k_trainset)

  2%|▎         | 5/200 [00:19<12:27,  3.84s/it]


## Evaluate 

In [19]:
from dspy.evaluate import Evaluate

# Build the evaluator once over the dev split; it can be called on several programs.
evaluate = Evaluate(
    devset=gsm8k_devset,
    metric=gsm8k_metric,
    num_threads=4,
    display_progress=True,
    display_table=0,
)

# Score the `optimized_cot` program; the returned value is displayed as the cell output.
evaluate(optimized_cot)

Average Metric: 224 / 300  (74.7): 100%|██████████| 300/300 [02:55<00:00,  1.71it/s]


74.67

## Tryout 

In [30]:
# Run the compiled program on a new word problem and keep the prediction for inspection.
pred = optimized_cot(
    question="Sarah buys 10 pencils on Monday. Then she buys 14 more pencils on Tuesday. On Wednesday she buys triple the number of pencils she did on Tuesday. How many pencils does she have?"
)

In [36]:
# Display the `answer` field of the prediction.
pred.answer

'66'

In [37]:
# Display the `rationale` field of the prediction (the reasoning text that accompanies the answer).
pred.rationale

'find the answer. We know that Sarah bought 10 pencils on Monday and 14 more on Tuesday. This means she has 10 + 14 = 24 pencils. On Wednesday, she bought triple the number of pencils she did on Tuesday, which is 3 * 14 = 42 pencils. Therefore, Sarah has 24 + 42 = 66 pencils.'

## Prompt

In [52]:
# Show the most recent entry in the LM history: its prompt followed by the text of the first choice in the response.
last_call = turbo.history[-1]
print(last_call['prompt'] + last_call['response']['choices'][0]['text'])

Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?
Reasoning: Let's think step by step in order to find Marion's score. We know that Ella got 4 incorrect answers, which means she got 36 correct answers. We also know that Marion got 6 more than half of Ella's score, which is 6 more than 36/2 = 18. Therefore, Marion's score is 18 + 6 = 24.
Answer: 24

---

Question: Bridget counted 14 shooting stars in the night sky. Reginald counted two fewer shooting stars than did Bridget, but Sam counted four more shooting stars than did Reginald. How many more shooting stars did Sam count in the night sky than was the average number of shoo