In [1]:
import os

import dspy
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric
from dspy.teleprompt import BootstrapFewShot

from experiment_project.utils.files.read import read_yaml
from experiment_project.utils.initial.util import init_sys_env
# Project-specific environment initialisation (helper from experiment_project;
# its effects are not visible in this notebook).
init_sys_env()

# Path of the YAML file that holds the API credentials.
# The path used to be hard-coded to one machine's filesystem layout; it can now
# be overridden with the ENV_SECRET_CONFIG_FILE environment variable. When the
# variable is unset, the original path is used, so existing setups keep working.
secret_env_file = os.environ.get(
    'ENV_SECRET_CONFIG_FILE',
    '/mnt/d/project/dy/extra/autogen/env_secret_config.yaml',
)

# Parsed configuration; later cells read the 'openai' section from it.
api_configs = read_yaml(secret_env_file)


In [2]:
# Configure the language model from the 'openai' section of the secrets config
# and register it as DSPy's default LM.
model_config = api_configs.get('openai')
turbo = dspy.OpenAI(
    model=model_config.get('model'),
    max_tokens=520,
    api_key=model_config.get('api_key'),
)
dspy.settings.configure(lm=turbo)

# Load math questions from the GSM8K dataset and keep only the first 10
# training and 10 dev examples so the demo stays small.
gsm8k = GSM8K()
gsm8k_trainset = gsm8k.train[:10]
gsm8k_devset = gsm8k.dev[:10]

# gsm8k_trainset and gsm8k_devset are lists of examples; each example has
# `question` and `answer` fields. Display the training slice.
gsm8k_trainset



100%|██████████| 7473/7473 [00:00<00:00, 64498.81it/s]
100%|██████████| 1319/1319 [00:00<00:00, 55060.78it/s]


[Example({'question': "The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?", 'gold_reasoning': "Ella's score is 40 items - 4 items = <<40-4=36>>36 items. Half of Ella's score is 36 items / 2 = <<36/2=18>>18 items. So, Marion's score is 18 items + 6 items = <<18+6=24>>24 items.", 'answer': '24'}) (input_keys={'question'}),
 Example({'question': "Stephen made 10 round trips up and down a 40,000 foot tall mountain. If he reached 3/4 of the mountain's height on each of his trips, calculate the total distance he covered.", 'gold_reasoning': 'Up a mountain, Stephen covered 3/4*40000 = <<3/4*40000=30000>>30000 feet. Coming down, Stephen covered another 30000 feet, making the total distance covered in one round to be 30000+30000 = <<30000+30000=60000>>60000. Since Stephen made 10 round trips up and down the mountain, he covered 10*60000 = <<10*60000=600000>>6

In [3]:
# Inspect a single training example (the first one) to see its fields.
gsm8k_trainset[0]

Example({'question': "The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?", 'gold_reasoning': "Ella's score is 40 items - 4 items = <<40-4=36>>36 items. Half of Ella's score is 36 items / 2 = <<36/2=18>>18 items. So, Marion's score is 18 items + 6 items = <<18+6=24>>24 items.", 'answer': '24'}) (input_keys={'question'})

In [4]:
# With the environment set up, define a custom program that uses the
# ChainOfThought module to reason step by step before producing an answer.

class CoT(dspy.Module):
    """Chain-of-thought program that maps a `question` to an `answer`."""

    def __init__(self):
        super().__init__()
        # String signature: `question` is the input field, `answer` the output field.
        signature = "question -> answer"
        self.prog = dspy.ChainOfThought(signature)

    def forward(self, question):
        """Run the chain-of-thought predictor on `question` and return its result."""
        prediction = self.prog(question=question)
        return prediction


In [5]:
from dspy.teleprompt import BootstrapFewShot

# Optimizer settings: we want to "bootstrap" (i.e. self-generate) 4-shot
# examples for the CoT program; labeled demos are capped at 4 as well.
config = {
    "max_bootstrapped_demos": 4,
    "max_labeled_demos": 4,
}

# Optimize! `gsm8k_metric` is used here; in general the metric tells the
# optimizer how well the program is doing.
teleprompter = BootstrapFewShot(metric=gsm8k_metric, **config)
optimized_cot = teleprompter.compile(
    CoT(),
    trainset=gsm8k_trainset,
)


 40%|████      | 4/10 [00:00<00:00, 348.24it/s]


In [6]:
# An Evaluator measures a program's performance on a given dataset. Once set
# up, it can be reused to score the program under different configurations.
from dspy.evaluate import Evaluate

# Set up the evaluator, which can be used multiple times.
evaluate = Evaluate(
    devset=gsm8k_devset,    # evaluation dataset
    metric=gsm8k_metric,    # evaluation metric
    num_threads=8,          # number of threads to use
    display_progress=True,  # whether to show progress
    display_table=0,        # whether to show the results table
)

# Evaluate our `optimized_cot` program.
evaluate(optimized_cot)

Average Metric: 10 / 10  (100.0): 100%|██████████| 10/10 [00:00<00:00, 1079.34it/s]


100.0

In [7]:
# In DSPy, turbo.inspect_history(n=1) shows the most recent language-model generation record. It lets you see the prompt that was used and the output that came back, which helps when debugging and tuning prompts.
# Show the most recent generation record
turbo.inspect_history(n=1)





Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?
Reasoning: Let's think step by step in order to Reasoning: Let's think step by step in order to determine Marion's score. We start by calculating Ella's score. Since the exam has 40 items and Ella got 4 incorrect answers, Ella's score is: \[ 40 - 4 = 36 \] Next, we need to find Marion's score. According to the problem, Marion got 6 more than half the score of Ella. First, we calculate half of Ella's score: \[ \frac{36}{2} = 18 \] Then, we add 6 to this value to find Marion's score: \[ 18 + 6 = 24 \]
Answer: 24

---

Question: Stephen made 10 round trips up and down a 40,000

"\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?\nReasoning: Let's think step by step in order to Reasoning: Let's think step by step in order to determine Marion's score. We start by calculating Ella's score. Since the exam has 40 items and Ella got 4 incorrect answers, Ella's score is: \\[ 40 - 4 = 36 \\] Next, we need to find Marion's score. According to the problem, Marion got 6 more than half the score of Ella. First, we calculate half of Ella's score: \\[ \\frac{36}{2} = 18 \\] Then, we add 6 to this value to find Marion's score: \\[ 18 + 6 = 24 \\]\nAnswer: 24\n\n---\n\nQuestion: Stephen made 10 rou

In [12]:
# Optimizers Evaluators

prog = ChainOfThought(StringSignature(question -> answer
    instructions='Given the fields `question`, produce the fields `answer`.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    answer = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}'})
))

In [8]:
# In DSPy, the `optimized_cot` program compiled from the teleprompter and the custom module can be used directly to generate answers: pass it a question and get back the model's answer.
# Use the optimized_cot program to answer a question
question = "35+ 29 *23 -89218 是多少?"
answer = optimized_cot(question=question)
# `answer` is the whole Prediction object (it carries both `rationale` and `answer` fields), displayed below.
answer


Prediction(
    rationale="Reasoning: Let's think step by step in order to produce the answer. We need to follow the order of operations (PEMDAS/BODMAS) to solve the expression. \n\n1. First, we perform the multiplication:\n\\[ 29 \\times 23 = 667 \\]\n\n2. Next, we perform the addition and subtraction from left to right:\n\\[ 35 + 667 - 89218 \\]\n\n3. Add 35 and 667:\n\\[ 35 + 667 = 702 \\]\n\n4. Finally, subtract 89218 from 702:\n\\[ 702 - 89218 = -88516 \\]",
    answer='-88516'
)

In [9]:
# Same inspection as before, but asking for the last 10 generation records instead of 1.
turbo.inspect_history(n=10)




Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?
Reasoning: Let's think step by step in order to Reasoning: Let's think step by step in order to determine Marion's score. We start by calculating Ella's score. Since the exam has 40 items and Ella got 4 incorrect answers, Ella's score is: \[ 40 - 4 = 36 \] Next, we need to find Marion's score. According to the problem, Marion got 6 more than half the score of Ella. First, we calculate half of Ella's score: \[ \frac{36}{2} = 18 \] Then, we add 6 to this value to find Marion's score: \[ 18 + 6 = 24 \]
Answer: 24

---

Question: Stephen made 10 round trips up and down a 40,000

"\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?\nReasoning: Let's think step by step in order to Reasoning: Let's think step by step in order to determine Marion's score. We start by calculating Ella's score. Since the exam has 40 items and Ella got 4 incorrect answers, Ella's score is: \\[ 40 - 4 = 36 \\] Next, we need to find Marion's score. According to the problem, Marion got 6 more than half the score of Ella. First, we calculate half of Ella's score: \\[ \\frac{36}{2} = 18 \\] Then, we add 6 to this value to find Marion's score: \\[ 18 + 6 = 24 \\]\nAnswer: 24\n\n---\n\nQuestion: Stephen made 10 rou

In [10]:
# Raw record of the most recent LM call; the displayed dict includes the full 'prompt' text.
turbo.history[-1]

{'prompt': "Given the fields `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?\nReasoning: Let's think step by step in order to Reasoning: Let's think step by step in order to determine Marion's score. We start by calculating Ella's score. Since the exam has 40 items and Ella got 4 incorrect answers, Ella's score is: \\[ 40 - 4 = 36 \\] Next, we need to find Marion's score. According to the problem, Marion got 6 more than half the score of Ella. First, we calculate half of Ella's score: \\[ \\frac{36}{2} = 18 \\] Then, we add 6 to this value to find Marion's score: \\[ 18 + 6 = 24 \\]\nAnswer: 24\n\n---\n\nQuestion: Stephen made 1