In [12]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import json
import random
from http import HTTPStatus
import dashscope
from dashscope.api_entities.dashscope_response import Role

In [14]:
class AQuADataset(Dataset):
    def __init__(self, data_path, max_records=10):
        with open(data_path, 'r') as file:
            data = [json.loads(line) for line in file]
        data = data[:max_records]
        df = pd.DataFrame(data)
        df = df.head(max_records)
        
        def combine(row):
            choices = [f"{chr(65 + i)}: {option}" for i, option in enumerate(row['options'])]
            return f"Question: {row['question']} Choices: {', '.join(choices)}"
        
        self.inputs = df.apply(combine, axis=1).tolist()
        self.labels = [row['correct'] for row in data]
        
    def __len__(self):
        return len(self.inputs)
    
    def __getitem__(self, idx):
        return self.inputs[idx], self.labels[idx]

# 修改数据路径
aqua_data_path = '/data_zju_2/chenkerui/train.json'

# 实例化AQuA数据集
aqua_dataset = AQuADataset(aqua_data_path)

# 创建DataLoader
aqua_dataloader = DataLoader(aqua_dataset, batch_size=10, shuffle=False)

In [15]:
for batch in aqua_dataloader:
    inputs, labels = batch
    print(f"Inputs: {inputs}")
    print(f"Labels: {labels}")
    break

# 以下代码保持不变
dashscope.api_key = "sk-ea4a0e21cc10489f920bbbf3863d1475"

Inputs: ("Question: Two friends plan to walk along a 43-km trail, starting at opposite ends of the trail at the same time. If Friend P's rate is 15% faster than Friend Q's, how many kilometers will Friend P have walked when they pass each other? Choices: A: A)21, B: B)21.5, C: C)22, D: D)22.5, E: E)23", 'Question: In the coordinate plane, points (x, 1) and (5, y) are on line k. If line k passes through the origin and has slope 1/5, then what are the values of x and y respectively? Choices: A: A)4 and 1, B: B)1 and 5, C: C)5 and 1, D: D)3 and 5, E: E)5 and 3', 'Question: For all numbers p and q, the operation @ is defined by p@q = p^2 - pq. If xy ≠ 0, then which of the following can be equal to zero?\nI. x@y\nII. (xy)@y\nIII. x@(x + y) Choices: A: A)II, B: B)I and II, C: C)I and III, D: D)II and III, E: E)All of the above', 'Question: Carl is facing very difficult financial times and can only pay the interest on a $10,000 loan he has taken. The bank charges him a quarterly compound rate

In [16]:
# 定义调用函数
def call_with_messages(model_name: str, content: str):
    messages = [{'role': 'user', 'content': content}]
    response = dashscope.Generation.call(
        model_name,  # 使用指定模型
        messages=messages,
        result_format='message',
        max_tokens=500
    )
    if response.status_code == HTTPStatus.OK:
        return response.output.choices[0]['message']['content']
    else:
        print(f"Request id: {response.request_id}, Status code: {response.status_code}, error code: {response.code}, error message: {response.message}")
        return None

# 定义推理函数
def inference(input):
    outputs = call_with_messages('qwen-long', input)
    if outputs:
        print("Inference outputs:", outputs)
        return outputs
    else:
        print("Error in getting inference output.")
        return ""

In [17]:
# 定义验证函数
def verify_answer(question, cot):
    verification_prompt = f"Q: {question}. Is the inference process {cot} correct? Just reply me with yes or no"
    verification_result = call_with_messages('qwen-turbo', verification_prompt)
    print(verification_result)
    if verification_result:
        return "yes" in verification_result.lower()
    else:
        print("Error in getting verification output.")
        return False

# 定义重新推理函数
def re_inference(input):
    outputs = call_with_messages('qwen-turbo', input)
    if outputs:
        print("Re-inference outputs:", outputs)
        return outputs
    else:
        print("Error in getting re-inference output.")
        return ""


In [19]:
# 定义训练轮次
epochs = 1

# 初始化准确率列表
accuracies = []

for epoch in range(epochs):
    correct_count = 0
    total_count = 0
    
    for batch in aqua_dataloader:  # 确保提前定义commonsenseqa_dataloader
        inputs, labels = batch

        for question, true_label in zip(inputs, labels):
            # Step 1: Generate initial prompt X0
            X0 = f"Q: {question} A: Let's think step by step(Dont show so many steps in detail)."
            # Generate the output for the first step
            Z = inference(X0)
            
            # Step 2: Generate the second step prompt
            final_prompt = f"{X0} {Z} Therefore, among A through E, the answer is"
            
            # Generate the final output
            predicted_answer = inference(final_prompt)
            
            # Extract the first capital letter as the prediction
            prediction = ""
            for char in predicted_answer:
                if char.isupper():
                    prediction = char
                    break

            # 验证预测答案
            if not verify_answer(question, final_prompt+prediction):
                # 如果验证失败，重新推理答案
                reinference_prompt = f"{X0}" + f" The answer is not {prediction}." + f" Therefore, the answer (chosen among A through E) is"
                corrected_answer = re_inference(reinference_prompt)
                for char in corrected_answer:
                    if char.isupper():
                        prediction = char
                        break

            # 比较预测答案和真实标签并更新计数
            if prediction == true_label:
                correct_count += 1
            total_count += 1
        break

    # 计算当前轮次的准确率
    accuracy = correct_count / total_count if total_count > 0 else 0
    accuracies.append(accuracy)
    print(f"Epoch {epoch + 1}/{epochs} - Accuracy: {accuracy * 100:.2f}%")

# 输出每个轮次的总体准确率
print("Accuracies over epochs:", accuracies)


Inference outputs: To solve this problem, we don't need to calculate the exact time they will meet. Instead, we can focus on the relative speeds of the two friends.

Let's say Friend Q's speed is V km/h. Then Friend P's speed is 1.15V (15% faster).

Since they start at opposite ends and walk towards each other, their combined speed is V + 1.15V = 2.15V.

The distance they will cover before meeting is half the total trail length, which is 43 km / 2 = 21.5 km.

The time it takes for them to meet (t) can be found using the formula:

\[ \text{Distance} = \text{Speed} \times \text{Time} \]

For Friend P, this would be:

\[ 21.5 \text{ km} = 1.15V \times t \]

But we don't need to find t; we want to know how far Friend P has walked. Since they both start at the same time, the distance Friend P walks is also:

\[ \text{Distance of P} = 1.15V \times t \]

So, Friend P will have walked 21.5 km when they meet.

The correct answer is B) 21.5.
Inference outputs: B) 21.5.
Yes.
Inference outputs: Th