In [1]:
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline
import torch
from transformers import TextGenerationPipeline
from tqdm.notebook import tqdm
import os
from dataclasses import dataclass, field
from langchain.chains.base import Chain
from datasets import Dataset
from trl.trainer import ConstantLengthDataset
from peft import LoraConfig
from transformers import DataCollatorForLanguageModeling
from trl.trainer import SFTTrainer, SFTConfig
from transformers import Qwen2ForCausalLM, Qwen2TokenizerFast

In [2]:
from dotenv import load_dotenv
# Load environment variables from ../.env before any API call is made
# (presumably the credentials for the model service; TODO confirm).
load_dotenv("../.env")

True

In [3]:
def format_options(options):
    """Render answer options as lettered lines ("A: ...", "B: ...", ...).

    Letters are assigned consecutively starting at "A", following the order of
    ``options``. An empty sequence produces an empty string.
    """
    first_letter = ord("A")
    labelled = []
    for index, text in enumerate(options):
        labelled.append(f"{chr(first_letter + index)}: {text}")
    return "\n".join(labelled)

In [4]:
@dataclass
class QuestionItem:
    question: str
    options: list[str]
    reasoning: str | None = field(default=None)
    answer: str | None = field(default=None)

@dataclass
class Entry:
    """A problem statement together with the questions asked about it."""

    # Background text shared by every question of this entry.
    problem: str = ""
    # default_factory gives each Entry its own list instead of a shared one.
    questions: list[QuestionItem] = field(default_factory=list)

In [5]:
def parse_file(file_path: str) -> list[Entry]:
    """Load entries from a JSON Lines file.

    Every non-blank line must be a JSON object with a "problem" value and a
    "questions" list; each element of "questions" is expanded as keyword
    arguments to ``QuestionItem``.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Returns:
        One ``Entry`` per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no "problem" or "questions" key.
        TypeError: If a question object carries keys ``QuestionItem`` rejects.
    """
    import json

    entries = []
    # Decode explicitly as UTF-8: the records contain Chinese text and the
    # platform default encoding is not guaranteed to be UTF-8.
    with open(file_path, "r", encoding="utf-8") as f:
        # Stream line by line instead of materialising the whole file.
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line)
            questions = [QuestionItem(**item) for item in record["questions"]]
            entries.append(Entry(problem=record["problem"], questions=questions))
    return entries

In [6]:
from concurrent.futures import ThreadPoolExecutor

In [7]:
def generate_reasoning_inplace(
    entries: list[Entry],
    model: str = "qwen1.5-0.5b-chat",
    max_workers: int = 5,
    max_retries: int = 1,
) -> None:
    """Fill in ``question.reasoning`` for every question by querying a model.

    For each question a prompt is built from the entry's problem text, the
    question, its lettered options and its ``answer``; the model is asked to
    explain step by step how to reach that answer. The response text is stored
    on the question in place.

    A question that already has reasoning is skipped when the answer letter
    extracted from that reasoning equals ``question.answer``. When no letter
    can be extracted, the reasoning is appended to ``./a.txt`` and the
    question is skipped as well. Only a mismatch triggers regeneration.

    Side effects: appends every prompt/response pair to ``reasoning.txt``,
    appends one line per model call to ``./cnt.txt``, prints mismatch and
    failure messages, and displays progress bars.

    Args:
        entries: Entries whose questions are updated in place.
        model: Model name passed to the dashscope client.
        max_workers: Number of worker threads issuing requests.
        max_retries: Attempts per question; a new attempt is made only when
            the call raised or the extracted answer did not match.
    """
    import re
    import threading
    import time

    import dashscope

    template = """你是一个逻辑推理专家，擅长解决逻辑推理问题。以下是一个逻辑推理的题目，形式为单项选择题。所有的问题都是（close-world assumption）闭世界假设，即未观测事实都为假。每个问题都保证能通过一系列基于形式逻辑的推理（包括同一律，矛盾律，排中律的使用等）得到确定的唯一答案。我会向你提供答案，而你要给出逐步的解析来教会我如何得到答案。我是一个小学生，所以每一步的推理不要太复杂。
{problem}

### 问题
{question}
{options}

### 答案
{answer}

### 分析过程"""
    # NOTE(review): a single client object is shared by all worker threads;
    # confirm that dashscope.Conversation.call is safe to use that way.
    client = dashscope.Conversation()
    progress_q = tqdm(total=sum(len(entry.questions) for entry in entries))
    # Captures the first capital letter following the answer keyword on the
    # same line; the last such capture in a text is taken as its final answer.
    answer_regex = re.compile(r"答案.*?([A-Z])")
    # Serialises appends to the shared log files across worker threads so
    # that records from different questions cannot interleave.
    log_lock = threading.Lock()

    def extract_answer(reasoning):
        """Return the last answer letter found in ``reasoning``, or None."""
        matches = answer_regex.findall(reasoning)
        return matches[-1] if matches else None

    def append_text(path, text):
        """Append ``text`` to ``path`` as UTF-8 while holding the log lock."""
        with log_lock, open(path, "a", encoding="utf-8") as f:
            f.write(text)

    def process_question(entry, question):
        """Validate existing reasoning or request new reasoning for one question."""
        try:
            if question.reasoning is not None:
                answer = extract_answer(question.reasoning)
                if answer is None:
                    # No answer letter could be extracted: record it and move on.
                    append_text("./a.txt", "1\n" + question.reasoning)
                    return
                if answer == question.answer:
                    # Existing reasoning already reaches the expected answer.
                    return
                # Mismatch: fall through and regenerate the reasoning.
            for attempt in range(max_retries):
                try:
                    reasoning_prompt = template.format(
                        problem=entry.problem,
                        question=question.question,
                        options=format_options(question.options),
                        answer=question.answer,
                    )
                    resp = client.call(model=model, prompt=reasoning_prompt)
                    question.reasoning = resp.output.text
                    append_text(
                        "reasoning.txt",
                        f"{reasoning_prompt}\n\n{question.reasoning}\n\n",
                    )
                    # One line per completed model call.
                    append_text("./cnt.txt", "1\n")
                    answer = extract_answer(question.reasoning)
                    if answer is None:
                        break
                    if answer != question.answer:
                        print(f"答案不匹配: {answer} != {question.answer}")
                        # Retry while attempts remain.
                        continue
                    break  # Extracted answer matches; done.
                except Exception as e:
                    # Best effort: a failing question must not abort the batch.
                    if attempt < max_retries - 1:
                        time.sleep(1)  # Brief pause before the next attempt.
                    else:
                        print("Failed to process question.")
                        print(str(e))
        finally:
            # Exactly one tick per question, whichever path was taken.
            progress_q.update(1)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for entry in tqdm(entries):
            for question in entry.questions:
                futures.append(executor.submit(process_question, entry, question))

        # Re-raise anything that escaped a worker (e.g. a log-file I/O error).
        for future in futures:
            future.result()
    progress_q.close()

In [8]:
# One-shot flag read and set by create_train_dataset: while it is False the
# next row that function builds is printed as a sample, then the flag is set
# to True. Re-run this cell to reset it.
once = False

In [9]:
def create_train_dataset(entries: list[Entry]) -> Dataset:
    """Flatten entries into a ``Dataset`` with one row per usable question.

    Questions whose ``reasoning`` or ``answer`` is None are left out. Every row
    carries the keys "problem", "question", "options" (already rendered by
    ``format_options``), "reasoning" and "answer".

    Side effect: the first row built while the module-level ``once`` flag is
    False is printed, and the flag is then set to True.
    """
    global once
    rows = []
    for entry in entries:
        for item in entry.questions:
            if item.reasoning is None or item.answer is None:
                continue
            row = {
                "problem": entry.problem,
                "question": item.question,
                "options": format_options(item.options),
                "reasoning": item.reasoning,
                "answer": item.answer,
            }
            if not once:
                # Show a single sample row for a quick visual check.
                print(row)
                once = True
            rows.append(row)
    return Dataset.from_list(rows)

In [10]:
# entries = parse_file("./round1_train_data.jsonl")
import pickle

# Resume from the pickled checkpoint instead of re-parsing the raw JSONL.
# NOTE(review): unpickling can execute arbitrary code; only load checkpoint
# files that were produced by this notebook.
with open("entries_false.pkl", "rb") as pkl_file:
    entries = pickle.load(pkl_file)

In [11]:
# Spot-check one loaded entry (problem text plus its questions).
entries[1]

Entry(problem='有一个方程的系数分别为A, B和C，下面是一些已知的规则：\n\n1. 如果B^2 - 4AC = 0，则方程有一个实数解。\n2. 如果B^2 - 4AC > 0，则方程有两个不同的实数解。\n3. 如果B^2 - 4AC < 0，则方程有两个不同的复数解。\n\n根据以上规则，回答以下选择题：', questions=[QuestionItem(question='选择题 2：\n当系数A=7, B=9, C=5时，方程是否有解为6？', options=['是', '否'], reasoning='首先，我们知道方程是 \\( B^2 - 4AC = 0 \\) 的情况。这意味着如果 B^2 - 4AC = 0，那么AC = 0。这意味着 \\( B^2 = 4AC \\)，这意味着 \\( A = 4 \\)。\n\n然后我们又知道系数A=7, B=9, C=5。因此，我们可以利用上述规则来找出相应的方程组。\n\n- 第二个方程：\n\\[ B^2 - 4AC > 0 \\]\n\n由于已经我们知道 B^2 = 4AC，因此我们将 \\( A = 4 \\) 替换为 \\( A = 4 \\) 以解这个方程组。\n\\[ B^2 - 4 \\cdot 4 \\cdot 9 > 0 \\]\n\\[ B^2 - 36 > 0 \\]\n\\[ B^2 > 36 \\]\n\n- 第三个方程：\n\\[ B^2 - 4 \\cdot 4 \\cdot 5 > 0 \\]\n\\[ B^2 - 16 \\cdot 5 > 0 \\]\n\\[ B^2 > 80 \\]\n\n综上所述，所有的方程都满足第二个条件 \\( B^2 - 4 \\cdot 4 \\cdot 9 > 0 \\)，并对应于第三个方程 \\( B^2 - 4 \\cdot 4 \\cdot 5 > 0 \\)。这意味着在满足所有条件的情况下，B^2的值都大于或等于9，因此方程组也有两个不同的实数解。\n\n因此，选项B：“否”不正确。正确的答案是A：是。', answer='A')])

In [12]:
# Shift every answer to the next option letter (wrapping around) so that reasoning is generated for a deliberately wrong answer.
# for entry in entries:
#     for question in entry.questions:
#         question.answer = chr((ord(question.answer) - ord("A") + 1) % len(question.options) + ord("A"))

In [13]:
# with open("./cnt.txt", "w"):
#     pass
# generate_reasoning_inplace(entries)

In [14]:
import pickle

# Checkpoint the current entries. The context manager ensures the file is
# flushed and closed as soon as the dump finishes.
with open("entries_false.pkl", "wb") as pkl_file:
    pickle.dump(entries, pkl_file)

In [15]:
import re

# Captures the first capital letter following the answer keyword on the same
# line; the last capture in a reasoning text is taken as its final answer.
answer_regex = re.compile(r"答案.*?([A-Z])")

# Keep only questions whose reasoning ends on the answer stored for them, and
# only entries that still have at least one such question.
export_entries = []
for entry in entries:
    export_questions = []
    for question in entry.questions:
        if question.reasoning is None:
            continue
        matches = answer_regex.findall(question.reasoning)
        answer = matches[-1] if matches else None
        if answer is None:
            # No answer letter could be extracted from the reasoning.
            continue
        if answer != question.answer:
            print(f"答案不匹配: {answer} != {question.answer}")
            continue
        kept = QuestionItem(
            question=question.question,
            options=question.options,
            reasoning=question.reasoning,
            answer=answer,
        )
        export_questions.append(kept)
    if export_questions:
        export_entries.append(Entry(problem=entry.problem, questions=export_questions))

In [16]:
# Build the training dataset from the filtered entries; create_train_dataset
# prints one sample row the first time it runs while `once` is False.
ds = create_train_dataset(export_entries)

{'problem': '有一个英文到法文的词汇表，包含以下对应词汇：\n\n1. the -> le\n2. cat -> chat\n3. jumps -> sauts\n4. over -> sur\n5. moon -> lune\n6. cow -> vache\n7. plays -> jouer\n8. fiddle -> violon\n9. egg -> bougre\n10. falls -> des chutes\n11. off -> de\n12. wall -> mur\n\n根据这个词汇表，翻译以下英文句子成法文：', 'question': '选择题 1：\n英文句子 "the cat jumps over the moon" 翻译成法文是：', 'options': 'A: le chat saute sur la lune\nB: le chat sauts sur le lune\nC: le sauts chat sur le lune\nD: le chat sauts sur le lune', 'reasoning': '首先，我们需要理解英语句子的意思。从“cat jumps over the moon”我们可以推断出这是一个描述一只猫在月光下跳跃的动作。\n\n接下来，我们需要了解中文里的同义词和否定词。同义词通常指被替换的部分或者表达相同意思的词语。这里，“chat”、“saute”和“play”均具有同样的含义。\n\n然后我们找出每一个成语的中文翻译：“le chat saute”表示在明亮的阳光或温暖的空气中玩耍，“over”代表越过，“sauut”表示烤着，“play”表示玩耍。\n\n综上所述，正确答案是：A：le chat saute sur la lune。\n\n解析过程如下：\n1. “cat” 可以理解为一只小动物；\n2. “jumps” 可以理解为跳过；\n3. “over” 表示越过、跨越；\n4. “suut”表示烤着；\n5. “play”表示玩耍。将以上五个词放在一起形成完整的句子是：le chat saute sur la lune。', 'answer': 'A'}


In [17]:
# Save the dataset under ./false_dataset.
ds.save_to_disk("./false_dataset")

Saving the dataset (0/1 shards):   0%|          | 0/850 [00:00<?, ? examples/s]

In [18]:
# Spot-check the second row of the dataset.
ds[1]

{'problem': '有一个英文到法文的词汇表，包含以下对应词汇：\n\n1. the -> le\n2. cat -> chat\n3. jumps -> sauts\n4. over -> sur\n5. moon -> lune\n6. cow -> vache\n7. plays -> jouer\n8. fiddle -> violon\n9. egg -> bougre\n10. falls -> des chutes\n11. off -> de\n12. wall -> mur\n\n根据这个词汇表，翻译以下英文句子成法文：',
 'question': '选择题 2：\n英文句子 "the cow plays the fiddle" 翻译成法文是：',
 'options': 'A: le vache jouer le violon\nB: le jouer vache le violon\nC: le vache jouer la vièle\nD: la vache joue le violon',
 'reasoning': '首先，我们需要了解每个词在英文中的含义，并且这些词之间的关系。\n\n- "the cow" (the milk牛) 指的是物象\n- "plays" (打牌) 指的是行为\n- "le fiddle" (小提琴) 是指乐器\n\n现在我们可以理解英语句子 "the cow plays the fiddle" 的意思为：这个奶牛正在弹小提琴。在这里，我们看到一个简单的句子结构，即词汇组构成了一个完整的句子，而且在表达完整的意思时不需要多余的词语或句子结构。\n\n接下来，我们需要将所有的单词与它们对应的中文意义进行连接，从而形成法文表达。这个汉语句子"the milk牛 played the fiddle" 非常简单，但是我们可以看到在中文中，“the” 意味着“这”，“the” 和 “playing” 这两个词共同组成了一个完整的句子，其中“ playing” 是动词短语。\n\n因此，正确答案是 B: le jouer vache le violon。',
 'answer': 'B'}

In [19]:
import pickle

# NOTE(review): this writes the *filtered* entries to the same file the
# notebook loads its entries from, so questions dropped by the filter are no
# longer available on the next run. Confirm that overwriting is intended.
with open("entries_false.pkl", "wb") as pkl_file:
    pickle.dump(export_entries, pkl_file)

In [20]:
# Number of entries that kept at least one question after filtering.
len(export_entries)

417