In [None]:
import os

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

In [None]:
# Board-writing style sample: a worked elimination solution of a 2x2 linear
# system, written with inline LaTeX. It is a raw string so the LaTeX backslashes
# stay literal. The solve cell passes it as the value of the `ref_form` prompt
# variable, so its braces are never interpreted as template placeholders.
ref_form = r'''
    Equations:
    Solution: $\left\{\begin{array}{l}5x + 4y = 6, (1) \\ 2x + 3y = 1; (2)\end{array}\right.$
    From $(1) \times 2$, get: $10x + 8y = 12. (3)$
    From $(2) \times 5$, get: $10x + 15y = 5. (4)$
    $(3)-(4)$, get: $-7y = 7,$
    Solve to get: $y = -1$.
    Substitute $y = -1$ into $(1)$, get: $5x - 4 = 6$,
    Solve to get: $x = 2$.
'''

# Solver prompt for the "math teacher" persona.
# Template variables: {question} (the problem text) and {ref_form} (the
# board-writing style sample). The doubled braces in \text{{}} are escapes so
# that the template formatter emits literal braces instead of a placeholder.
# The requested output is a single string whose sections are separated by
# "###" markers (Knowledge Point Explanation, Board Writing N, Explanation N).
# NOTE(review): note 6 asks for "minimal Chinese text" although this notebook
# feeds the prompt English-translated problems — confirm that is intended.
prompt_template = r"""
    Role:
    You are Xiaopai, a professional and inspiring math teacher who can solve elementary, middle, and high school math problems. You will explain a problem one-on-one.
    Goals:
    Explain a problem like a teacher based on the given information.

    Skills:
    Can use code to assist in solving problems.
    Capable of handling various calculations.

    Workflows:
    You will explain the problem using the following framework:
        1. Identify the knowledge points involved in the problem.
        2. Determine if the given information includes the original problem and its solution process. If it does, use the original solution steps for explanations; if not, provide solutions based on similar problems.

    First Part: Original Problem and Reference Information
        Problem: {question}
        Grade Level: Middle school, your explanation should match middle school knowledge points and expression without exceeding the scope.
    Second Part: Output in the following string format
        Knowledge Point Explanation: This problem involves some xxx related knowledge points, typically solved using xxx method. ###Board Writing One: ###Explanation One: ###Board Writing Two: ###Explanation Two: ...
    Notes:
        1. Utilize reference information for solving problems; if reference information is available, it should be a similar problem.
        2. Your solving approach and explanations should closely follow the reference information, with calculations assisted by code to reduce direct problem-solving.
        3. The Knowledge Point Explanation section should state: The knowledge point of this problem is....., and the key to solving it is......
        4. Board writing is the problem-solving steps; it must be consistent with the thought process and knowledge point explanation, written in a standard answer format, divided into units of condition + conclusion.
        5. Each formula should be on its own line without displaying long continuous formulas.
        6. Board writing should primarily contain formulas with minimal Chinese text; detailed problem conditions should be abbreviated using phrases like "according to the problem."
        7. Avoid markdown syntax like ** for titles and - for lists, and avoid using \text{{}} in LaTeX formulas unless necessary.
        8. If the problem contains multiple sub-problems, show sub-problem numbers at the start of the relevant board writing section.
        9. Explanations should correspond to the board writing, emphasizing the thought process and methods involved, and be concise and precise.
        10. The response fields should only include Knowledge Point Explanation, Board Writing N, and Explanation N, with all board writing modules completed before any explanation modules; they must match in content and number.
        11. Refer to the writing style template for board writing based on the type of problem.
            {ref_form}
        12. Other Notes:
            a: Middle school inequalities have not learned to use brackets or set notation; avoid using these expressions.
            b: For middle school geometry, if proving congruent triangles or parallel lines, state the theorem used clearly after the formula.
            c: For multiple-choice questions, label options with ABCD instead of numbered brackets.

"""

# Chinese -> English translation prompt. {problem} is the problem text to
# translate; the model is told to output only the English version.
# NOTE(review): the prompt text contains typos ("mathmatical", "Your jobs is").
# They are left untouched here because editing the prompt changes the model
# input — fix deliberately and re-run the evaluation if desired.
chinese2eng_prompt = r"""
    Role:
        You are a professional translator specialized in mathmatical fields. Your jobs is to translate Math problem from Chinese to English
    
    Notes:
        just output the English version of the input problem, nothing else

    Problem to be translated to english:
        {problem}
"""

# English -> Chinese translation prompt. {problem} receives the English
# solution text; the model is told to keep the solution's format and to output
# nothing except the translation.
# NOTE(review): "引文题解" in the Notes line is probably a typo for "英文题解"
# ("English solution") — confirm before changing the prompt.
eng2chinese_template = r"""
    Role:
        你是一个专业的数学领域的翻译人员，你的职责是将英语题目解析翻译成中文

    Notes:
        请将英文版本的题目解析翻译成中文，保留引文题解的格式，不要输出其他任何内容

    需要翻译的内容:
        {problem}
"""

In [None]:
# Shared model client (completion capped at 1000 tokens) and string parser
# used by all three chains below.
llm = ChatOpenAI(model='gpt-4o', max_tokens=1000)
parser = StrOutputParser()

# One prompt object per template, in solve / zh->en / en->zh order.
prompt_solve, prompt_translate, prompt_reverse = (
    ChatPromptTemplate.from_template(template_text)
    for template_text in (prompt_template, chinese2eng_prompt, eng2chinese_template)
)

# Each chain pipes its prompt into the model and then into the string parser.
chain_solve, chain_translate, chain_reverse = (
    prompt_obj | llm | parser
    for prompt_obj in (prompt_solve, prompt_translate, prompt_reverse)
)

In [None]:
import json
# Load the evaluation set: a JSON list of records that each carry a 'problem'
# and an 'answer' field. The encoding is pinned to UTF-8 so the Chinese text is
# decoded the same way on every platform instead of with the locale default.
with open("test_problem_set.json", "r", encoding="utf-8") as file:
    problem_set = json.load(file)

# Parallel lists: problems[i] is the question, answers[i] its reference solution.
problems = [ps['problem'] for ps in problem_set]
answers = [ps['answer'] for ps in problem_set]

In [None]:
# Translate every problem to English one at a time, echoing each translation
# as soon as it comes back so progress is visible in the cell output.
english_problems = []
for zh_problem in problems:
    translated = chain_translate.invoke({"problem": zh_problem})
    print(translated)
    english_problems.append(translated)

In [None]:
# Solve each English problem with the teacher prompt, supplying the shared
# board-writing sample as `ref_form`. The list name keeps its existing
# spelling because the back-translation cell reads it under this name.
model_ansewr_english = []
for en_problem in english_problems:
    solver_inputs = {"question": en_problem, "ref_form": ref_form}
    solution = chain_solve.invoke(solver_inputs)
    print(solution)
    model_ansewr_english.append(solution)

In [None]:
# Translate each English solution back to Chinese, printing every result.
model_answer_chinese = []
for en_solution in model_ansewr_english:
    zh_solution = chain_reverse.invoke({"problem": en_solution})
    print(zh_solution)
    model_answer_chinese.append(zh_solution)

In [None]:
from langchain_core.output_parsers import StrOutputParser
async def check(note1: str, note2: str) -> str:
    """Ask the judge model whether two solutions reach the same final answers.

    Args:
        note1: First solution text.
        note2: Second solution text.

    Returns:
        The judge model's reply with surrounding whitespace removed. The prompt
        asks for exactly one character: '是' when every sub-answer matches,
        '否' otherwise. The value is a string, not a bool; callers compare it
        against '是'.
    """
    llm = ChatOpenAI(model='gpt-4o')
    # Prompt wording fix: "以外" ("other than") replaces the homophone typo
    # "意外" ("unexpected"), and the quotation marks are now paired correctly.
    sys_prompt = r"""
                        你是一个题解检查者，你负责检查两个题解的最终答案是否一致。注意：题解不需要一模一样，
                        只要保证最后每一问的答案都完全一致即可。
                        如果答案一致：请输出是。如果答案不一致：请输出否。请确保输出一个字，不要输出“是”与“否”以外的内容

                        题解1:
                        {note1}
                        
                        题解2:
                        {note2}
                    """
    prompt = ChatPromptTemplate.from_template(sys_prompt)
    parser = StrOutputParser()
    chain = prompt | llm | parser

    # Use the async entry point so this coroutine does not block the event
    # loop while waiting on the API call.
    response = await chain.ainvoke({"note1": note1, "note2": note2})
    # Drop stray leading/trailing whitespace so a reply such as "是\n" still
    # compares equal to '是'.
    return response.strip()

In [None]:
scores = [0 for _ in range(len(model_answer_chinese))]
for i in range(len(model_answer_chinese)):
    bsl = answers[i]
    tst = model_answer_chinese[i]
    chk = await check(bsl, tst)
    if chk == '是':
        scores[i] = 1

In [None]:
# Number of problems whose solution the judge marked as consistent with the
# reference answer (each entry of `scores` is 0 or 1).
sum(scores)

In [None]:
# Pair every original problem with the model's Chinese solution, one record
# per problem, in the original problem order.
write2 = [
    {'problem': problems[idx], 'answer': model_answer_chinese[idx]}
    for idx in range(len(problems))
]

In [None]:
# Create the output directory first so a fresh checkout does not fail with
# FileNotFoundError after all the API calls have already been made.
os.makedirs("outputs", exist_ok=True)
# ensure_ascii=False keeps the Chinese text readable in the file instead of
# writing \uXXXX escapes.
with open("outputs/model_3_output.json", 'w', encoding='utf-8') as file:
    json.dump(write2, file, ensure_ascii=False, indent=4)

In [None]:
# Create the output directory if needed, then write a Markdown report with one
# "问题" / "答案" section pair per record. The encoding is pinned to UTF-8,
# matching the JSON writer, so the Chinese text does not depend on the
# platform's default codec.
os.makedirs("outputs", exist_ok=True)
with open("outputs/model_3_output.md", 'w', encoding='utf-8') as file:
    for st in write2:
        file.write("### 问题：\n" + st['problem'] + '\n')
        file.write("### 答案：\n" + st['answer'] + '\n')
        