In [16]:
from multiprocessing import Process, Manager
import json
import os
from pprint import pprint
import re
from tqdm import tqdm
import random

import uuid
import openai
import tiktoken
import json
import numpy as np
import requests
from retry import retry
from scipy import sparse
#from rank_bm25 import BM25Okapi
#import jieba
from http import HTTPStatus
import dashscope
from dotenv import dotenv_values
config = dotenv_values('.env')

In [17]:
MODEL_NAME = 'qwen-max'
dashscope.api_key = config['qwen_key'],

@retry(delay=60, tries=3)
def call_qwen_api(MODEL_NAME, query):
    messages = [
        {'role': 'user', 'content': query}]
    response = dashscope.Generation.call(
        MODEL_NAME,
        messages=messages,
        result_format='message',  # set the result is message format.
    )
    if response.status_code == HTTPStatus.OK:
        #print(response)
        return response['output']['choices'][0]['message']['content']
    else:
        print('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
            response.request_id, response.status_code,
            response.code, response.message
        ))
        raise Exception()

In [18]:
def get_prompt(problem, question, options):

    options = '\n'.join(f"{'ABCDEFG'[i]}. {o}" for i, o in enumerate(options))

    prompt = f"""你是一个逻辑推理专家，擅长解决逻辑推理问题。以下是一个逻辑推理的题目，形式为单项选择题。所有的问题都是（close-world assumption）闭世界假设，即未观测事实都为假。请逐步分析问题并在最后一行输出答案，最后一行的格式为"答案是：A"。题目如下：

### 题目:
{problem}
### 问题:
{question}
### 选项:
{options}
"""

    return prompt

In [19]:
def extract(input_text):
    ans_pattern = re.compile(r"答案是：(.*)", re.S)

    # 使用 findall 方法找到所有匹配的字符串
    problems = ans_pattern.findall(input_text)
    if len(problems) == 0:
        return '提取失败'

    # 提取答案中出现的所有可能的答案字母
    possible_answers = "ABCDEFG"  # 假设可能的答案字母是 A, B, C, D
    
    answers = [match for match in problems[0] if match in possible_answers]

#     print(answers)  # 输出：['A']
#     ans_pattern = re.compile(r"答案是：(.)", re.S)
#     problems = ans_pattern.findall(input_text)
    if len(answers) == 0:
        return '提取失败'
        
    return answers[0]

In [20]:
def produce(data, MODEL_NAME, return_list, pid):
    tqdm1 = tqdm
    for task in tqdm1(data):
            problem = task['problem']
            for question in task['questions']:

                prompt = get_prompt(problem, 
                                    question['question'],
                                    question['options'],
                                    )

                response = call_qwen_api(MODEL_NAME, prompt)
                question['qwen_respond'] = response
                extract_response = extract(response)
                question[MODEL_NAME] = extract_response
                
                # try:
                #     attemp = 1
                #     while attemp <= 3:
                #         #print(f'try：{attemp}')
                #         extract_response = extract(response)
                #         question[MODEL_NAME] = extract_response
                #         if question[MODEL_NAME] == '':
                #             print(f'wtrong response：{response}')
                #             attemp += 1
                #             continue
                #         else:
                #             attemp = 4
                #     # if pid == 0:
                #     #     pprint(extract_response)
                #     # break
                # except:
                #     pass
            return_list.append(task)

In [21]:
def main(data, ofn):
    # 如果输出文件已经存在，则不进行处理
    # if os.path.exists(ofn):
    #     pass

    # 设置进程池大小
    POOL_SIZE = 5
    #data = []

    # 将数据分割成POOL_SIZE个子列表
    datas = [data[i::POOL_SIZE] for i in range(POOL_SIZE)]

    # 使用Manager创建一个共享对象
    with Manager() as manager:
        # 创建一个共享列表，用于存储所有进程的结果
        return_list = manager.list()

        # 创建进程列表
        producers = []
        for i in range(POOL_SIZE):
            # 创建每个进程，传递数据子集、模型名称、结果列表和进程编号
            p = Process(target=produce,
                        args=(datas[i],  # 数据子集
                              MODEL_NAME,  # 模型名称
                              return_list,  # 共享结果列表
                              i,  # 进程编号
                              )
                        )
            producers.append(p)

        # 启动所有进程
        for p in producers:
            p.start()

        # 等待所有进程完成
        for p in producers:
            p.join()

        # 输出结果列表的长度
        print(len(return_list))

        # 写入输出文件
        with open(ofn, 'w') as writer:
            for sample in return_list:
                # 将结果写入文件
                writer.write(json.dumps(sample, ensure_ascii=False))
                writer.write('\n')

    # 输出任务完成的消息
    print("All tasks finished!")

    # 调用评估函数
    evaluate(ofn)

In [22]:
def evaluate(ofn):
    data = []
    with open(ofn) as reader:
        for line in reader:
            sample = json.loads(line)
            data.append(sample)

    pse = 0
    cnt = 0
    tot = 0
    for task in data:
        for question in task['questions']:
            
            if MODEL_NAME in question:
                tot += 1
                cnt += question[MODEL_NAME] == question['answer']
            else:
                pse += 1

    print(cnt, tot, cnt/tot, pse)

In [23]:
def read_file(ifn):
    # 读取输入文件
    data=[]
    with open(ifn) as reader:
        for line in reader:
            # 解析每一行JSON数据
            sample = json.loads(line)
            data.append(sample)

    # 打乱数据顺序
    #random.shuffle(data)
    return data

In [25]:
if __name__ == '__main__':

    a = extract("""根据欧几里得算法，逐步解析计算两个数6和7的最大公约数（gcd）的步骤如下：

1. 判断6和7是否相等：不相等。
2. 判断6和7大小关系，7 > 6，所以用更大的数7减去较小的数6得到结果1。
3. 现在计算6和1的最大公约数。
4. 6 > 1，根据算法用更大的数6减去较小的数1得到结果5。
5. 再计算5和1的最大公约数。
6. 5 > 1，用5减去1得到结果4。
7. 再计算4和1的最大公约数。
8. 4 > 1，用4减去1得到结果3。
9. 再计算3和1的最大公约数。
10. 3 > 1，用3减去1得到结果2。
11. 再计算2和1的最大公约数。
12. 2 > 1，用2减去1得到结果1。
13. 最后计算1和1的最大公约数，两数相等，gcd即为这两个数，也就是1。

因此，6和7的最大公约数是1。

答案是：应该是阿飞.""")
    # data = read_file('data/round1_train_data.jsonl')
    # main(data[:2],'result/qwen.jsonl')

    vaild_file = read_file('data/valid_data_500.jsonl')
    main(vaild_file, 'result/qwen_vaild.jsonl')

  1%|          | 1/100 [01:56<3:12:26, 116.63s/it]

Request id: 11dc4bce-41fb-95d5-8373-64ea6ea579ae, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


  4%|▍         | 4/100 [03:39<1:11:16, 44.55s/it] 

Request id: ed6c79a1-510e-9984-8441-aa71855504be, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


  6%|▌         | 6/100 [04:35<1:01:32, 39.28s/it]

Request id: 63529b29-4299-9fe7-ac5c-ff675fc1c27e, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


  6%|▌         | 6/100 [05:23<1:19:21, 50.65s/it]

Request id: adec2cc4-36aa-97ae-89a7-05979a172393, Status code: 400, error code: DataInspectionFailed, error message: Output data may contain inappropriate content.


  8%|▊         | 8/100 [07:21<1:46:50, 69.68s/it]

Request id: c45d1c1e-705e-98b4-bf45-4af28a5bb44d, Status code: 400, error code: DataInspectionFailed, error message: Output data may contain inappropriate content.


 10%|█         | 10/100 [08:28<1:13:08, 48.76s/it]

Request id: 82fc2e0b-2c78-9591-9cfa-42b4dd6b3962, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 12%|█▏        | 12/100 [09:17<1:10:43, 48.22s/it]

Request id: 4475ee85-4af8-91a6-acf1-b3e8ebf12852, Status code: 400, error code: DataInspectionFailed, error message: Output data may contain inappropriate content.


  7%|▋         | 7/100 [09:19<2:03:49, 79.88s/it]
Process Process-12:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_548/2464839891.py", line 12, in produce
    response = call_qwen_api(MODEL_NAME, prompt)
  File "<decorator-gen-22>", line 2, in call_qwen_api
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 73, in retry_decorator
    return __retry_internal(partial(f, *args, **kwargs), exceptions, tries, delay, max_delay, backoff, jitter,
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 33, in __retry_internal
    return f()
  File "/tmp/ipykernel_548/1098796623.py", line 21, in call_qwen_api
    raise Exception()
Exception
 21%|██        | 21/100 [20:07<1:31:35, 69.56s/it] 

Request id: d34db93f-e0d0-9f3b-b45c-b2b0f9b74e38, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 28%|██▊       | 28/100 [28:24<1:01:20, 51.12s/it]

Request id: a01e1467-7b2b-90ef-bdb6-a94b99db48d5, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: a61073d9-b0b0-9ebf-b0fe-b4f868e2c3c7, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 43%|████▎     | 43/100 [32:57<35:16, 37.13s/it]   

Request id: 2a52c090-5723-92d8-8092-00dd5e74bfeb, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 35%|███▌      | 35/100 [34:25<46:51, 43.25s/it]  

Request id: 7deab613-d333-97d3-aa19-b67a801677a7, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: ede86708-5071-92ce-b21a-ab1dde8e21a2, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 40%|████      | 40/100 [35:33<55:26, 55.45s/it]

Request id: eb0aa7f3-ec0c-93f6-93b7-26db28f28f40, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 44%|████▍     | 44/100 [38:38<42:21, 45.39s/it]t]

Request id: a72eae47-4f20-960c-adb0-8d89d3993aec, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 50%|█████     | 50/100 [47:27<32:23, 38.87s/it]  

Request id: 006e5351-6a94-992b-9895-a717885e509e, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 60%|██████    | 60/100 [51:58<35:30, 53.26s/it]  

Request id: 9733b136-ce06-9fab-9a1b-9311a791243d, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 72%|███████▏  | 72/100 [1:07:11<26:28, 56.73s/it]

Request id: a47c83ea-016b-9bf8-af5e-f15ba7b21355, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 81%|████████  | 81/100 [1:10:07<24:00, 75.82s/it]

Request id: 97e5b7b7-7aa2-90f7-a7fa-589902e3fa04, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 77%|███████▋  | 77/100 [1:10:08<23:52, 62.30s/it]

Request id: 8dc9e050-6294-9fe5-842e-593c4a28b6d3, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: db44897a-a163-97d4-b4fe-81f1864c3bd1, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 92%|█████████▏| 92/100 [1:12:54<04:07, 30.94s/it]

Request id: f2b81cd8-d6d5-9fce-8f82-ab3cd96c4d1f, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


100%|██████████| 100/100 [1:21:12<00:00, 48.72s/it]
100%|██████████| 100/100 [1:24:56<00:00, 50.96s/it]
100%|██████████| 100/100 [1:35:34<00:00, 57.34s/it]
100%|██████████| 100/100 [1:39:56<00:00, 59.97s/it]


407
All tasks finished!
1697 2247 0.7552291944815309 0


In [10]:
def validate_answers(file):
    validated_problem_sets = []
    
    for data in file:
        if 'promble' in data:
            data['problem'] = data.pop('promble')
        problem = data['problem']
        questions = data['questions']
        _id = data['id']
        
        validated_questions = []
        for question in questions:
            answer = question['answer']
            # 检查答案是否为大写字母
            if len(answer) == 1 and answer.isupper():
                validated_questions.append(question)
            else:
                print(f"Invalid answer '{answer}' for question: {question['question']}")
        
        # 创建一个新的问题集字典，只包含有效的问题
        validated_problem_set = {
            'problem': problem,
            'questions': validated_questions,
            'id': _id
        }
        validated_problem_sets.append(validated_problem_set)
    
    return validated_problem_sets

In [11]:
import json
def write_jsonl(results, filename):
    with open(f'data/{filename}.jsonl', 'w',encoding='utf-8') as outfile:
        for entry in results:
            json.dump(entry, outfile, ensure_ascii=False)
            outfile.write('\n')

In [12]:
file = read_file('data/output.jsonl')
vaild_file = validate_answers(file)
write_jsonl(vaild_file,"valid_data_500")

Invalid answer 'Henry' for question: 选择题 5：
如果从Henry和Tom中选择一人去参加仅限单身人士的活动，应选择谁？
Invalid answer '无法确定' for question: 选择题 6：
是否存在第三位未提及的男性角色是单身汉的可能性？
Invalid answer '无正确选项' for question: 选择题 4：
在两个列表中，哪个数字是唯一同时出现在两个列表开头和结尾位置的数字？
Invalid answer '无直接答案，但根据信息推断John可能喜欢Julie（金发且富有），但选项中没有直接匹配项，因为问题设计需调整以符合逻辑推理框架。' for question: 选择题 4：**
哪位女性符合John的喜好标准？
Invalid answer '否' for question: 选择题 5：
列表处理规则是否保留偶数位置的元素？
Invalid answer '不能' for question: 选择题 4:
物品B能否直接拿到而不需要移动A？
Invalid answer '威尔士' for question: 选择题 5：
如果还有第三个人Emma被标记为Welch，Emma来自哪个国家？
Invalid answer '否' for question: 选择题 6：
假设出现第四个人Frank，未给出他的标记，Frank有可能既不是威尔士也不是苏格兰人吗？
Invalid answer '是' for question: 选择题 6：
是否存在一个人没有恋人？
Invalid answer '否' for question: 选择题 7：
Mary 是否喜欢 John？
Invalid answer '1-4' for question: 选择题 4：
哪两点之间的路径权重总和最大？
Invalid answer '1' for question: 选择题 5：
从点3到点5的最短路径权重是多少？
Invalid answer '是' for question: 选择题 3:
是否可以从 Nancy 通过直达或换乘到达 Saarbruecken？
Invalid answer '是' for question: 选择题 4:
是否存在一条路线，可以从 Freyming 直达或换乘到达