In [1]:
from multiprocessing import Process, Manager
import json
import os
from pprint import pprint
import re
from tqdm import tqdm
import random

import uuid
import openai
import tiktoken
import json
import numpy as np
import requests
from retry import retry
from scipy import sparse
#from rank_bm25 import BM25Okapi
#import jieba
from http import HTTPStatus
import dashscope
from dotenv import dotenv_values
config = dotenv_values('.env')

In [3]:
MODEL_NAME = 'qwen2-7b-instruct'
dashscope.api_key = config['qwen_key'],

@retry(delay=60, tries=3)
def call_qwen_api(MODEL_NAME, query):
    messages = [
        {'role': 'user', 'content': query}]
    response = dashscope.Generation.call(
        MODEL_NAME,
        messages=messages,
        result_format='message',  # set the result is message format.
    )
    if response.status_code == HTTPStatus.OK:
        #print(response)
        return response['output']['choices'][0]['message']['content']
    else:
        print('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
            response.request_id, response.status_code,
            response.code, response.message
        ))
        raise Exception()

In [4]:
def get_prompt(problem, question, options):

    options = '\n'.join(f"{'ABCDEFG'[i]}. {o}" for i, o in enumerate(options))

    prompt = f"""你是一个逻辑推理专家，擅长解决逻辑推理问题。以下是一个逻辑推理的题目，形式为单项选择题。所有的问题都是（close-world assumption）闭世界假设，即未观测事实都为假。请逐步分析问题并在最后一行输出答案，最后一行的格式为"答案是：A"。题目如下：

### 题目:
{problem}

### 问题:
{question}
{options}
"""

    return prompt

In [5]:
def extract(input_text):
    ans_pattern = re.compile(r"答案是：(.)", re.S)

    problems = ans_pattern.findall(input_text)
    return problems[0]

In [6]:
def produce(data, MODEL_NAME, return_list, pid):
    tqdm1 = tqdm
    for task in tqdm1(data):
            problem = task['problem']
            for question in task['questions']:

                prompt = get_prompt(problem, 
                                    question['question'], 
                                    question['options'],
                                    )

                response = call_qwen_api(MODEL_NAME, prompt)
                try:
                    extract_response = extract(response)
                    question[MODEL_NAME] = extract_response
                    if pid == 0:
                        pprint(extract_response)
                    break
                except:
                    pass
            
            return_list.append(task)

In [7]:
def main(ifn, ofn):
    # 如果输出文件已经存在，则不进行处理
    if os.path.exists(ofn):
        pass

    # 设置进程池大小
    POOL_SIZE = 5
    data = []

    # 读取输入文件
    with open(ifn) as reader:
        for line in reader:
            # 解析每一行JSON数据
            sample = json.loads(line)
            data.append(sample)

    # 打乱数据顺序
    random.shuffle(data)

    # 将数据分割成POOL_SIZE个子列表
    datas = [data[i::POOL_SIZE] for i in range(POOL_SIZE)]

    # 使用Manager创建一个共享对象
    with Manager() as manager:
        # 创建一个共享列表，用于存储所有进程的结果
        return_list = manager.list()

        # 创建进程列表
        producers = []
        for i in range(POOL_SIZE):
            # 创建每个进程，传递数据子集、模型名称、结果列表和进程编号
            p = Process(target=produce,
                        args=(datas[i],  # 数据子集
                              MODEL_NAME,  # 模型名称
                              return_list,  # 共享结果列表
                              i,  # 进程编号
                              )
                        )
            producers.append(p)

        # 启动所有进程
        for p in producers:
            p.start()

        # 等待所有进程完成
        for p in producers:
            p.join()

        # 输出结果列表的长度
        print(len(return_list))

        # 写入输出文件
        with open(ofn, 'w') as writer:
            for sample in return_list:
                # 将结果写入文件
                writer.write(json.dumps(sample, ensure_ascii=False))
                writer.write('\n')

    # 输出任务完成的消息
    print("All tasks finished!")

    # 调用评估函数
    evaluate(ofn)

In [8]:
def evaluate(ofn):
    data = []
    with open(ofn) as reader:
        for line in reader:
            sample = json.loads(line)
            data.append(sample)

    pse = 0
    cnt = 0
    tot = 0
    for task in data:
        for question in task['questions']:
            
            if MODEL_NAME in question:
                tot += 1
                cnt += question[MODEL_NAME] == question['answer']
            else:
                pse += 1

    print(cnt, tot, cnt/tot, pse)

In [9]:
if __name__ == '__main__':

    a = extract("""根据欧几里得算法，逐步解析计算两个数6和7的最大公约数（gcd）的步骤如下：

1. 判断6和7是否相等：不相等。
2. 判断6和7大小关系，7 > 6，所以用更大的数7减去较小的数6得到结果1。
3. 现在计算6和1的最大公约数。
4. 6 > 1，根据算法用更大的数6减去较小的数1得到结果5。
5. 再计算5和1的最大公约数。
6. 5 > 1，用5减去1得到结果4。
7. 再计算4和1的最大公约数。
8. 4 > 1，用4减去1得到结果3。
9. 再计算3和1的最大公约数。
10. 3 > 1，用3减去1得到结果2。
11. 再计算2和1的最大公约数。
12. 2 > 1，用2减去1得到结果1。
13. 最后计算1和1的最大公约数，两数相等，gcd即为这两个数，也就是1。

因此，6和7的最大公约数是1。

答案是：C.""")

    print(a)
    main('data/round1_train_data.jsonl', 'result/qwen.jsonl')

C


  0%|          | 0/100 [00:00<?, ?it/s]

'A'


  1%|          | 1/100 [00:02<04:21,  2.65s/it]

'B'


  2%|▏         | 2/100 [00:06<05:21,  3.28s/it]

'B'


  3%|▎         | 3/100 [00:12<06:22,  3.94s/it]

'没'


 10%|█         | 10/100 [00:23<03:13,  2.14s/it]

'这'


 10%|█         | 10/100 [00:26<02:52,  1.92s/it]

'C'


 13%|█▎        | 13/100 [00:29<02:55,  2.02s/it]

'B'


  9%|▉         | 9/100 [00:33<05:23,  3.56s/it]]

Request id: bf06bd7e-4543-9fc2-bffb-5edc0c733343, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 15%|█▌        | 15/100 [00:33<03:01,  2.14s/it]

Request id: b40214e0-dd7c-9eee-b5fa-be73cb4f3da8, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 14%|█▍        | 14/100 [00:35<03:20,  2.33s/it]

'无'


  8%|▊         | 8/100 [00:35<07:29,  4.89s/it]

Request id: ad2bbcf9-c8e3-9f8d-bfff-ecc65012bc52, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: 69c1a308-95ae-9682-98ae-5aa1292fb43e, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: be920a42-3bf3-9ba9-bcb1-6b307133d8ab, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: e9a3d280-4b82-9d4c-a214-802567be6fb2, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: 58a9aa22-9dde-905a-80d2-75b40b6e61c3, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: eaf9c9c7-fc86-94e7-bad9-2f51a582748d, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded

  9%|▉         | 9/100 [00:39<06:41,  4.42s/it]
Process Process-3:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_5276/3504293755.py", line 12, in produce
    response = call_qwen_api(MODEL_NAME, prompt)
  File "<decorator-gen-20>", line 2, in call_qwen_api
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 73, in retry_decorator
    return __retry_internal(partial(f, *args, **kwargs), exceptions, tries, delay, max_delay, backoff, jitter,
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 33, in __retry_internal
    return f()
  File "/tmp/ipykernel_5276/3893397532.py", line 21, in call_qwen_api
    raise Exception()
Exception


Request id: 49644b24-362a-9914-a2ea-208dd319f87c, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 15%|█▌        | 15/100 [00:40<03:48,  2.69s/it]
Process Process-6:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_5276/3504293755.py", line 12, in produce
    response = call_qwen_api(MODEL_NAME, prompt)
  File "<decorator-gen-20>", line 2, in call_qwen_api
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 73, in retry_decorator
    return __retry_internal(partial(f, *args, **kwargs), exceptions, tries, delay, max_delay, backoff, jitter,
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 33, in __retry_internal
    return f()
  File "/tmp/ipykernel_5276/3893397532.py", line 21, in call_qwen_api
    raise Exception()
Exception


Request id: 5fa555cc-6b1a-9d00-aeb3-ff1d919dc522, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


  8%|▊         | 8/100 [00:41<08:01,  5.23s/it]
Process Process-2:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_5276/3504293755.py", line 12, in produce
    response = call_qwen_api(MODEL_NAME, prompt)
  File "<decorator-gen-20>", line 2, in call_qwen_api
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 73, in retry_decorator
    return __retry_internal(partial(f, *args, **kwargs), exceptions, tries, delay, max_delay, backoff, jitter,
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 33, in __retry_internal
    return f()
  File "/tmp/ipykernel_5276/3893397532.py", line 21, in call_qwen_api
    raise Exception()
Exception


Request id: 8c8f9be7-4e51-9d6d-be84-2c89c0bae0a6, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 14%|█▍        | 14/100 [00:42<04:18,  3.01s/it]
Process Process-4:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_5276/3504293755.py", line 12, in produce
    response = call_qwen_api(MODEL_NAME, prompt)
  File "<decorator-gen-20>", line 2, in call_qwen_api
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 73, in retry_decorator
    return __retry_internal(partial(f, *args, **kwargs), exceptions, tries, delay, max_delay, backoff, jitter,
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 33, in __retry_internal
    return f()
  File "/tmp/ipykernel_5276/3893397532.py", line 21, in call_qwen_api
    raise Exception()
Exception
 13%|█▎        | 13/100 [00:48<09:50,  6.78s/it]

Request id: 4a232a8c-f2cf-9256-8e45-08fa9adb493e, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: 68e80911-7d0b-9cba-b0d6-105c62cb296b, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.
Request id: 44658085-add7-9af0-9dc8-267d4524e92a, Status code: 429, error code: Throttling.RateQuota, error message: Requests rate limit exceeded, please try again later.


 13%|█▎        | 13/100 [00:55<06:09,  4.25s/it]
Process Process-5:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_5276/3504293755.py", line 12, in produce
    response = call_qwen_api(MODEL_NAME, prompt)
  File "<decorator-gen-20>", line 2, in call_qwen_api
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 73, in retry_decorator
    return __retry_internal(partial(f, *args, **kwargs), exceptions, tries, delay, max_delay, backoff, jitter,
  File "/usr/local/lib/python3.10/site-packages/retry/api.py", line 33, in __retry_internal
    return f()
  File "/tmp/ipykernel_5276/3893397532.py", line 21, in call_qwen_api
    raise Exception()
Exception


59
All tasks finished!
45 59 0.7627118644067796 95
