In [1]:
import json
from http import HTTPStatus
import dashscope
from dotenv import dotenv_values
from retry import retry
# Read settings from the local `.env` file; `config` is indexed by key name below
# (the API key is looked up as config['qwen_key']).
config = dotenv_values('.env')

In [2]:
# Model identifier passed to every generation request in this notebook.
MODEL_NAME = 'qwen-max'
# The key must be assigned as a plain string: a trailing comma after the
# expression would silently turn the value into a 1-tuple.
dashscope.api_key = config['qwen_key']

@retry(delay=60, tries=3)
def call_qwen_api(MODEL_NAME, query):
    """Send one single-turn user message to DashScope and return the reply text.

    Args:
        MODEL_NAME: Model identifier, forwarded as the first positional
            argument of ``dashscope.Generation.call``.
        query: Prompt text, sent as the only ``user`` message.

    Returns:
        The ``content`` field of the first choice's message.

    Raises:
        RuntimeError: When the response status is not HTTP 200. The message
            carries the request id, status code, error code and error message
            so callers that log ``str(exc)`` get a useful diagnostic. The
            ``@retry`` decorator (tries=3, delay=60) re-invokes the call
            before the error finally propagates.
    """
    messages = [{'role': 'user', 'content': query}]
    response = dashscope.Generation.call(
        MODEL_NAME,
        messages=messages,
        result_format='message',  # request the chat-style "message" result format
    )
    if response.status_code == HTTPStatus.OK:
        return response['output']['choices'][0]['message']['content']

    error_details = 'Request id: %s, Status code: %s, error code: %s, error message: %s' % (
        response.request_id, response.status_code,
        response.code, response.message
    )
    # Keep the per-attempt log line, and also attach the details to the
    # exception so the failure is not reported as an empty message upstream.
    print(error_details)
    raise RuntimeError(error_details)

In [3]:
def get_prompt(problem, question, options):
    """Build the single-choice answering prompt for one question.

    Args:
        problem: Problem statement, placed under the "### 题目:" heading.
        question: Question text, placed under the "### 问题:" heading.
        options: Iterable of option texts; each is labelled with a letter
            taken from 'ABCDEFG' by position (more than seven options raises
            IndexError).

    Returns:
        The complete prompt string. It tells the model to reason step by step
        and to put the answer on the last line.
    """
    letters = 'ABCDEFG'
    labelled = []
    for position, text in enumerate(options):
        labelled.append(f"{letters[position]}. {text}")
    option_block = '\n'.join(labelled)

    return f"""你是一个逻辑推理专家，擅长解决逻辑推理问题。以下是一个逻辑推理的题目，形式为单项选择题。所有的问题都是（close-world assumption）闭世界假设，即未观测事实都为假。请逐步分析问题并在最后一行输出答案，最后一行的格式为"答案是：A"。题目如下：

### 题目:
{problem}

### 问题:
{question}
{option_block}
"""

In [4]:
def get_question_prompt(data):
    """Build the prompt asking the model to write three extra sub-questions.

    Args:
        data: The sample to extend. A string is embedded verbatim; any other
            value is serialized as real JSON (double-quoted keys, non-ASCII
            characters kept readable). Values that cannot be serialized fall
            back to their default string form.

    Returns:
        The prompt string, containing the sample followed by the expected
        output layout.
    """
    # The prompt announces the sample as JSON, so embed actual JSON rather than
    # a Python dict repr (single quotes), which nudges the model towards
    # replies that json.loads cannot parse.
    if isinstance(data, str):
        payload = data
    else:
        try:
            payload = json.dumps(data, ensure_ascii=False)
        except (TypeError, ValueError):
            payload = data  # formatted with str() by the f-string below

    # NOTE(review): the prompt text spells "promble" (presumably "problem") and
    # "子母" (presumably "字母"); left unchanged here because it alters what the
    # model is asked to emit — confirm against the dataset schema before fixing.
    prompt = f"""你是一个逻辑推理问题出题专家，以下JSON是一个problem，对应一些子问题。除了这些子问题外再生成三个其他子问题，保持格式一致并给出参考答案。answer格式为子母序号："A"
### promble如下：
{payload}
### 输出格式如下:
{{
promble:
questions:[
question:
options:[]
answer:
]
id:
}}
"""
    return prompt


In [5]:
def read_file(ifn):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        ifn: Path of the input file; each non-blank line holds one JSON value.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    # Decode explicitly as UTF-8 (matching the writers in this notebook)
    # instead of relying on the platform's default encoding.
    with open(ifn, encoding='utf-8') as reader:
        for line in reader:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            data.append(json.loads(line))

    return data

In [6]:
def write_jsonl(results, filename):
    """Write each entry of ``results`` as one JSON line.

    Args:
        results: Iterable of JSON-serializable entries.
        filename: Base name; the output path is ``data/<filename>.json``.
    """
    target = f'data/{filename}.json'
    with open(target, mode='w', encoding='utf-8') as sink:
        for record in results:
            # Non-ASCII characters are written as-is rather than \u-escaped.
            json.dump(record, sink, ensure_ascii=False)
            sink.write('\n')
            
def write_json(data, filename):
    """Write ``data`` as a single JSON document.

    Args:
        data: JSON-serializable object.
        filename: Base name; the output path is ``data/<filename>.json``.
    """
    with open(f'data/{filename}.json', 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 file,
        # consistent with write_jsonl; the parsed content is unchanged.
        json.dump(data, file, ensure_ascii=False)

In [7]:
import re


def extract_json(response):
    """Pull the payload out of the first ```json fenced block in ``response``.

    Args:
        response: Raw reply text.

    Returns:
        The stripped text between the first ```json fence and the next
        closing fence, or ``response`` unchanged when no such block exists.
    """
    # DOTALL lets the lazy capture group span multiple lines.
    fenced = re.compile(r'```json\s*(.*?)\s*```', re.DOTALL).search(response)
    if fenced is None:
        # No fenced JSON found: hand back the reply as-is.
        return response
    return fenced.group(1).strip()


def process_data(data):
    """Ask the model to extend one sample and return the parsed reply.

    Args:
        data: The sample handed to ``get_question_prompt``.

    Returns:
        The object parsed from the model's reply. If the API call or the JSON
        parsing raises, the error is printed and the original ``data`` is
        returned unchanged.
    """
    query = get_question_prompt(data)
    try:
        raw_reply = call_qwen_api(MODEL_NAME, query)
        # Strip any ```json fence, then parse the remaining text.
        parsed = json.loads(extract_json(raw_reply))
    except Exception as err:
        print("Error: " + str(err))
        return data
    return parsed

In [8]:
# Load the input samples (one JSON object per line) that the batch step below iterates over.
file = read_file('data/round1_train_data.jsonl')

In [9]:
from tqdm import tqdm
import concurrent.futures
#file = file[:5]

# 并行批量处理
def batch_process_questions(file, max_workers=5):
    """Run ``process_data`` over every sample using a thread pool.

    Args:
        file: Iterable of samples; each one is passed to ``process_data``.
        max_workers: Size of the thread pool.

    Returns:
        A list of ``process_data`` results in the same order as the input
        samples.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_data, data) for data in file]
        # Advance the progress bar as tasks finish, in whatever order that is.
        for _ in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            pass
        # Collect in submission order so results line up with the input
        # instead of depending on which request happened to finish first.
        results = [future.result() for future in futures]
    return results

# Process every loaded sample through the model (long-running: one API call per sample).
results = batch_process_questions(file)

# Save the collected results to data/output.json.
write_json(results,"output")

  1%|          | 6/500 [01:13<1:28:21, 10.73s/it]

Error: Expecting value: line 6 column 13 (char 400)


  3%|▎         | 13/500 [02:00<58:30,  7.21s/it]  

Error: Expecting value: line 6 column 13 (char 230)


  4%|▍         | 19/500 [02:48<55:48,  6.96s/it]  

Request id: 00249c6a-b572-9f01-ad4b-fc70ba2a9f0d, Status code: 400, error code: DataInspectionFailed, error message: Output data may contain inappropriate content.


  6%|▌         | 28/500 [04:28<1:25:11, 10.83s/it]

Error: Expecting value: line 6 column 13 (char 347)
Request id: 94fe2504-d10d-9685-9900-f654df48bb7b, Status code: 400, error code: DataInspectionFailed, error message: Output data may contain inappropriate content.


  7%|▋         | 37/500 [05:54<1:04:30,  8.36s/it]

Error: Unterminated string starting at: line 2 column 12 (char 13)


  8%|▊         | 40/500 [06:28<1:23:23, 10.88s/it]

Request id: 404d39a4-6dd5-9aa5-a2e4-71cdc1cc167a, Status code: 400, error code: DataInspectionFailed, error message: Output data may contain inappropriate content.
Error: 


 10%|█         | 52/500 [07:54<57:50,  7.75s/it]  

Error: Expecting value: line 11 column 13 (char 524)


 12%|█▏        | 62/500 [09:13<32:16,  4.42s/it]  

Error: Expecting value: line 8 column 72 (char 560)


 13%|█▎        | 64/500 [09:50<1:17:33, 10.67s/it]

Error: Expecting value: line 6 column 13 (char 593)


 15%|█▌        | 77/500 [11:41<1:06:32,  9.44s/it]

Error: Expecting value: line 6 column 13 (char 1147)


 16%|█▌        | 79/500 [11:50<49:56,  7.12s/it]  

Error: Expecting value: line 6 column 13 (char 200)


 17%|█▋        | 83/500 [12:16<35:30,  5.11s/it]  

Error: Expecting value: line 5 column 58 (char 160)


 25%|██▌       | 126/500 [18:31<1:25:18, 13.69s/it]

Error: Expecting ',' delimiter: line 16 column 147 (char 742)


 29%|██▉       | 147/500 [21:08<48:37,  8.26s/it]  

Error: Unterminated string starting at: line 2 column 12 (char 13)


 31%|███       | 154/500 [21:51<31:16,  5.42s/it]

Error: Expecting value: line 4 column 1 (char 117)


 36%|███▌      | 180/500 [25:41<47:50,  8.97s/it]  

Error: Expecting value: line 6 column 13 (char 163)


 37%|███▋      | 187/500 [26:31<43:40,  8.37s/it]

Error: Expecting value: line 6 column 13 (char 219)


 42%|████▏     | 208/500 [29:23<38:54,  7.99s/it]  

Error: Expecting value: line 6 column 13 (char 428)


 45%|████▍     | 224/500 [31:33<31:30,  6.85s/it]

Error: Invalid \escape: line 2 column 85 (char 86)


 49%|████▊     | 243/500 [34:00<29:22,  6.86s/it]

Error: Expecting value: line 6 column 13 (char 444)


 49%|████▉     | 245/500 [34:27<42:57, 10.11s/it]

Error: Expecting value: line 6 column 13 (char 752)


 50%|█████     | 251/500 [35:14<31:17,  7.54s/it]

Error: Expecting value: line 6 column 13 (char 229)


 53%|█████▎    | 265/500 [37:10<27:47,  7.10s/it]

Error: Expecting property name enclosed in double quotes: line 2 column 1 (char 2)


 54%|█████▍    | 269/500 [37:50<34:31,  8.97s/it]

Error: Unterminated string starting at: line 2 column 12 (char 13)


 54%|█████▍    | 272/500 [38:14<37:24,  9.84s/it]

Error: Expecting value: line 5 column 13 (char 262)


 57%|█████▋    | 285/500 [39:33<16:00,  4.47s/it]

Error: Expecting ',' delimiter: line 2 column 66 (char 67)


 63%|██████▎   | 316/500 [43:54<23:07,  7.54s/it]

Error: Expecting value: line 4 column 218 (char 519)


 73%|███████▎  | 364/500 [50:20<19:51,  8.76s/it]

Error: Unterminated string starting at: line 2 column 12 (char 13)


 78%|███████▊  | 388/500 [53:36<13:26,  7.20s/it]

Error: Unterminated string starting at: line 2 column 12 (char 13)


 78%|███████▊  | 390/500 [53:57<17:09,  9.36s/it]

Error: Unterminated string starting at: line 2 column 12 (char 13)


 78%|███████▊  | 391/500 [54:00<13:24,  7.38s/it]

Error: Expecting value: line 6 column 13 (char 367)


 79%|███████▉  | 396/500 [54:48<16:17,  9.39s/it]

Error: Expecting value: line 6 column 13 (char 473)


 83%|████████▎ | 416/500 [57:32<09:20,  6.67s/it]

Error: Expecting value: line 6 column 13 (char 396)


 89%|████████▊ | 443/500 [1:00:57<06:50,  7.21s/it]

Error: Expecting value: line 6 column 13 (char 311)


 90%|█████████ | 452/500 [1:02:12<08:11, 10.24s/it]

Error: Expecting value: line 6 column 13 (char 264)


 91%|█████████ | 454/500 [1:02:18<05:11,  6.77s/it]

Error: Unterminated string starting at: line 2 column 12 (char 13)


 92%|█████████▏| 458/500 [1:02:47<04:12,  6.00s/it]

Error: Expecting value: line 6 column 13 (char 451)


 96%|█████████▋| 482/500 [1:05:58<02:00,  6.70s/it]

Error: Expecting value: line 6 column 13 (char 594)


 97%|█████████▋| 483/500 [1:06:03<01:43,  6.06s/it]

Error: Expecting value: line 6 column 65 (char 309)


100%|██████████| 500/500 [1:08:37<00:00,  8.23s/it]
