In [1]:
!pip install scipy  tiktoken retry loguru

Looking in indexes: https://mirrors.aliyun.com/pypi/simple
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import json
import os
from pprint import pprint
import re
from tqdm import tqdm
import random

import uuid
import openai
from openai import OpenAI
import tiktoken
import json
import numpy as np
import requests
from scipy import sparse
#from rank_bm25 import BM25Okapi
#import jieba
from http import HTTPStatus


from concurrent.futures import ThreadPoolExecutor, as_completed
from loguru import logger
import json
import time
from tqdm import tqdm

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
#模型下载
from modelscope import snapshot_download

mode_path = snapshot_download('qwen/Qwen2-7B-Instruct')
#mode_path = '../qwen/Qwen2-7B-Instruct/'
lora_path = './output/Qwen2_instruct_lora/20240818/checkpoint-400' # 这里改称你的 lora 输出对应 checkpoint 地址

# 加载tokenizer
tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)

# 加载模型
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="auto",torch_dtype=torch.float16, trust_remote_code=True).eval()

# 加载lora权重
model = PeftModel.from_pretrained(model, model_id=lora_path)

logger.remove()  # 移除默认的控制台输出
logger.add("logs/app_{time:YYYY-MM-DD}.log", level="INFO", rotation="00:00", retention="10 days", compression="zip")

MODEL_NAME = 'Qwen2-7B-Instruct-lora'
model = model.to('cuda:0')

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
def api_retry(MODEL_NAME, query):
    max_retries = 5
    retry_delay = 60  # in seconds
    attempts = 0
    while attempts < max_retries:
        try:
            return call_qwen_local(MODEL_NAME, query) #call_qwen_api(MODEL_NAME, query)
        except Exception as e:
            attempts += 1   
            if attempts < max_retries:
                logger.warning(f"Attempt {attempts} failed for text: {query}. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                logger.error(f"All {max_retries} attempts failed for text: {query}. Error: {e}")
                raise

In [5]:
def call_qwen_local(MODEL_NAME, query):
    inputs = tokenizer.apply_chat_template([{"role": "user", "content": query}],
                                       add_generation_prompt=True,
                                       tokenize=True,
                                       return_tensors="pt",
                                       return_dict=True
                                       ).to('cuda')

    gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)
        outputs = outputs[:, inputs['input_ids'].shape[1]:]
        output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return output

In [6]:
def call_qwen_api(MODEL_NAME, query):
    # 这里采用dashscope的api调用模型推理，通过http传输的json封装返回结果
    
    client = OpenAI(
        base_url="http://localhost:8000/v1",
        api_key="sk-xxx", # 随便填写，只是为了通过接口参数校验
    )
    completion = client.chat.completions.create(
      model=MODEL_NAME,
      messages=[
                # {'role':'system','content':'你是一个解决推理任务的专家，你需要分析出问题中的每个实体以及响应关系。然后根据问题一步步推理出结果。并且给出正确的结论。'},

        {"role": "user", "content": query}
      ]
    )
    return completion.choices[0].message.content

In [5]:
# call_qwen_api(MODEL_NAME,'你好')

In [7]:
# 这里定义了prompt推理模版
import random
def get_prompt(problem, question, options):

    options = [f"{'ABCDEFG'[i]}. {o}" for i, o in enumerate(options)]
    #random.shuffle(options)
    options = '\n'.join(options)

    prompt = f"""你是一个逻辑推理专家，擅长解决逻辑推理问题。以下是一个逻辑推理的题目，形式为单项选择题。所有的问题都是（close-world assumption）闭世界假设，即未观测事实都为假。请逐步分析问题并在最后一行输出答案，最后一行的格式为"答案是：A"。题目如下：

### 题目:
{problem}
### 问题:
{question}
### 选项:
{options}
"""
    # print(prompt)
    return prompt


In [8]:
# 这里使用extract抽取模获得抽取的结果

def extract(input_text):
    #ans_pattern = re.compile(r"(.)", re.S)
    
    ans_pattern = re.compile(r"答案是：(.*)", re.S)
    problems = ans_pattern.findall(input_text)
    if(problems == '' or len(problems) == 0):
        return 'A'
    possible_answers = "ABCDEFG"  # 假设可能的答案字母是 A, B, C, D
    answers = [match for match in problems[0] if match in possible_answers]


    problems = ans_pattern.findall(input_text)
    # print(problems)
    if len(answers) == 0:
        return 'A'
    return answers[0]

In [9]:
def process_datas(datas,MODEL_NAME):
    results = []
    with ThreadPoolExecutor(max_workers=16) as executor:
        future_data = {}
        lasttask = ''
        lastmark = 0
        lens = 0
        # 送入多线程任务
        for data in tqdm(datas, desc="Submitting tasks", total=len(datas)):
            problem = data['problem']
            for id,question in enumerate(data['questions']):
                prompt = get_prompt(problem, 
                                    question['question'], 
                                    question['options'],
                                    )

                future = executor.submit(api_retry, MODEL_NAME, prompt)
                
                future_data[future] = (data,id)
                #time.sleep(0.6)  # 控制每0.6秒提交一个任务
                lens += 1
        # 处理多线程任务
        for future in tqdm(as_completed(future_data), total=lens, desc="Processing tasks"):
            # print('data',data)
            data = future_data[future][0]
            problem_id = future_data[future][1]
            try:
                # res  = future.result()
                # extract_response = extract(res)
                # print('res',extract_response)
                # data['questions'][problem_id]['answer'] = extract_response
                
                res, res1, res2 = future.result(), future.result(), future.result()
                extract_response, extract_response1, extract_response2 = extract(res), extract(res1), extract(res2)
                ans = most_frequent_char(extract_response, extract_response1, extract_response2)
                data['questions'][problem_id]['qwen_respond'] = res
                data['questions'][problem_id][MODEL_NAME] = ans
               
                results.append(data)
                # print('data',data)
                
            except Exception as e:
                logger.error(f"Failed to process text: {data}. Error: {e}")
    
    return results

In [10]:
def most_frequent_char(char1, char2, char3):
    # 创建一个字典来存储每个字符的出现次数
    frequency = {char1: 0, char2: 0, char3: 0}
    
    # 增加每个字符的出现次数
    frequency[char1] += 1
    frequency[char2] += 1
    frequency[char3] += 1
    
    # 找到出现次数最多的字符
    most_frequent = max(frequency, key=frequency.get)
    
    return most_frequent

In [11]:
def main(ifn, ofn):
    if os.path.exists(ofn):
        pass
    data = []
    # 按行读取数据
    with open(ifn) as reader:
        for line in reader:
            sample = json.loads(line)
            data.append(sample)
    datas = data[:10]
    # print(data)
    # 均匀地分成多个数据集
    return_list = process_datas(datas,MODEL_NAME)
    print(len(return_list))
    print("All tasks finished!")
    return return_list

In [12]:
def evaluate(data):
    data = data

    pse = 0
    cnt = 0
    tot = 0
    for task in data:
        for question in task['questions']:
            print(task)
            if MODEL_NAME in question:
                
                tot += 1
                cnt += question[MODEL_NAME] == question['answer']
            else:
                pse += 1

    print(cnt, tot, cnt/tot, pse)

In [13]:
if __name__ == '__main__':

    a = extract("""根据欧几里得算法，逐步解析计算两个数6和7的最大公约数（gcd）的步骤如下：

1. 判断6和7是否相等：不相等。
2. 判断6和7大小关系，7 > 6，所以用更大的数7减去较小的数6得到结果1。
3. 现在计算6和1的最大公约数。
4. 6 > 1，根据算法用更大的数6减去较小的数1得到结果5。
5. 再计算5和1的最大公约数。
6. 5 > 1，用5减去1得到结果4。
7. 再计算4和1的最大公约数。
8. 4 > 1，用4减去1得到结果3。
9. 再计算3和1的最大公约数。
10. 3 > 1，用3减去1得到结果2。
11. 再计算2和1的最大公约数。
12. 2 > 1，用2减去1得到结果1。
13. 最后计算1和1的最大公约数，两数相等，gcd即为这两个数，也就是1。

因此，6和7的最大公约数是1。

答案是：C.""")

    print(a)
    return_list = main('data/round1_test_data.jsonl', 'upload_test.jsonl')


C


Submitting tasks: 100%|██████████| 10/10 [00:00<00:00, 1086.41it/s]
Processing tasks:  48%|████▊     | 12/25 [01:12<01:29,  6.86s/it]../aten/src/ATen/native/cuda/Indexing.cu:1236: indexSelectSmallIndex: block: [15,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1236: indexSelectSmallIndex: block: [15,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1236: indexSelectSmallIndex: block: [15,0,0], thread: [2,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1236: indexSelectSmallIndex: block: [15,0,0], thread: [3,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1236: indexSelectSmallIndex: block: [15,0,0], thread: [4,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1236: indexSelectSmallIndex: block: [15,0,0], thread: [5,0,0] Assertion

12
All tasks finished!





In [14]:
return_list

[{'problem': '设有两个列表操作，第一个列表中包含若干元素，并且可以将第二个列表的元素追加到第一个列表的末尾形成一个新的列表。根据这个操作，请回答以下问题：',
  'questions': [{'question': '选择题 1：\n当第一个列表是[a, b, c]，第二个列表是[d, e]时，新的列表是什么？',
    'options': ['[a, b, c, d]',
     '[a, d, e, b, c]',
     '[a, b, c, d, e]',
     '[d, e, a, b, c]']},
   {'question': '选择题 2：\n如果新列表是[a, b, c, d, e]，而第二个列表是[d, e]，那么第一个列表是什么？',
    'options': ['[a]', '[a, b]', '[a, b, c]', '[b, c]'],
    'qwen_respond': '根据题目描述，操作是将第二个列表的元素追加到第一个列表的末尾。因此，如果新列表是[a, b, c, d, e]，并且第二个列表是[d, e]，那么第一个列表必须包含除了d和e之外的所有元素，即[a, b, c]。\n\n答案是：C',
    'Qwen2-7B-Instruct-lora': 'C'},
   {'question': '选择题 3：\n当第一个列表是[a, b, c]，新的列表是[a, b, c, d, e]，那么第二个列表是什么？',
    'options': ['[d, e]', '[b, c, d]', '[c, d, e]', '[b, d, e]']}],
  'id': 'round1_test_data_001'},
 {'problem': '在一个社交场景中，有几个人互相间存在不同的情感关系。已知的关系如下：\n\n1. Scarlet, White, Peacock 和 Orchid 是女性。\n2. Plum, Mustard 和 Green 是男性。\n3. Scarlet 讨厌 Green。\n4. Green 讨厌 White 和 Scarlet。\n5. White 讨厌 Green 和 Plum。\n6. Plum 讨厌 White。\n7. Mustard 讨厌所有其他人。

In [16]:
def has_complete_answer(questions):
    # 这里假设完整答案的判断逻辑是：每个question都有一个'answer'键
    for question in questions:
        if MODEL_NAME not in question:
            return False
    return True

def filter_problems(data):
    result = []
    problem_set = set()

    for item in data:
        # print('处理的item' ,item)
        problem = item['problem']
        if problem in problem_set:
            # 找到已存在的字典
            for existing_item in result:
                if existing_item['problem'] == problem:
                    # 如果当前字典有完整答案，替换已存在的字典
                    if has_complete_answer(item['questions']):
                        existing_item['questions'] = item['questions']
                        existing_item['id'] = item['id']
                    break
        else:
            # 如果当前字典有完整答案，添加到结果列表
            if has_complete_answer(item['questions']):
                result.append(item)
                problem_set.add(problem)

    return result

In [34]:
return_list

[]

In [17]:
return_list
return_list = filter_problems(return_list)
sorted_data = sorted(return_list, key=lambda x: int(str(x['id'])[-3:]))
print(sorted_data)

[{'problem': '有一个列表，找出该列表的最后一个元素。\n\n下列选项中哪个是列表 `[a, b, c, d]` 的最后一个元素？', 'questions': [{'question': '选择题 1：', 'options': ['a', 'b', 'c', 'd'], 'qwen_respond': '答案是：D', 'Qwen2-7B-Instruct-lora': 'D'}], 'id': 'round1_test_data_002'}, {'problem': '在一个社交场景中，有几个人互相间存在不同的情感关系。已知的关系如下：\n\n1. Scarlet, White, Peacock 和 Orchid 是女性。\n2. Plum, Mustard 和 Green 是男性。\n3. Scarlet 讨厌 Green。\n4. Green 讨厌 White 和 Scarlet。\n5. White 讨厌 Green 和 Plum。\n6. Plum 讨厌 White。\n7. Mustard 讨厌所有其他人。\n8. Scarlet 喜欢 Orchid、White 和 Plum。\n9. Peacock 喜欢 Orchid。\n10. Orchid 喜欢 Peacock。\n11. Plum 喜欢所有人，包括自己。\n12. 如果两个人互相讨厌，他们是敌人。\n13. 如果两个人互相喜欢，他们是朋友。\n14. 如果某人的两个敌人是敌人，那么这两个敌人也是朋友。\n\n根据以上描述，回答以下选择题：', 'questions': [{'question': '选择题 1：\nWhite 是朋友的名单包括哪些人？', 'options': ['Scarlet', 'Green', 'White', 'Mustard', 'Plum'], 'qwen_respond': '首先，我们根据已知信息来确定每个人的情感关系。\n\n- Scarlet, White, Peacock 和 Orchid 是女性。\n- Plum, Mustard 和 Green 是男性。\n- Scarlet 讨厌 Green。\n- Green 讨厌 White 和 Scarlet。\n- White 讨厌 Green 和 Plum。\n- Plum �已经在第6条中

In [18]:
len(sorted_data)

5

In [None]:
def find_missing_ids(dict_list):
    # 提取所有序号
    extracted_ids = {int(d['id'][-3:]) for d in dict_list}
    
    # 创建0-500的序号集合
    all_ids = set(range(10))
    
    # 找出缺失的序号
    missing_ids = all_ids - extracted_ids
    
    return sorted(missing_ids)

# 示例字典列表
dict_list = sorted_data

# 找出缺失的序号
missing_ids = find_missing_ids(dict_list)
print("缺失的序号:", missing_ids)

缺失的序号: []


In [None]:
len(missing_ids)

0

In [None]:
data  = []
with open('round1_test_data.jsonl') as reader:
    for id,line in enumerate(reader):
        if(id in missing_ids):
            sample = json.loads(line)
            for question in sample['questions']:
                question['answer'] = 'A'
            sorted_data.append(sample)
sorted_data = sorted(sorted_data, key=lambda x: int(str(x['id'])[-3:]))
        

In [None]:
with open('upload_test.jsonl', 'w') as writer:
    for sample in sorted_data:
        writer.write(json.dumps(sample, ensure_ascii=False))
        writer.write('\n')

In [None]:
evaluate(sorted_data)

{'problem': '有一个英文到法文的词汇表，包含以下对应词汇：\n\n1. the -> le\n2. cat -> chat\n3. jumps -> sauts\n4. over -> sur\n5. moon -> lune\n6. cow -> vache\n7. plays -> jouer\n8. fiddle -> violon\n9. egg -> bougre\n10. falls -> des chutes\n11. off -> de\n12. wall -> mur\n\n根据这个词汇表，翻译以下英文句子成法文：', 'questions': [{'question': '选择题 1：\n英文句子 "the cat jumps over the moon" 翻译成法文是：', 'options': ['le chat saute sur la lune', 'le chat sauts sur le lune', 'le sauts chat sur le lune', 'le chat sauts sur le lune'], 'answer': 'D', 'Qwen2-7B-Instruct-lora': 'A'}, {'question': '选择题 2：\n英文句子 "the cow plays the fiddle" 翻译成法文是：', 'options': ['le vache jouer le violon', 'le jouer vache le violon', 'le vache jouer la vièle', 'la vache joue le violon'], 'answer': 'A', 'Qwen2-7B-Instruct-lora': 'A'}, {'question': '选择题 3：\n英文句子 "the egg falls off the wall" 翻译成法文是：', 'options': ['le bougre des chutes de le mur', 'le oeuf tombe de le mur', 'le bougre tomber de le mur', 'des chutes le bougre de le mur'], 'answer': 'A', 'Qwen2-7B-In

In [11]:
def format_submit_file(file):
    validated_problem_sets = []
    for data in file:
        problem = data['problem']
        questions = data['questions']
        _id = data['id']
        validated_questions = []
        for question in questions:
            answer = question['answer']
            # 检查答案是否为大写字母
            if len(answer) == 1 and answer.isupper():
                validated_questions.append(question)
            else:
                print(f"Invalid answer '{answer}' for question: {question['question']}")
        # 创建一个新的问题集字典，只包含有效的问题
        validated_problem_set = {
            'problem': problem,
            'questions': validated_questions,
            'id': _id
        }
        validated_problem_sets.append(validated_problem_set)
    return validated_problem_sets

False

In [None]:
def write_jsonl(results, filename):
    with open(f'result/{filename}.json', 'w',encoding='utf-8') as outfile:
        for entry in results:
            json.dump(entry, outfile, ensure_ascii=False)
            outfile.write('\n')

In [None]:
submit = format_submit_file(sorted_data)
write_jsonl(submit,"submit")
